summaryrefslogtreecommitdiffstats
path: root/arch/ia64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/ia64')
-rw-r--r--arch/ia64/Kconfig420
-rw-r--r--arch/ia64/Kconfig.debug64
-rw-r--r--arch/ia64/Makefile115
-rw-r--r--arch/ia64/configs/bigsur_defconfig1172
-rw-r--r--arch/ia64/configs/sim_defconfig534
-rw-r--r--arch/ia64/configs/sn2_defconfig1038
-rw-r--r--arch/ia64/configs/tiger_defconfig1098
-rw-r--r--arch/ia64/configs/zx1_defconfig1273
-rw-r--r--arch/ia64/defconfig1199
-rw-r--r--arch/ia64/dig/Makefile9
-rw-r--r--arch/ia64/dig/machvec.c3
-rw-r--r--arch/ia64/dig/setup.c86
-rw-r--r--arch/ia64/hp/common/Makefile10
-rw-r--r--arch/ia64/hp/common/hwsw_iommu.c185
-rw-r--r--arch/ia64/hp/common/sba_iommu.c2121
-rw-r--r--arch/ia64/hp/sim/Kconfig20
-rw-r--r--arch/ia64/hp/sim/Makefile16
-rw-r--r--arch/ia64/hp/sim/boot/Makefile37
-rw-r--r--arch/ia64/hp/sim/boot/boot_head.S144
-rw-r--r--arch/ia64/hp/sim/boot/bootloader.c176
-rw-r--r--arch/ia64/hp/sim/boot/bootloader.lds65
-rw-r--r--arch/ia64/hp/sim/boot/fw-emu.c398
-rw-r--r--arch/ia64/hp/sim/boot/ssc.h35
-rw-r--r--arch/ia64/hp/sim/hpsim.S10
-rw-r--r--arch/ia64/hp/sim/hpsim_console.c65
-rw-r--r--arch/ia64/hp/sim/hpsim_irq.c51
-rw-r--r--arch/ia64/hp/sim/hpsim_machvec.c3
-rw-r--r--arch/ia64/hp/sim/hpsim_setup.c52
-rw-r--r--arch/ia64/hp/sim/hpsim_ssc.h36
-rw-r--r--arch/ia64/hp/sim/simeth.c530
-rw-r--r--arch/ia64/hp/sim/simscsi.c404
-rw-r--r--arch/ia64/hp/sim/simserial.c1032
-rw-r--r--arch/ia64/hp/zx1/Makefile8
-rw-r--r--arch/ia64/hp/zx1/hpzx1_machvec.c3
-rw-r--r--arch/ia64/hp/zx1/hpzx1_swiotlb_machvec.c3
-rw-r--r--arch/ia64/ia32/Makefile12
-rw-r--r--arch/ia64/ia32/binfmt_elf32.c294
-rw-r--r--arch/ia64/ia32/elfcore32.h138
-rw-r--r--arch/ia64/ia32/ia32_entry.S500
-rw-r--r--arch/ia64/ia32/ia32_ioctl.c48
-rw-r--r--arch/ia64/ia32/ia32_ldt.c147
-rw-r--r--arch/ia64/ia32/ia32_signal.c1036
-rw-r--r--arch/ia64/ia32/ia32_support.c264
-rw-r--r--arch/ia64/ia32/ia32_traps.c156
-rw-r--r--arch/ia64/ia32/ia32priv.h544
-rw-r--r--arch/ia64/ia32/sys_ia32.c2747
-rw-r--r--arch/ia64/install.sh40
-rw-r--r--arch/ia64/kernel/Makefile52
-rw-r--r--arch/ia64/kernel/acpi-ext.c100
-rw-r--r--arch/ia64/kernel/acpi.c841
-rw-r--r--arch/ia64/kernel/asm-offsets.c239
-rw-r--r--arch/ia64/kernel/brl_emu.c234
-rw-r--r--arch/ia64/kernel/cyclone.c109
-rw-r--r--arch/ia64/kernel/domain.c382
-rw-r--r--arch/ia64/kernel/efi.c832
-rw-r--r--arch/ia64/kernel/efi_stub.S86
-rw-r--r--arch/ia64/kernel/entry.S1587
-rw-r--r--arch/ia64/kernel/entry.h82
-rw-r--r--arch/ia64/kernel/fsys.S884
-rw-r--r--arch/ia64/kernel/gate-data.S3
-rw-r--r--arch/ia64/kernel/gate.S372
-rw-r--r--arch/ia64/kernel/gate.lds.S95
-rw-r--r--arch/ia64/kernel/head.S996
-rw-r--r--arch/ia64/kernel/ia64_ksyms.c127
-rw-r--r--arch/ia64/kernel/init_task.c46
-rw-r--r--arch/ia64/kernel/iosapic.c827
-rw-r--r--arch/ia64/kernel/irq.c238
-rw-r--r--arch/ia64/kernel/irq_ia64.c278
-rw-r--r--arch/ia64/kernel/irq_lsapic.c37
-rw-r--r--arch/ia64/kernel/ivt.S1619
-rw-r--r--arch/ia64/kernel/machvec.c70
-rw-r--r--arch/ia64/kernel/mca.c1470
-rw-r--r--arch/ia64/kernel/mca_asm.S928
-rw-r--r--arch/ia64/kernel/mca_drv.c639
-rw-r--r--arch/ia64/kernel/mca_drv.h113
-rw-r--r--arch/ia64/kernel/mca_drv_asm.S45
-rw-r--r--arch/ia64/kernel/minstate.h251
-rw-r--r--arch/ia64/kernel/module.c952
-rw-r--r--arch/ia64/kernel/pal.S302
-rw-r--r--arch/ia64/kernel/palinfo.c1023
-rw-r--r--arch/ia64/kernel/patch.c189
-rw-r--r--arch/ia64/kernel/perfmon.c6676
-rw-r--r--arch/ia64/kernel/perfmon_default_smpl.c306
-rw-r--r--arch/ia64/kernel/perfmon_generic.h45
-rw-r--r--arch/ia64/kernel/perfmon_itanium.h115
-rw-r--r--arch/ia64/kernel/perfmon_mckinley.h187
-rw-r--r--arch/ia64/kernel/process.c800
-rw-r--r--arch/ia64/kernel/ptrace.c1627
-rw-r--r--arch/ia64/kernel/sal.c302
-rw-r--r--arch/ia64/kernel/salinfo.c629
-rw-r--r--arch/ia64/kernel/semaphore.c165
-rw-r--r--arch/ia64/kernel/setup.c723
-rw-r--r--arch/ia64/kernel/sigframe.h25
-rw-r--r--arch/ia64/kernel/signal.c691
-rw-r--r--arch/ia64/kernel/smp.c376
-rw-r--r--arch/ia64/kernel/smpboot.c692
-rw-r--r--arch/ia64/kernel/sys_ia64.c298
-rw-r--r--arch/ia64/kernel/time.c255
-rw-r--r--arch/ia64/kernel/topology.c92
-rw-r--r--arch/ia64/kernel/traps.c609
-rw-r--r--arch/ia64/kernel/unaligned.c1521
-rw-r--r--arch/ia64/kernel/unwind.c2306
-rw-r--r--arch/ia64/kernel/unwind_decoder.c459
-rw-r--r--arch/ia64/kernel/unwind_i.h164
-rw-r--r--arch/ia64/kernel/vmlinux.lds.S251
-rw-r--r--arch/ia64/lib/Makefile52
-rw-r--r--arch/ia64/lib/bitop.c88
-rw-r--r--arch/ia64/lib/carta_random.S54
-rw-r--r--arch/ia64/lib/checksum.c102
-rw-r--r--arch/ia64/lib/clear_page.S77
-rw-r--r--arch/ia64/lib/clear_user.S209
-rw-r--r--arch/ia64/lib/copy_page.S98
-rw-r--r--arch/ia64/lib/copy_page_mck.S185
-rw-r--r--arch/ia64/lib/copy_user.S610
-rw-r--r--arch/ia64/lib/csum_partial_copy.c151
-rw-r--r--arch/ia64/lib/dec_and_lock.c42
-rw-r--r--arch/ia64/lib/do_csum.S323
-rw-r--r--arch/ia64/lib/flush.S39
-rw-r--r--arch/ia64/lib/idiv32.S83
-rw-r--r--arch/ia64/lib/idiv64.S80
-rw-r--r--arch/ia64/lib/io.c165
-rw-r--r--arch/ia64/lib/ip_fast_csum.S90
-rw-r--r--arch/ia64/lib/memcpy.S301
-rw-r--r--arch/ia64/lib/memcpy_mck.S661
-rw-r--r--arch/ia64/lib/memset.S362
-rw-r--r--arch/ia64/lib/strlen.S192
-rw-r--r--arch/ia64/lib/strlen_user.S198
-rw-r--r--arch/ia64/lib/strncpy_from_user.S44
-rw-r--r--arch/ia64/lib/strnlen_user.S45
-rw-r--r--arch/ia64/lib/swiotlb.c658
-rw-r--r--arch/ia64/lib/xor.S184
-rw-r--r--arch/ia64/mm/Makefile12
-rw-r--r--arch/ia64/mm/contig.c299
-rw-r--r--arch/ia64/mm/discontig.c737
-rw-r--r--arch/ia64/mm/extable.c90
-rw-r--r--arch/ia64/mm/fault.c261
-rw-r--r--arch/ia64/mm/hugetlbpage.c357
-rw-r--r--arch/ia64/mm/init.c597
-rw-r--r--arch/ia64/mm/numa.c49
-rw-r--r--arch/ia64/mm/tlb.c190
-rw-r--r--arch/ia64/module.lds13
-rw-r--r--arch/ia64/oprofile/Kconfig26
-rw-r--r--arch/ia64/oprofile/Makefile10
-rw-r--r--arch/ia64/oprofile/backtrace.c150
-rw-r--r--arch/ia64/oprofile/init.c38
-rw-r--r--arch/ia64/oprofile/perfmon.c100
-rw-r--r--arch/ia64/pci/Makefile4
-rw-r--r--arch/ia64/pci/pci.c735
-rwxr-xr-xarch/ia64/scripts/check-gas15
-rw-r--r--arch/ia64/scripts/check-gas-asm.S2
-rw-r--r--arch/ia64/scripts/check-model.c1
-rw-r--r--arch/ia64/scripts/check-segrel.S4
-rw-r--r--arch/ia64/scripts/check-segrel.lds11
-rw-r--r--arch/ia64/scripts/check-serialize.S2
-rw-r--r--arch/ia64/scripts/check-text-align.S6
-rwxr-xr-xarch/ia64/scripts/toolchain-flags53
-rwxr-xr-xarch/ia64/scripts/unwcheck.py64
-rw-r--r--arch/ia64/sn/Makefile14
-rw-r--r--arch/ia64/sn/include/ioerror.h81
-rw-r--r--arch/ia64/sn/include/pci/pcibr_provider.h149
-rw-r--r--arch/ia64/sn/include/pci/pcibus_provider_defs.h43
-rw-r--r--arch/ia64/sn/include/pci/pcidev.h54
-rw-r--r--arch/ia64/sn/include/pci/pic.h261
-rw-r--r--arch/ia64/sn/include/pci/tiocp.h256
-rw-r--r--arch/ia64/sn/include/tio.h37
-rw-r--r--arch/ia64/sn/include/xtalk/hubdev.h67
-rw-r--r--arch/ia64/sn/include/xtalk/xbow.h291
-rw-r--r--arch/ia64/sn/include/xtalk/xwidgetdev.h70
-rw-r--r--arch/ia64/sn/kernel/Makefile12
-rw-r--r--arch/ia64/sn/kernel/bte.c453
-rw-r--r--arch/ia64/sn/kernel/bte_error.c198
-rw-r--r--arch/ia64/sn/kernel/huberror.c201
-rw-r--r--arch/ia64/sn/kernel/idle.c30
-rw-r--r--arch/ia64/sn/kernel/io_init.c411
-rw-r--r--arch/ia64/sn/kernel/iomv.c70
-rw-r--r--arch/ia64/sn/kernel/irq.c431
-rw-r--r--arch/ia64/sn/kernel/klconflib.c108
-rw-r--r--arch/ia64/sn/kernel/machvec.c11
-rw-r--r--arch/ia64/sn/kernel/mca.c135
-rw-r--r--arch/ia64/sn/kernel/setup.c621
-rw-r--r--arch/ia64/sn/kernel/sn2/Makefile13
-rw-r--r--arch/ia64/sn/kernel/sn2/cache.c34
-rw-r--r--arch/ia64/sn/kernel/sn2/io.c101
-rw-r--r--arch/ia64/sn/kernel/sn2/prominfo_proc.c279
-rw-r--r--arch/ia64/sn/kernel/sn2/ptc_deadlock.S82
-rw-r--r--arch/ia64/sn/kernel/sn2/sn2_smp.c295
-rw-r--r--arch/ia64/sn/kernel/sn2/sn_hwperf.c690
-rw-r--r--arch/ia64/sn/kernel/sn2/sn_proc_fs.c149
-rw-r--r--arch/ia64/sn/kernel/sn2/timer.c36
-rw-r--r--arch/ia64/sn/kernel/sn2/timer_interrupt.c63
-rw-r--r--arch/ia64/sn/pci/Makefile10
-rw-r--r--arch/ia64/sn/pci/pci_dma.c363
-rw-r--r--arch/ia64/sn/pci/pcibr/Makefile11
-rw-r--r--arch/ia64/sn/pci/pcibr/pcibr_ate.c188
-rw-r--r--arch/ia64/sn/pci/pcibr/pcibr_dma.c379
-rw-r--r--arch/ia64/sn/pci/pcibr/pcibr_provider.c170
-rw-r--r--arch/ia64/sn/pci/pcibr/pcibr_reg.c282
197 files changed, 69771 insertions, 0 deletions
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
new file mode 100644
index 00000000000..33fcb205fcb
--- /dev/null
+++ b/arch/ia64/Kconfig
@@ -0,0 +1,420 @@
+#
+# For a description of the syntax of this configuration file,
+# see Documentation/kbuild/kconfig-language.txt.
+#
+
+mainmenu "IA-64 Linux Kernel Configuration"
+
+source "init/Kconfig"
+
+menu "Processor type and features"
+
+config IA64
+ bool
+ default y
+ help
+ The Itanium Processor Family is Intel's 64-bit successor to
+ the 32-bit X86 line. The IA-64 Linux project has a home
+ page at <http://www.linuxia64.org/> and a mailing list at
+ <linux-ia64@vger.kernel.org>.
+
+config 64BIT
+ bool
+ default y
+
+config MMU
+ bool
+ default y
+
+config RWSEM_XCHGADD_ALGORITHM
+ bool
+ default y
+
+config GENERIC_CALIBRATE_DELAY
+ bool
+ default y
+
+config TIME_INTERPOLATION
+ bool
+ default y
+
+config EFI
+ bool
+ default y
+
+config GENERIC_IOMAP
+ bool
+ default y
+
+choice
+ prompt "System type"
+ default IA64_GENERIC
+
+config IA64_GENERIC
+ bool "generic"
+ select NUMA
+ select ACPI_NUMA
+ select VIRTUAL_MEM_MAP
+ select DISCONTIGMEM
+ help
+ This selects the system type of your hardware. A "generic" kernel
+ will run on any supported IA-64 system. However, if you configure
+ a kernel for your specific system, it will be faster and smaller.
+
+ generic For any supported IA-64 system
+ DIG-compliant For DIG ("Developer's Interface Guide") compliant systems
+ HP-zx1/sx1000 For HP systems
+ HP-zx1/sx1000+swiotlb For HP systems with (broken) DMA-constrained devices.
+ SGI-SN2 For SGI Altix systems
+ Ski-simulator For the HP simulator <http://www.hpl.hp.com/research/linux/ski/>
+
+ If you don't know what to do, choose "generic".
+
+config IA64_DIG
+ bool "DIG-compliant"
+
+config IA64_HP_ZX1
+ bool "HP-zx1/sx1000"
+ help
+ Build a kernel that runs on HP zx1 and sx1000 systems. This adds
+ support for the HP I/O MMU.
+
+config IA64_HP_ZX1_SWIOTLB
+ bool "HP-zx1/sx1000 with software I/O TLB"
+ help
+ Build a kernel that runs on HP zx1 and sx1000 systems even when they
+ have broken PCI devices which cannot DMA to full 32 bits. Apart
+ from support for the HP I/O MMU, this includes support for the software
+ I/O TLB, which allows supporting the broken devices at the expense of
+ wasting some kernel memory (about 2MB by default).
+
+config IA64_SGI_SN2
+ bool "SGI-SN2"
+ help
+ Selecting this option will optimize the kernel for use on sn2 based
+ systems, but the resulting kernel binary will not run on other
+ types of ia64 systems. If you have an SGI Altix system, it's safe
+ to select this option. If in doubt, select ia64 generic support
+ instead.
+
+config IA64_HP_SIM
+ bool "Ski-simulator"
+
+endchoice
+
+choice
+ prompt "Processor type"
+ default ITANIUM
+
+config ITANIUM
+ bool "Itanium"
+ help
+ Select your IA-64 processor type. The default is Itanium.
+ This choice is safe for all IA-64 systems, but may not perform
+ optimally on systems with, say, Itanium 2 or newer processors.
+
+config MCKINLEY
+ bool "Itanium 2"
+ help
+ Select this to configure for an Itanium 2 (McKinley) processor.
+
+endchoice
+
+choice
+ prompt "Kernel page size"
+ default IA64_PAGE_SIZE_16KB
+
+config IA64_PAGE_SIZE_4KB
+ bool "4KB"
+ help
+ This lets you select the page size of the kernel. For best IA-64
+ performance, a page size of 8KB or 16KB is recommended. For best
+ IA-32 compatibility, a page size of 4KB should be selected (although
+ the vast majority of IA-32 binaries work perfectly fine with a larger
+ page size). For Itanium 2 or newer systems, a page size of 64KB can
+ also be selected.
+
+ 4KB For best IA-32 compatibility
+ 8KB For best IA-64 performance
+ 16KB For best IA-64 performance
+ 64KB Requires Itanium 2 or newer processor.
+
+ If you don't know what to do, choose 16KB.
+
+config IA64_PAGE_SIZE_8KB
+ bool "8KB"
+
+config IA64_PAGE_SIZE_16KB
+ bool "16KB"
+
+config IA64_PAGE_SIZE_64KB
+ depends on !ITANIUM
+ bool "64KB"
+
+endchoice
+
+config IA64_BRL_EMU
+ bool
+ depends on ITANIUM
+ default y
+
+# align cache-sensitive data to 128 bytes (Itanium 2 / McKinley)
+# or to 64 bytes (Itanium)
+config IA64_L1_CACHE_SHIFT
+ int
+ default "7" if MCKINLEY
+ default "6" if ITANIUM
+
+config NUMA
+ bool "NUMA support"
+ depends on !IA64_HP_SIM
+ default y if IA64_SGI_SN2
+ select ACPI_NUMA
+ help
+ Say Y to compile the kernel to support NUMA (Non-Uniform Memory
+ Access). This option is for configuring high-end multiprocessor
+ server systems. If in doubt, say N.
+
+config VIRTUAL_MEM_MAP
+ bool "Virtual mem map"
+ default y if !IA64_HP_SIM
+ help
+ Say Y to compile the kernel with support for a virtual mem map.
+ This code only takes effect if a memory hole larger than 1 GB
+ is found during boot. You must turn this option on if you
+ require the DISCONTIGMEM option for your machine. If you are
+ unsure, say Y.
+
+config HOLES_IN_ZONE
+ bool
+ default y if VIRTUAL_MEM_MAP
+
+config DISCONTIGMEM
+ bool "Discontiguous memory support"
+ depends on (IA64_DIG || IA64_SGI_SN2 || IA64_GENERIC || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB) && NUMA && VIRTUAL_MEM_MAP
+ default y if (IA64_SGI_SN2 || IA64_GENERIC) && NUMA
+ help
+ Say Y to support efficient handling of discontiguous physical memory,
+ for architectures which are either NUMA (Non-Uniform Memory Access)
+ or have huge holes in the physical address space for other reasons.
+ See <file:Documentation/vm/numa> for more.
+
+config IA64_CYCLONE
+ bool "Cyclone (EXA) Time Source support"
+ help
+ Say Y here to enable support for IBM EXA Cyclone time source.
+ If you're unsure, answer N.
+
+config IOSAPIC
+ bool
+ depends on !IA64_HP_SIM
+ default y
+
+config IA64_SGI_SN_SIM
+ bool "SGI Medusa Simulator Support"
+ depends on IA64_SGI_SN2
+ help
+ If you are compiling a kernel that will run under SGI's IA-64
+ simulator (Medusa) then say Y, otherwise say N.
+
+config FORCE_MAX_ZONEORDER
+ int
+ default "18"
+
+config SMP
+ bool "Symmetric multi-processing support"
+ help
+ This enables support for systems with more than one CPU. If you have
+ a system with only one CPU, say N. If you have a system with more
+ than one CPU, say Y.
+
+ If you say N here, the kernel will run on single and multiprocessor
+ systems, but will use only one CPU of a multiprocessor system. If
+ you say Y here, the kernel will run on many, but not all,
+ single processor systems. On a single processor system, the kernel
+ will run faster if you say N here.
+
+ See also the <file:Documentation/smp.txt> and the SMP-HOWTO
+ available at <http://www.tldp.org/docs.html#howto>.
+
+ If you don't know what to do here, say N.
+
+config NR_CPUS
+ int "Maximum number of CPUs (2-512)"
+ range 2 512
+ depends on SMP
+ default "64"
+ help
+ You should set this to the number of CPUs in your system, but
+ keep in mind that a kernel compiled for, e.g., 2 CPUs will boot but
+ only use 2 CPUs on a >2 CPU system. Setting this to a value larger
+ than 64 will cause the use of a CPU mask array, causing a small
+ performance hit.
+
+config HOTPLUG_CPU
+ bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
+ depends on SMP && EXPERIMENTAL
+ select HOTPLUG
+ default n
+ ---help---
+ Say Y here to experiment with turning CPUs off and on. CPUs
+ can be controlled through /sys/devices/system/cpu/cpu#.
+ Say N if you want to disable CPU hotplug.
+
+config PREEMPT
+ bool "Preemptible Kernel"
+ help
+ This option reduces the latency of the kernel when reacting to
+ real-time or interactive events by allowing a low priority process to
+ be preempted even if it is in kernel mode executing a system call.
+ This allows applications to run more reliably even when the system is
+ under load.
+
+ Say Y here if you are building a kernel for a desktop, embedded
+ or real-time system. Say N if you are unsure.
+
+config HAVE_DEC_LOCK
+ bool
+ depends on (SMP || PREEMPT)
+ default y
+
+config IA32_SUPPORT
+ bool "Support for Linux/x86 binaries"
+ help
+ IA-64 processors can execute IA-32 (X86) instructions. By
+ saying Y here, the kernel will include IA-32 system call
+ emulation support which makes it possible to transparently
+ run IA-32 Linux binaries on an IA-64 Linux system.
+ If in doubt, say Y.
+
+config COMPAT
+ bool
+ depends on IA32_SUPPORT
+ default y
+
+config IA64_MCA_RECOVERY
+ tristate "MCA recovery from errors other than TLB."
+
+config PERFMON
+ bool "Performance monitor support"
+ help
+ Selects whether support for the IA-64 performance monitor hardware
+ is included in the kernel. This makes some kernel data-structures a
+ little bigger and slows down execution a bit, but it is generally
+ a good idea to turn this on. If you're unsure, say Y.
+
+config IA64_PALINFO
+ tristate "/proc/pal support"
+ help
+ If you say Y here, you are able to get PAL (Processor Abstraction
+ Layer) information in /proc/pal. This contains useful information
+ about the processors in your systems, such as cache and TLB sizes
+ and the PAL firmware version in use.
+
+ To use this option, you have to ensure that the "/proc file system
+ support" (CONFIG_PROC_FS) is enabled, too.
+
+config ACPI_DEALLOCATE_IRQ
+ bool
+ depends on IOSAPIC && EXPERIMENTAL
+ default y
+
+source "drivers/firmware/Kconfig"
+
+source "fs/Kconfig.binfmt"
+
+endmenu
+
+menu "Power management and ACPI"
+
+config PM
+ bool "Power Management support"
+ depends on IA64_GENERIC || IA64_DIG || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB
+ default y
+ help
+ "Power Management" means that parts of your computer are shut
+ off or put into a power conserving "sleep" mode if they are not
+ being used. There are two competing standards for doing this: APM
+ and ACPI. If you want to use either one, say Y here and then also
+ to the requisite support below.
+
+ Power Management is most important for battery powered laptop
+ computers; if you have a laptop, check out the Linux Laptop home
+ page on the WWW at <http://www.linux-on-laptops.com/> and the
+ Battery Powered Linux mini-HOWTO, available from
+ <http://www.tldp.org/docs.html#howto>.
+
+ Note that, even if you say N here, Linux on the x86 architecture
+ will issue the hlt instruction if nothing is to be done, thereby
+ sending the processor to sleep and saving power.
+
+config ACPI
+ bool
+ depends on !IA64_HP_SIM
+ default y
+
+if !IA64_HP_SIM
+
+source "drivers/acpi/Kconfig"
+
+endif
+
+endmenu
+
+if !IA64_HP_SIM
+
+menu "Bus options (PCI, PCMCIA)"
+
+config PCI
+ bool "PCI support"
+ help
+ Find out whether you have a PCI motherboard. PCI is the name of a
+ bus system, i.e. the way the CPU talks to the other stuff inside
+ your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or
+ VESA. If you have PCI, say Y, otherwise N.
+
+ The PCI-HOWTO, available from
+ <http://www.tldp.org/docs.html#howto>, contains valuable
+ information about which PCI hardware does work under Linux and which
+ doesn't.
+
+config PCI_DOMAINS
+ bool
+ default PCI
+
+source "drivers/pci/Kconfig"
+
+source "drivers/pci/hotplug/Kconfig"
+
+source "drivers/pcmcia/Kconfig"
+
+endmenu
+
+endif
+
+source "drivers/Kconfig"
+
+source "fs/Kconfig"
+
+source "lib/Kconfig"
+
+#
+# Use the generic interrupt handling code in kernel/irq/:
+#
+config GENERIC_HARDIRQS
+ bool
+ default y
+
+config GENERIC_IRQ_PROBE
+ bool
+ default y
+
+source "arch/ia64/hp/sim/Kconfig"
+
+source "arch/ia64/oprofile/Kconfig"
+
+source "arch/ia64/Kconfig.debug"
+
+source "security/Kconfig"
+
+source "crypto/Kconfig"
diff --git a/arch/ia64/Kconfig.debug b/arch/ia64/Kconfig.debug
new file mode 100644
index 00000000000..de9d507ba0f
--- /dev/null
+++ b/arch/ia64/Kconfig.debug
@@ -0,0 +1,64 @@
+menu "Kernel hacking"
+
+source "lib/Kconfig.debug"
+
+choice
+ prompt "Physical memory granularity"
+ default IA64_GRANULE_64MB
+
+config IA64_GRANULE_16MB
+ bool "16MB"
+ help
+ IA-64 identity-mapped regions use a large page size called a "granule".
+
+ Select "16MB" for a small granule size.
+ Select "64MB" for a large granule size. This is the current default.
+
+config IA64_GRANULE_64MB
+ bool "64MB"
+ depends on !(IA64_GENERIC || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_SGI_SN2)
+
+endchoice
+
+config IA64_PRINT_HAZARDS
+ bool "Print possible IA-64 dependency violations to console"
+ depends on DEBUG_KERNEL
+ help
+ Selecting this option prints more information for Illegal Dependency
+ Faults, that is, for Read-after-Write (RAW), Write-after-Write (WAW),
+ or Write-after-Read (WAR) violations. This option is ignored if you
+ are compiling for an Itanium A step processor
+ (CONFIG_ITANIUM_ASTEP_SPECIFIC). If you're unsure, select Y.
+
+config DISABLE_VHPT
+ bool "Disable VHPT"
+ depends on DEBUG_KERNEL
+ help
+ The Virtual Hash Page Table (VHPT) enhances virtual address
+ translation performance. Normally you want the VHPT active but you
+ can select this option to disable the VHPT for debugging. If you're
+ unsure, answer N.
+
+config IA64_DEBUG_CMPXCHG
+ bool "Turn on compare-and-exchange bug checking (slow!)"
+ depends on DEBUG_KERNEL
+ help
+ Selecting this option turns on bug checking for the IA-64
+ compare-and-exchange instructions. This is slow! Itaniums
+ from step B3 or later don't have this problem. If you're unsure,
+ select N.
+
+config IA64_DEBUG_IRQ
+ bool "Turn on irq debug checks (slow!)"
+ depends on DEBUG_KERNEL
+ help
+ Selecting this option turns on bug checking for the IA-64 irq_save
+ and restore instructions. It's useful for tracking down spinlock
+ problems, but slow! If you're unsure, select N.
+
+config SYSVIPC_COMPAT
+ bool
+ depends on COMPAT && SYSVIPC
+ default y
+
+endmenu
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
new file mode 100644
index 00000000000..f9bd88ada70
--- /dev/null
+++ b/arch/ia64/Makefile
@@ -0,0 +1,115 @@
+#
+# ia64/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1998-2004 by David Mosberger-Tang <davidm@hpl.hp.com>
+#
+
+NM := $(CROSS_COMPILE)nm -B
+READELF := $(CROSS_COMPILE)readelf
+
+export AWK
+
+CHECKFLAGS += -m64 -D__ia64=1 -D__ia64__=1 -D_LP64 -D__LP64__
+
+OBJCOPYFLAGS := --strip-all
+LDFLAGS_vmlinux := -static
+LDFLAGS_MODULE += -T $(srctree)/arch/ia64/module.lds
+AFLAGS_KERNEL := -mconstant-gp
+EXTRA :=
+
+cflags-y := -pipe $(EXTRA) -ffixed-r13 -mfixed-range=f12-f15,f32-f127 \
+ -falign-functions=32 -frename-registers -fno-optimize-sibling-calls
+CFLAGS_KERNEL := -mconstant-gp
+
+GCC_VERSION := $(call cc-version)
+GAS_STATUS = $(shell $(srctree)/arch/ia64/scripts/check-gas "$(CC)" "$(OBJDUMP)")
+CPPFLAGS += $(shell $(srctree)/arch/ia64/scripts/toolchain-flags "$(CC)" "$(OBJDUMP)" "$(READELF)")
+
+ifeq ($(GAS_STATUS),buggy)
+$(error Sorry, you need a newer version of the assembler, one that is built from \
+ a source-tree that post-dates 18-Dec-2002. You can find a pre-compiled \
+ static binary of such an assembler at: \
+ \
+ ftp://ftp.hpl.hp.com/pub/linux-ia64/gas-030124.tar.gz)
+endif
+
+ifneq ($(shell if [ $(GCC_VERSION) -lt 0300 ] ; then echo "bad"; fi ;),)
+$(error Sorry, your compiler is too old. GCC v2.96 is known to generate bad code.)
+endif
+
+ifeq ($(GCC_VERSION),0304)
+ cflags-$(CONFIG_ITANIUM) += -mtune=merced
+ cflags-$(CONFIG_MCKINLEY) += -mtune=mckinley
+endif
+
+CFLAGS += $(cflags-y)
+head-y := arch/ia64/kernel/head.o arch/ia64/kernel/init_task.o
+
+libs-y += arch/ia64/lib/
+core-y += arch/ia64/kernel/ arch/ia64/mm/
+core-$(CONFIG_IA32_SUPPORT) += arch/ia64/ia32/
+core-$(CONFIG_IA64_DIG) += arch/ia64/dig/
+core-$(CONFIG_IA64_GENERIC) += arch/ia64/dig/
+core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/
+core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/
+core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/
+
+drivers-$(CONFIG_PCI) += arch/ia64/pci/
+drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/
+drivers-$(CONFIG_IA64_HP_ZX1) += arch/ia64/hp/common/ arch/ia64/hp/zx1/
+drivers-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/hp/common/ arch/ia64/hp/zx1/
+drivers-$(CONFIG_IA64_GENERIC) += arch/ia64/hp/common/ arch/ia64/hp/zx1/ arch/ia64/hp/sim/ arch/ia64/sn/
+drivers-$(CONFIG_OPROFILE) += arch/ia64/oprofile/
+
+boot := arch/ia64/hp/sim/boot
+
+.PHONY: boot compressed check
+
+all: compressed unwcheck
+
+compressed: vmlinux.gz
+
+vmlinux.gz: vmlinux
+ $(Q)$(MAKE) $(build)=$(boot) $@
+
+unwcheck: vmlinux
+ -$(Q)READELF=$(READELF) $(srctree)/arch/ia64/scripts/unwcheck.py $<
+
+archclean:
+ $(Q)$(MAKE) $(clean)=$(boot)
+
+CLEAN_FILES += include/asm-ia64/.offsets.h.stamp vmlinux.gz bootloader
+
+MRPROPER_FILES += include/asm-ia64/offsets.h
+
+prepare: include/asm-ia64/offsets.h
+
+arch/ia64/kernel/asm-offsets.s: include/asm include/linux/version.h include/config/MARKER
+
+include/asm-ia64/offsets.h: arch/ia64/kernel/asm-offsets.s
+ $(call filechk,gen-asm-offsets)
+
+arch/ia64/kernel/asm-offsets.s: include/asm-ia64/.offsets.h.stamp
+
+include/asm-ia64/.offsets.h.stamp:
+ mkdir -p include/asm-ia64
+ [ -s include/asm-ia64/offsets.h ] \
+ || echo "#define IA64_TASK_SIZE 0" > include/asm-ia64/offsets.h
+ touch $@
+
+boot: lib/lib.a vmlinux
+ $(Q)$(MAKE) $(build)=$(boot) $@
+
+install: vmlinux.gz
+ sh $(srctree)/arch/ia64/install.sh $(KERNELRELEASE) $< System.map "$(INSTALL_PATH)"
+
+define archhelp
+ echo '* compressed - Build compressed kernel image'
+ echo ' install - Install compressed kernel image'
+ echo ' boot - Build vmlinux and bootloader for Ski simulator'
+ echo '* unwcheck - Check vmlinux for invalid unwind info'
+endef
diff --git a/arch/ia64/configs/bigsur_defconfig b/arch/ia64/configs/bigsur_defconfig
new file mode 100644
index 00000000000..b95fcf86ea0
--- /dev/null
+++ b/arch/ia64/configs/bigsur_defconfig
@@ -0,0 +1,1172 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.10-rc2
+# Mon Nov 29 13:27:48 2004
+#
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_CLEAN_COMPILE=y
+CONFIG_LOCK_KERNEL=y
+
+#
+# General setup
+#
+CONFIG_LOCALVERSION=""
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_BSD_PROCESS_ACCT is not set
+CONFIG_SYSCTL=y
+# CONFIG_AUDIT is not set
+CONFIG_LOG_BUF_SHIFT=16
+CONFIG_HOTPLUG=y
+CONFIG_KOBJECT_UEVENT=y
+# CONFIG_IKCONFIG is not set
+# CONFIG_EMBEDDED is not set
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_SHMEM=y
+CONFIG_CC_ALIGN_FUNCTIONS=0
+CONFIG_CC_ALIGN_LABELS=0
+CONFIG_CC_ALIGN_LOOPS=0
+CONFIG_CC_ALIGN_JUMPS=0
+# CONFIG_TINY_SHMEM is not set
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+CONFIG_OBSOLETE_MODPARM=y
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_KMOD=y
+CONFIG_STOP_MACHINE=y
+
+#
+# Processor type and features
+#
+CONFIG_IA64=y
+CONFIG_64BIT=y
+CONFIG_MMU=y
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_TIME_INTERPOLATION=y
+CONFIG_EFI=y
+CONFIG_GENERIC_IOMAP=y
+# CONFIG_IA64_GENERIC is not set
+CONFIG_IA64_DIG=y
+# CONFIG_IA64_HP_ZX1 is not set
+# CONFIG_IA64_SGI_SN2 is not set
+# CONFIG_IA64_HP_SIM is not set
+CONFIG_ITANIUM=y
+# CONFIG_MCKINLEY is not set
+# CONFIG_IA64_PAGE_SIZE_4KB is not set
+# CONFIG_IA64_PAGE_SIZE_8KB is not set
+CONFIG_IA64_PAGE_SIZE_16KB=y
+# CONFIG_IA64_PAGE_SIZE_64KB is not set
+CONFIG_IA64_BRL_EMU=y
+CONFIG_IA64_L1_CACHE_SHIFT=6
+# CONFIG_NUMA is not set
+# CONFIG_VIRTUAL_MEM_MAP is not set
+# CONFIG_IA64_CYCLONE is not set
+CONFIG_IOSAPIC=y
+CONFIG_FORCE_MAX_ZONEORDER=18
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2
+# CONFIG_HOTPLUG_CPU is not set
+CONFIG_PREEMPT=y
+CONFIG_HAVE_DEC_LOCK=y
+CONFIG_IA32_SUPPORT=y
+CONFIG_COMPAT=y
+# CONFIG_IA64_MCA_RECOVERY is not set
+CONFIG_PERFMON=y
+CONFIG_IA64_PALINFO=y
+
+#
+# Firmware Drivers
+#
+CONFIG_EFI_VARS=y
+CONFIG_EFI_PCDP=y
+CONFIG_BINFMT_ELF=y
+CONFIG_BINFMT_MISC=m
+
+#
+# Power management and ACPI
+#
+CONFIG_PM=y
+CONFIG_ACPI=y
+
+#
+# ACPI (Advanced Configuration and Power Interface) Support
+#
+CONFIG_ACPI_BOOT=y
+CONFIG_ACPI_INTERPRETER=y
+CONFIG_ACPI_BUTTON=m
+CONFIG_ACPI_VIDEO=m
+CONFIG_ACPI_FAN=m
+CONFIG_ACPI_PROCESSOR=m
+CONFIG_ACPI_THERMAL=m
+CONFIG_ACPI_BLACKLIST_YEAR=0
+# CONFIG_ACPI_DEBUG is not set
+CONFIG_ACPI_BUS=y
+CONFIG_ACPI_POWER=y
+CONFIG_ACPI_PCI=y
+CONFIG_ACPI_SYSTEM=y
+
+#
+# Bus options (PCI, PCMCIA)
+#
+CONFIG_PCI=y
+CONFIG_PCI_DOMAINS=y
+# CONFIG_PCI_MSI is not set
+CONFIG_PCI_LEGACY_PROC=y
+CONFIG_PCI_NAMES=y
+
+#
+# PCI Hotplug Support
+#
+# CONFIG_HOTPLUG_PCI is not set
+
+#
+# PCCARD (PCMCIA/CardBus) support
+#
+# CONFIG_PCCARD is not set
+
+#
+# PC-card bridges
+#
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+# CONFIG_FW_LOADER is not set
+# CONFIG_DEBUG_DRIVER is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+# CONFIG_MTD is not set
+
+#
+# Parallel port support
+#
+# CONFIG_PARPORT is not set
+
+#
+# Plug and Play support
+#
+# CONFIG_PNP is not set
+
+#
+# Block devices
+#
+# CONFIG_BLK_CPQ_DA is not set
+# CONFIG_BLK_CPQ_CISS_DA is not set
+# CONFIG_BLK_DEV_DAC960 is not set
+# CONFIG_BLK_DEV_UMEM is not set
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_NBD=m
+# CONFIG_BLK_DEV_SX8 is not set
+# CONFIG_BLK_DEV_UB is not set
+CONFIG_BLK_DEV_RAM=m
+CONFIG_BLK_DEV_RAM_SIZE=4096
+CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CDROM_PKTCDVD is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+
+#
+# ATA/ATAPI/MFM/RLL support
+#
+CONFIG_IDE=m
+CONFIG_BLK_DEV_IDE=m
+
+#
+# Please see Documentation/ide.txt for help/info on IDE drives
+#
+# CONFIG_BLK_DEV_IDE_SATA is not set
+CONFIG_BLK_DEV_IDEDISK=m
+# CONFIG_IDEDISK_MULTI_MODE is not set
+CONFIG_BLK_DEV_IDECD=m
+# CONFIG_BLK_DEV_IDETAPE is not set
+CONFIG_BLK_DEV_IDEFLOPPY=m
+# CONFIG_BLK_DEV_IDESCSI is not set
+# CONFIG_IDE_TASK_IOCTL is not set
+
+#
+# IDE chipset support/bugfixes
+#
+CONFIG_IDE_GENERIC=m
+CONFIG_BLK_DEV_IDEPCI=y
+CONFIG_IDEPCI_SHARE_IRQ=y
+# CONFIG_BLK_DEV_OFFBOARD is not set
+CONFIG_BLK_DEV_GENERIC=m
+# CONFIG_BLK_DEV_OPTI621 is not set
+CONFIG_BLK_DEV_IDEDMA_PCI=y
+# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
+CONFIG_IDEDMA_PCI_AUTO=y
+# CONFIG_IDEDMA_ONLYDISK is not set
+# CONFIG_BLK_DEV_AEC62XX is not set
+# CONFIG_BLK_DEV_ALI15X3 is not set
+# CONFIG_BLK_DEV_AMD74XX is not set
+# CONFIG_BLK_DEV_CMD64X is not set
+# CONFIG_BLK_DEV_TRIFLEX is not set
+# CONFIG_BLK_DEV_CY82C693 is not set
+# CONFIG_BLK_DEV_CS5520 is not set
+# CONFIG_BLK_DEV_CS5530 is not set
+# CONFIG_BLK_DEV_HPT34X is not set
+# CONFIG_BLK_DEV_HPT366 is not set
+# CONFIG_BLK_DEV_SC1200 is not set
+CONFIG_BLK_DEV_PIIX=m
+# CONFIG_BLK_DEV_NS87415 is not set
+# CONFIG_BLK_DEV_PDC202XX_OLD is not set
+# CONFIG_BLK_DEV_PDC202XX_NEW is not set
+# CONFIG_BLK_DEV_SVWKS is not set
+# CONFIG_BLK_DEV_SIIMAGE is not set
+# CONFIG_BLK_DEV_SLC90E66 is not set
+# CONFIG_BLK_DEV_TRM290 is not set
+# CONFIG_BLK_DEV_VIA82CXXX is not set
+# CONFIG_IDE_ARM is not set
+CONFIG_BLK_DEV_IDEDMA=y
+# CONFIG_IDEDMA_IVB is not set
+CONFIG_IDEDMA_AUTO=y
+# CONFIG_BLK_DEV_HD is not set
+
+#
+# SCSI device support
+#
+CONFIG_SCSI=y
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+# CONFIG_CHR_DEV_ST is not set
+# CONFIG_CHR_DEV_OSST is not set
+# CONFIG_BLK_DEV_SR is not set
+# CONFIG_CHR_DEV_SG is not set
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+
+#
+# SCSI Transport Attributes
+#
+CONFIG_SCSI_SPI_ATTRS=m
+# CONFIG_SCSI_FC_ATTRS is not set
+
+#
+# SCSI low-level drivers
+#
+# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
+# CONFIG_SCSI_3W_9XXX is not set
+# CONFIG_SCSI_ACARD is not set
+# CONFIG_SCSI_AACRAID is not set
+# CONFIG_SCSI_AIC7XXX is not set
+# CONFIG_SCSI_AIC7XXX_OLD is not set
+# CONFIG_SCSI_AIC79XX is not set
+# CONFIG_MEGARAID_NEWGEN is not set
+# CONFIG_MEGARAID_LEGACY is not set
+# CONFIG_SCSI_SATA is not set
+# CONFIG_SCSI_BUSLOGIC is not set
+# CONFIG_SCSI_DMX3191D is not set
+# CONFIG_SCSI_EATA is not set
+# CONFIG_SCSI_EATA_PIO is not set
+# CONFIG_SCSI_FUTURE_DOMAIN is not set
+# CONFIG_SCSI_GDTH is not set
+# CONFIG_SCSI_IPS is not set
+# CONFIG_SCSI_INITIO is not set
+# CONFIG_SCSI_INIA100 is not set
+# CONFIG_SCSI_SYM53C8XX_2 is not set
+# CONFIG_SCSI_IPR is not set
+# CONFIG_SCSI_QLOGIC_ISP is not set
+# CONFIG_SCSI_QLOGIC_FC is not set
+CONFIG_SCSI_QLOGIC_1280=y
+# CONFIG_SCSI_QLOGIC_1280_1040 is not set
+CONFIG_SCSI_QLA2XXX=y
+# CONFIG_SCSI_QLA21XX is not set
+# CONFIG_SCSI_QLA22XX is not set
+# CONFIG_SCSI_QLA2300 is not set
+# CONFIG_SCSI_QLA2322 is not set
+# CONFIG_SCSI_QLA6312 is not set
+# CONFIG_SCSI_QLA6322 is not set
+# CONFIG_SCSI_DC395x is not set
+# CONFIG_SCSI_DC390T is not set
+# CONFIG_SCSI_DEBUG is not set
+
+#
+# Multi-device support (RAID and LVM)
+#
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=m
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+CONFIG_MD_RAID10=m
+CONFIG_MD_RAID5=m
+CONFIG_MD_RAID6=m
+CONFIG_MD_MULTIPATH=m
+# CONFIG_MD_FAULTY is not set
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+
+#
+# Fusion MPT device support
+#
+# CONFIG_FUSION is not set
+
+#
+# IEEE 1394 (FireWire) support
+#
+# CONFIG_IEEE1394 is not set
+
+#
+# I2O device support
+#
+# CONFIG_I2O is not set
+
+#
+# Networking support
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+CONFIG_PACKET_MMAP=y
+# CONFIG_NETLINK_DEV is not set
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+# CONFIG_IP_PNP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_TUNNEL is not set
+CONFIG_IP_TCPDIAG=y
+# CONFIG_IP_TCPDIAG_IPV6 is not set
+# CONFIG_IPV6 is not set
+# CONFIG_NETFILTER is not set
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
+# CONFIG_NET_SCHED is not set
+# CONFIG_NET_CLS_ROUTE is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=y
+# CONFIG_BONDING is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+
+#
+# ARCnet devices
+#
+# CONFIG_ARCNET is not set
+
+#
+# Ethernet (10 or 100Mbit)
+#
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=y
+# CONFIG_HAPPYMEAL is not set
+# CONFIG_SUNGEM is not set
+# CONFIG_NET_VENDOR_3COM is not set
+
+#
+# Tulip family network device support
+#
+# CONFIG_NET_TULIP is not set
+# CONFIG_HP100 is not set
+CONFIG_NET_PCI=y
+# CONFIG_PCNET32 is not set
+# CONFIG_AMD8111_ETH is not set
+# CONFIG_ADAPTEC_STARFIRE is not set
+# CONFIG_B44 is not set
+# CONFIG_FORCEDETH is not set
+# CONFIG_DGRS is not set
+CONFIG_EEPRO100=y
+# CONFIG_EEPRO100_PIO is not set
+# CONFIG_E100 is not set
+# CONFIG_FEALNX is not set
+# CONFIG_NATSEMI is not set
+# CONFIG_NE2K_PCI is not set
+# CONFIG_8139CP is not set
+# CONFIG_8139TOO is not set
+# CONFIG_SIS900 is not set
+# CONFIG_EPIC100 is not set
+# CONFIG_SUNDANCE is not set
+# CONFIG_VIA_RHINE is not set
+
+#
+# Ethernet (1000 Mbit)
+#
+# CONFIG_ACENIC is not set
+# CONFIG_DL2K is not set
+# CONFIG_E1000 is not set
+# CONFIG_NS83820 is not set
+# CONFIG_HAMACHI is not set
+# CONFIG_YELLOWFIN is not set
+# CONFIG_R8169 is not set
+# CONFIG_SK98LIN is not set
+# CONFIG_VIA_VELOCITY is not set
+# CONFIG_TIGON3 is not set
+
+#
+# Ethernet (10000 Mbit)
+#
+# CONFIG_IXGB is not set
+# CONFIG_S2IO is not set
+
+#
+# Token Ring devices
+#
+# CONFIG_TR is not set
+
+#
+# Wireless LAN (non-hamradio)
+#
+# CONFIG_NET_RADIO is not set
+
+#
+# Wan interfaces
+#
+# CONFIG_WAN is not set
+# CONFIG_FDDI is not set
+# CONFIG_HIPPI is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NET_FC is not set
+# CONFIG_SHAPER is not set
+# CONFIG_NETCONSOLE is not set
+
+#
+# ISDN subsystem
+#
+# CONFIG_ISDN is not set
+
+#
+# Telephony Support
+#
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+
+#
+# Userland interfaces
+#
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_INPUT_MOUSEDEV_PSAUX=y
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_TSDEV is not set
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input I/O drivers
+#
+# CONFIG_GAMEPORT is not set
+CONFIG_SOUND_GAMEPORT=y
+CONFIG_SERIO=y
+CONFIG_SERIO_I8042=y
+CONFIG_SERIO_SERPORT=y
+# CONFIG_SERIO_CT82C710 is not set
+# CONFIG_SERIO_PCIPS2 is not set
+# CONFIG_SERIO_RAW is not set
+
+#
+# Input Device Drivers
+#
+CONFIG_INPUT_KEYBOARD=y
+CONFIG_KEYBOARD_ATKBD=y
+# CONFIG_KEYBOARD_SUNKBD is not set
+# CONFIG_KEYBOARD_LKKBD is not set
+# CONFIG_KEYBOARD_XTKBD is not set
+# CONFIG_KEYBOARD_NEWTON is not set
+CONFIG_INPUT_MOUSE=y
+CONFIG_MOUSE_PS2=y
+# CONFIG_MOUSE_SERIAL is not set
+# CONFIG_MOUSE_VSXXXAA is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_ACPI=y
+CONFIG_SERIAL_8250_NR_UARTS=4
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+# CONFIG_SERIAL_8250_DETECT_IRQ is not set
+# CONFIG_SERIAL_8250_MULTIPORT is not set
+# CONFIG_SERIAL_8250_RSA is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+
+#
+# IPMI
+#
+# CONFIG_IPMI_HANDLER is not set
+
+#
+# Watchdog Cards
+#
+# CONFIG_WATCHDOG is not set
+# CONFIG_HW_RANDOM is not set
+CONFIG_EFI_RTC=y
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+CONFIG_AGP=m
+CONFIG_AGP_I460=m
+CONFIG_DRM=y
+# CONFIG_DRM_TDFX is not set
+CONFIG_DRM_R128=m
+# CONFIG_DRM_RADEON is not set
+# CONFIG_DRM_MGA is not set
+# CONFIG_DRM_SIS is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_HPET is not set
+
+#
+# I2C support
+#
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+
+#
+# I2C Algorithms
+#
+CONFIG_I2C_ALGOBIT=y
+# CONFIG_I2C_ALGOPCF is not set
+# CONFIG_I2C_ALGOPCA is not set
+
+#
+# I2C Hardware Bus support
+#
+# CONFIG_I2C_ALI1535 is not set
+# CONFIG_I2C_ALI1563 is not set
+# CONFIG_I2C_ALI15X3 is not set
+# CONFIG_I2C_AMD756 is not set
+# CONFIG_I2C_AMD8111 is not set
+# CONFIG_I2C_I801 is not set
+# CONFIG_I2C_I810 is not set
+# CONFIG_I2C_ISA is not set
+# CONFIG_I2C_NFORCE2 is not set
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_PROSAVAGE is not set
+# CONFIG_I2C_SAVAGE4 is not set
+# CONFIG_SCx200_ACB is not set
+# CONFIG_I2C_SIS5595 is not set
+# CONFIG_I2C_SIS630 is not set
+# CONFIG_I2C_SIS96X is not set
+# CONFIG_I2C_STUB is not set
+# CONFIG_I2C_VIA is not set
+# CONFIG_I2C_VIAPRO is not set
+# CONFIG_I2C_VOODOO3 is not set
+# CONFIG_I2C_PCA_ISA is not set
+
+#
+# Hardware Sensors Chip support
+#
+# CONFIG_I2C_SENSOR is not set
+# CONFIG_SENSORS_ADM1021 is not set
+# CONFIG_SENSORS_ADM1025 is not set
+# CONFIG_SENSORS_ADM1031 is not set
+# CONFIG_SENSORS_ASB100 is not set
+# CONFIG_SENSORS_DS1621 is not set
+# CONFIG_SENSORS_FSCHER is not set
+# CONFIG_SENSORS_GL518SM is not set
+# CONFIG_SENSORS_IT87 is not set
+# CONFIG_SENSORS_LM63 is not set
+# CONFIG_SENSORS_LM75 is not set
+# CONFIG_SENSORS_LM77 is not set
+# CONFIG_SENSORS_LM78 is not set
+# CONFIG_SENSORS_LM80 is not set
+# CONFIG_SENSORS_LM83 is not set
+# CONFIG_SENSORS_LM85 is not set
+# CONFIG_SENSORS_LM87 is not set
+# CONFIG_SENSORS_LM90 is not set
+# CONFIG_SENSORS_MAX1619 is not set
+# CONFIG_SENSORS_PC87360 is not set
+# CONFIG_SENSORS_SMSC47M1 is not set
+# CONFIG_SENSORS_VIA686A is not set
+# CONFIG_SENSORS_W83781D is not set
+# CONFIG_SENSORS_W83L785TS is not set
+# CONFIG_SENSORS_W83627HF is not set
+
+#
+# Other I2C Chip support
+#
+# CONFIG_SENSORS_EEPROM is not set
+# CONFIG_SENSORS_PCF8574 is not set
+# CONFIG_SENSORS_PCF8591 is not set
+# CONFIG_SENSORS_RTC8564 is not set
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_I2C_DEBUG_CHIP is not set
+
+#
+# Dallas's 1-wire bus
+#
+# CONFIG_W1 is not set
+
+#
+# Misc devices
+#
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+
+#
+# Digital Video Broadcasting Devices
+#
+# CONFIG_DVB is not set
+
+#
+# Graphics support
+#
+# CONFIG_FB is not set
+
+#
+# Console display driver support
+#
+CONFIG_VGA_CONSOLE=y
+CONFIG_DUMMY_CONSOLE=y
+
+#
+# Sound
+#
+CONFIG_SOUND=m
+
+#
+# Advanced Linux Sound Architecture
+#
+CONFIG_SND=m
+CONFIG_SND_TIMER=m
+CONFIG_SND_PCM=m
+CONFIG_SND_HWDEP=m
+CONFIG_SND_RAWMIDI=m
+CONFIG_SND_SEQUENCER=m
+# CONFIG_SND_SEQ_DUMMY is not set
+CONFIG_SND_OSSEMUL=y
+CONFIG_SND_MIXER_OSS=m
+CONFIG_SND_PCM_OSS=m
+# CONFIG_SND_SEQUENCER_OSS is not set
+# CONFIG_SND_VERBOSE_PRINTK is not set
+# CONFIG_SND_DEBUG is not set
+
+#
+# Generic devices
+#
+CONFIG_SND_OPL3_LIB=m
+# CONFIG_SND_DUMMY is not set
+# CONFIG_SND_VIRMIDI is not set
+# CONFIG_SND_MTPAV is not set
+# CONFIG_SND_SERIAL_U16550 is not set
+# CONFIG_SND_MPU401 is not set
+
+#
+# PCI devices
+#
+CONFIG_SND_AC97_CODEC=m
+# CONFIG_SND_ALI5451 is not set
+# CONFIG_SND_ATIIXP is not set
+# CONFIG_SND_ATIIXP_MODEM is not set
+# CONFIG_SND_AU8810 is not set
+# CONFIG_SND_AU8820 is not set
+# CONFIG_SND_AU8830 is not set
+# CONFIG_SND_AZT3328 is not set
+# CONFIG_SND_BT87X is not set
+# CONFIG_SND_CS46XX is not set
+CONFIG_SND_CS4281=m
+# CONFIG_SND_EMU10K1 is not set
+# CONFIG_SND_KORG1212 is not set
+# CONFIG_SND_MIXART is not set
+# CONFIG_SND_NM256 is not set
+# CONFIG_SND_RME32 is not set
+# CONFIG_SND_RME96 is not set
+# CONFIG_SND_RME9652 is not set
+# CONFIG_SND_HDSP is not set
+# CONFIG_SND_TRIDENT is not set
+# CONFIG_SND_YMFPCI is not set
+# CONFIG_SND_ALS4000 is not set
+# CONFIG_SND_CMIPCI is not set
+# CONFIG_SND_ENS1370 is not set
+# CONFIG_SND_ENS1371 is not set
+# CONFIG_SND_ES1938 is not set
+# CONFIG_SND_ES1968 is not set
+# CONFIG_SND_MAESTRO3 is not set
+# CONFIG_SND_FM801 is not set
+# CONFIG_SND_ICE1712 is not set
+# CONFIG_SND_ICE1724 is not set
+# CONFIG_SND_INTEL8X0 is not set
+# CONFIG_SND_INTEL8X0M is not set
+# CONFIG_SND_SONICVIBES is not set
+# CONFIG_SND_VIA82XX is not set
+# CONFIG_SND_VX222 is not set
+
+#
+# USB devices
+#
+# CONFIG_SND_USB_AUDIO is not set
+# CONFIG_SND_USB_USX2Y is not set
+
+#
+# Open Sound System
+#
+# CONFIG_SOUND_PRIME is not set
+
+#
+# USB support
+#
+CONFIG_USB=m
+# CONFIG_USB_DEBUG is not set
+
+#
+# Miscellaneous USB options
+#
+CONFIG_USB_DEVICEFS=y
+# CONFIG_USB_BANDWIDTH is not set
+# CONFIG_USB_DYNAMIC_MINORS is not set
+# CONFIG_USB_SUSPEND is not set
+# CONFIG_USB_OTG is not set
+CONFIG_USB_ARCH_HAS_HCD=y
+CONFIG_USB_ARCH_HAS_OHCI=y
+
+#
+# USB Host Controller Drivers
+#
+# CONFIG_USB_EHCI_HCD is not set
+# CONFIG_USB_OHCI_HCD is not set
+CONFIG_USB_UHCI_HCD=m
+
+#
+# USB Device Class drivers
+#
+CONFIG_USB_AUDIO=m
+CONFIG_USB_BLUETOOTH_TTY=m
+CONFIG_USB_MIDI=m
+CONFIG_USB_ACM=m
+CONFIG_USB_PRINTER=m
+CONFIG_USB_STORAGE=m
+# CONFIG_USB_STORAGE_DEBUG is not set
+# CONFIG_USB_STORAGE_RW_DETECT is not set
+# CONFIG_USB_STORAGE_DATAFAB is not set
+# CONFIG_USB_STORAGE_FREECOM is not set
+# CONFIG_USB_STORAGE_ISD200 is not set
+# CONFIG_USB_STORAGE_DPCM is not set
+# CONFIG_USB_STORAGE_HP8200e is not set
+# CONFIG_USB_STORAGE_SDDR09 is not set
+# CONFIG_USB_STORAGE_SDDR55 is not set
+# CONFIG_USB_STORAGE_JUMPSHOT is not set
+
+#
+# USB Input Devices
+#
+CONFIG_USB_HID=m
+CONFIG_USB_HIDINPUT=y
+# CONFIG_HID_FF is not set
+CONFIG_USB_HIDDEV=y
+
+#
+# USB HID Boot Protocol drivers
+#
+# CONFIG_USB_KBD is not set
+# CONFIG_USB_MOUSE is not set
+# CONFIG_USB_AIPTEK is not set
+# CONFIG_USB_WACOM is not set
+# CONFIG_USB_KBTAB is not set
+# CONFIG_USB_POWERMATE is not set
+# CONFIG_USB_MTOUCH is not set
+# CONFIG_USB_EGALAX is not set
+# CONFIG_USB_XPAD is not set
+# CONFIG_USB_ATI_REMOTE is not set
+
+#
+# USB Imaging devices
+#
+# CONFIG_USB_MDC800 is not set
+# CONFIG_USB_MICROTEK is not set
+# CONFIG_USB_HPUSBSCSI is not set
+
+#
+# USB Multimedia devices
+#
+# CONFIG_USB_DABUSB is not set
+
+#
+# Video4Linux support is needed for USB Multimedia device support
+#
+
+#
+# USB Network Adapters
+#
+# CONFIG_USB_CATC is not set
+# CONFIG_USB_KAWETH is not set
+# CONFIG_USB_PEGASUS is not set
+# CONFIG_USB_RTL8150 is not set
+# CONFIG_USB_USBNET is not set
+
+#
+# USB port drivers
+#
+
+#
+# USB Serial Converter support
+#
+# CONFIG_USB_SERIAL is not set
+
+#
+# USB Miscellaneous drivers
+#
+# CONFIG_USB_EMI62 is not set
+# CONFIG_USB_EMI26 is not set
+# CONFIG_USB_TIGL is not set
+# CONFIG_USB_AUERSWALD is not set
+# CONFIG_USB_RIO500 is not set
+# CONFIG_USB_LEGOTOWER is not set
+# CONFIG_USB_LCD is not set
+# CONFIG_USB_LED is not set
+# CONFIG_USB_CYTHERM is not set
+# CONFIG_USB_PHIDGETKIT is not set
+# CONFIG_USB_PHIDGETSERVO is not set
+# CONFIG_USB_TEST is not set
+
+#
+# USB ATM/DSL drivers
+#
+
+#
+# USB Gadget Support
+#
+# CONFIG_USB_GADGET is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_XATTR=y
+# CONFIG_EXT3_FS_POSIX_ACL is not set
+# CONFIG_EXT3_FS_SECURITY is not set
+CONFIG_JBD=y
+# CONFIG_JBD_DEBUG is not set
+CONFIG_FS_MBCACHE=y
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+CONFIG_FS_POSIX_ACL=y
+CONFIG_XFS_FS=y
+# CONFIG_XFS_RT is not set
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_SECURITY=y
+CONFIG_XFS_POSIX_ACL=y
+# CONFIG_MINIX_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_QUOTA is not set
+CONFIG_QUOTACTL=y
+CONFIG_DNOTIFY=y
+CONFIG_AUTOFS_FS=m
+CONFIG_AUTOFS4_FS=m
+
+#
+# CD-ROM/DVD Filesystems
+#
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+# CONFIG_ZISOFS is not set
+CONFIG_UDF_FS=m
+CONFIG_UDF_NLS=y
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=y
+# CONFIG_MSDOS_FS is not set
+CONFIG_VFAT_FS=y
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_SYSFS=y
+# CONFIG_DEVFS_FS is not set
+CONFIG_DEVPTS_FS_XATTR=y
+CONFIG_DEVPTS_FS_SECURITY=y
+CONFIG_TMPFS=y
+# CONFIG_TMPFS_XATTR is not set
+CONFIG_HUGETLBFS=y
+CONFIG_HUGETLB_PAGE=y
+CONFIG_RAMFS=y
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Network File Systems
+#
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3=y
+CONFIG_NFS_V4=y
+# CONFIG_NFS_DIRECTIO is not set
+CONFIG_NFSD=m
+CONFIG_NFSD_V3=y
+CONFIG_NFSD_V4=y
+CONFIG_NFSD_TCP=y
+CONFIG_LOCKD=m
+CONFIG_LOCKD_V4=y
+CONFIG_EXPORTFS=m
+CONFIG_SUNRPC=m
+CONFIG_SUNRPC_GSS=m
+CONFIG_RPCSEC_GSS_KRB5=m
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+CONFIG_CIFS=m
+CONFIG_CIFS_STATS=y
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+# CONFIG_OSF_PARTITION is not set
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+# CONFIG_MAC_PARTITION is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_BSD_DISKLABEL is not set
+# CONFIG_MINIX_SUBPARTITION is not set
+# CONFIG_SOLARIS_X86_PARTITION is not set
+# CONFIG_UNIXWARE_DISKLABEL is not set
+# CONFIG_LDM_PARTITION is not set
+CONFIG_SGI_PARTITION=y
+# CONFIG_ULTRIX_PARTITION is not set
+# CONFIG_SUN_PARTITION is not set
+CONFIG_EFI_PARTITION=y
+
+#
+# Native Language Support
+#
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_CODEPAGE_437=y
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+# CONFIG_NLS_ASCII is not set
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+CONFIG_NLS_UTF8=m
+
+#
+# Library routines
+#
+# CONFIG_CRC_CCITT is not set
+CONFIG_CRC32=y
+# CONFIG_LIBCRC32C is not set
+
+#
+# Profiling support
+#
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=y
+
+#
+# Kernel hacking
+#
+CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_IA64_GRANULE_16MB is not set
+CONFIG_IA64_GRANULE_64MB=y
+# CONFIG_IA64_PRINT_HAZARDS is not set
+# CONFIG_DISABLE_VHPT is not set
+# CONFIG_IA64_DEBUG_CMPXCHG is not set
+# CONFIG_IA64_DEBUG_IRQ is not set
+CONFIG_SYSVIPC_COMPAT=y
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+
+#
+# Cryptographic options
+#
+CONFIG_CRYPTO=y
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_MD4 is not set
+CONFIG_CRYPTO_MD5=y
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_WP512 is not set
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_TEST is not set
diff --git a/arch/ia64/configs/sim_defconfig b/arch/ia64/configs/sim_defconfig
new file mode 100644
index 00000000000..a26781cfe8b
--- /dev/null
+++ b/arch/ia64/configs/sim_defconfig
@@ -0,0 +1,534 @@
+#
+# Automatically generated make config: don't edit
+#
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+# CONFIG_CLEAN_COMPILE is not set
+# CONFIG_STANDALONE is not set
+CONFIG_BROKEN=y
+CONFIG_BROKEN_ON_SMP=y
+
+#
+# General setup
+#
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+# CONFIG_POSIX_MQUEUE is not set
+# CONFIG_BSD_PROCESS_ACCT is not set
+CONFIG_SYSCTL=y
+# CONFIG_AUDIT is not set
+CONFIG_LOG_BUF_SHIFT=16
+# CONFIG_HOTPLUG is not set
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+# CONFIG_EMBEDDED is not set
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_OBSOLETE_MODPARM=y
+CONFIG_MODVERSIONS=y
+CONFIG_KMOD=y
+CONFIG_STOP_MACHINE=y
+
+#
+# Processor type and features
+#
+CONFIG_IA64=y
+CONFIG_64BIT=y
+CONFIG_MMU=y
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_TIME_INTERPOLATION=y
+CONFIG_EFI=y
+# CONFIG_IA64_GENERIC is not set
+# CONFIG_IA64_DIG is not set
+# CONFIG_IA64_HP_ZX1 is not set
+# CONFIG_IA64_SGI_SN2 is not set
+CONFIG_IA64_HP_SIM=y
+# CONFIG_ITANIUM is not set
+CONFIG_MCKINLEY=y
+# CONFIG_IA64_PAGE_SIZE_4KB is not set
+# CONFIG_IA64_PAGE_SIZE_8KB is not set
+# CONFIG_IA64_PAGE_SIZE_16KB is not set
+CONFIG_IA64_PAGE_SIZE_64KB=y
+CONFIG_IA64_L1_CACHE_SHIFT=7
+# CONFIG_MCKINLEY_ASTEP_SPECIFIC is not set
+# CONFIG_VIRTUAL_MEM_MAP is not set
+# CONFIG_IA64_CYCLONE is not set
+CONFIG_FORCE_MAX_ZONEORDER=18
+CONFIG_SMP=y
+CONFIG_NR_CPUS=64
+CONFIG_PREEMPT=y
+CONFIG_HAVE_DEC_LOCK=y
+CONFIG_IA32_SUPPORT=y
+CONFIG_COMPAT=y
+# CONFIG_PERFMON is not set
+CONFIG_IA64_PALINFO=m
+
+#
+# Firmware Drivers
+#
+CONFIG_EFI_VARS=y
+# CONFIG_SMBIOS is not set
+CONFIG_BINFMT_ELF=y
+CONFIG_BINFMT_MISC=y
+
+#
+# Power management and ACPI
+#
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+# CONFIG_DEBUG_DRIVER is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+# CONFIG_MTD is not set
+
+#
+# Parallel port support
+#
+# CONFIG_PARPORT is not set
+
+#
+# Plug and Play support
+#
+
+#
+# Block devices
+#
+CONFIG_BLK_DEV_LOOP=y
+# CONFIG_BLK_DEV_CRYPTOLOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=4096
+# CONFIG_BLK_DEV_INITRD is not set
+
+#
+# ATA/ATAPI/MFM/RLL support
+#
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+CONFIG_SCSI=y
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+# CONFIG_CHR_DEV_ST is not set
+# CONFIG_CHR_DEV_OSST is not set
+# CONFIG_BLK_DEV_SR is not set
+# CONFIG_CHR_DEV_SG is not set
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+CONFIG_SCSI_MULTI_LUN=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+
+#
+# SCSI Transport Attributes
+#
+CONFIG_SCSI_SPI_ATTRS=y
+# CONFIG_SCSI_FC_ATTRS is not set
+
+#
+# SCSI low-level drivers
+#
+# CONFIG_SCSI_AIC7XXX_OLD is not set
+# CONFIG_SCSI_SATA is not set
+# CONFIG_SCSI_EATA_PIO is not set
+# CONFIG_SCSI_DEBUG is not set
+
+#
+# Multi-device support (RAID and LVM)
+#
+# CONFIG_MD is not set
+
+#
+# Fusion MPT device support
+#
+
+#
+# IEEE 1394 (FireWire) support
+#
+# CONFIG_IEEE1394 is not set
+
+#
+# I2O device support
+#
+
+#
+# Networking support
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+# CONFIG_NETLINK_DEV is not set
+# CONFIG_UNIX is not set
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+# CONFIG_IP_ADVANCED_ROUTER is not set
+# CONFIG_IP_PNP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_IP_MROUTE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_IPV6 is not set
+# CONFIG_NETFILTER is not set
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_NET_HW_FLOWCONTROL is not set
+
+#
+# QoS and/or fair queueing
+#
+# CONFIG_NET_SCHED is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_NETDEVICES is not set
+
+#
+# ISDN subsystem
+#
+# CONFIG_ISDN is not set
+
+#
+# Telephony Support
+#
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+
+#
+# Userland interfaces
+#
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_INPUT_MOUSEDEV_PSAUX=y
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_TSDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input I/O drivers
+#
+# CONFIG_GAMEPORT is not set
+CONFIG_SOUND_GAMEPORT=y
+CONFIG_SERIO=y
+# CONFIG_SERIO_I8042 is not set
+CONFIG_SERIO_SERPORT=y
+# CONFIG_SERIO_CT82C710 is not set
+
+#
+# Input Device Drivers
+#
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_UNIX98_PTYS=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_QIC02_TAPE is not set
+
+#
+# IPMI
+#
+# CONFIG_IPMI_HANDLER is not set
+
+#
+# Watchdog Cards
+#
+# CONFIG_WATCHDOG is not set
+CONFIG_EFI_RTC=y
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+# CONFIG_FTAPE is not set
+# CONFIG_AGP is not set
+# CONFIG_DRM is not set
+# CONFIG_RAW_DRIVER is not set
+
+#
+# I2C support
+#
+# CONFIG_I2C is not set
+
+#
+# Misc devices
+#
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+
+#
+# Digital Video Broadcasting Devices
+#
+# CONFIG_DVB is not set
+
+#
+# Graphics support
+#
+# CONFIG_FB is not set
+
+#
+# Console display driver support
+#
+# CONFIG_VGA_CONSOLE is not set
+# CONFIG_MDA_CONSOLE is not set
+CONFIG_DUMMY_CONSOLE=y
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+
+#
+# USB support
+#
+
+#
+# USB Gadget Support
+#
+# CONFIG_USB_GADGET is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+CONFIG_EXT3_FS=y
+# CONFIG_EXT3_FS_XATTR is not set
+CONFIG_JBD=y
+# CONFIG_JBD_DEBUG is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_XFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_FAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_SYSFS=y
+# CONFIG_DEVFS_FS is not set
+# CONFIG_DEVPTS_FS_XATTR is not set
+# CONFIG_TMPFS is not set
+CONFIG_HUGETLBFS=y
+CONFIG_HUGETLB_PAGE=y
+CONFIG_RAMFS=y
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Network File Systems
+#
+CONFIG_NFS_FS=y
+# CONFIG_NFS_V3 is not set
+# CONFIG_NFS_V4 is not set
+CONFIG_NFS_DIRECTIO=y
+CONFIG_NFSD=y
+CONFIG_NFSD_V3=y
+# CONFIG_NFSD_V4 is not set
+# CONFIG_NFSD_TCP is not set
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+CONFIG_EXPORTFS=y
+CONFIG_SUNRPC=y
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+# CONFIG_OSF_PARTITION is not set
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+# CONFIG_MAC_PARTITION is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_BSD_DISKLABEL is not set
+# CONFIG_MINIX_SUBPARTITION is not set
+# CONFIG_SOLARIS_X86_PARTITION is not set
+# CONFIG_UNIXWARE_DISKLABEL is not set
+# CONFIG_LDM_PARTITION is not set
+# CONFIG_NEC98_PARTITION is not set
+# CONFIG_SGI_PARTITION is not set
+# CONFIG_ULTRIX_PARTITION is not set
+# CONFIG_SUN_PARTITION is not set
+CONFIG_EFI_PARTITION=y
+
+#
+# Native Language Support
+#
+# CONFIG_NLS is not set
+
+#
+# Library routines
+#
+CONFIG_CRC32=y
+# CONFIG_LIBCRC32C is not set
+
+#
+# HP Simulator drivers
+#
+CONFIG_HP_SIMETH=y
+CONFIG_HP_SIMSERIAL=y
+CONFIG_HP_SIMSERIAL_CONSOLE=y
+CONFIG_HP_SIMSCSI=y
+
+#
+# Profiling support
+#
+# CONFIG_PROFILING is not set
+
+#
+# Kernel hacking
+#
+# CONFIG_IA64_GRANULE_16MB is not set
+CONFIG_IA64_GRANULE_64MB=y
+CONFIG_DEBUG_KERNEL=y
+# CONFIG_IA64_PRINT_HAZARDS is not set
+# CONFIG_DISABLE_VHPT is not set
+# CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_IA64_DEBUG_CMPXCHG is not set
+# CONFIG_IA64_DEBUG_IRQ is not set
+CONFIG_DEBUG_INFO=y
+CONFIG_SYSVIPC_COMPAT=y
+
+#
+# Security options
+#
+# CONFIG_SECURITY is not set
+
+#
+# Cryptographic options
+#
+# CONFIG_CRYPTO is not set
diff --git a/arch/ia64/configs/sn2_defconfig b/arch/ia64/configs/sn2_defconfig
new file mode 100644
index 00000000000..bfeb952fe8e
--- /dev/null
+++ b/arch/ia64/configs/sn2_defconfig
@@ -0,0 +1,1038 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.10
+# Mon Jan 10 13:57:35 2005
+#
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_CLEAN_COMPILE=y
+CONFIG_LOCK_KERNEL=y
+
+#
+# General setup
+#
+CONFIG_LOCALVERSION=""
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_BSD_PROCESS_ACCT is not set
+CONFIG_SYSCTL=y
+# CONFIG_AUDIT is not set
+CONFIG_LOG_BUF_SHIFT=20
+CONFIG_HOTPLUG=y
+CONFIG_KOBJECT_UEVENT=y
+# CONFIG_IKCONFIG is not set
+# CONFIG_EMBEDDED is not set
+CONFIG_KALLSYMS=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_CPUSETS=y
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_SHMEM=y
+CONFIG_CC_ALIGN_FUNCTIONS=0
+CONFIG_CC_ALIGN_LABELS=0
+CONFIG_CC_ALIGN_LOOPS=0
+CONFIG_CC_ALIGN_JUMPS=0
+# CONFIG_TINY_SHMEM is not set
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+CONFIG_OBSOLETE_MODPARM=y
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_KMOD=y
+CONFIG_STOP_MACHINE=y
+
+#
+# Processor type and features
+#
+CONFIG_IA64=y
+CONFIG_64BIT=y
+CONFIG_MMU=y
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_TIME_INTERPOLATION=y
+CONFIG_EFI=y
+CONFIG_GENERIC_IOMAP=y
+# CONFIG_IA64_GENERIC is not set
+# CONFIG_IA64_DIG is not set
+# CONFIG_IA64_HP_ZX1 is not set
+CONFIG_IA64_SGI_SN2=y
+# CONFIG_IA64_HP_SIM is not set
+# CONFIG_ITANIUM is not set
+CONFIG_MCKINLEY=y
+# CONFIG_IA64_PAGE_SIZE_4KB is not set
+# CONFIG_IA64_PAGE_SIZE_8KB is not set
+CONFIG_IA64_PAGE_SIZE_16KB=y
+# CONFIG_IA64_PAGE_SIZE_64KB is not set
+CONFIG_IA64_L1_CACHE_SHIFT=7
+CONFIG_NUMA=y
+CONFIG_VIRTUAL_MEM_MAP=y
+CONFIG_HOLES_IN_ZONE=y
+CONFIG_DISCONTIGMEM=y
+# CONFIG_IA64_CYCLONE is not set
+CONFIG_IOSAPIC=y
+CONFIG_IA64_SGI_SN_SIM=y
+CONFIG_FORCE_MAX_ZONEORDER=18
+CONFIG_SMP=y
+CONFIG_NR_CPUS=512
+# CONFIG_HOTPLUG_CPU is not set
+CONFIG_PREEMPT=y
+CONFIG_HAVE_DEC_LOCK=y
+CONFIG_IA32_SUPPORT=y
+CONFIG_COMPAT=y
+CONFIG_IA64_MCA_RECOVERY=y
+CONFIG_PERFMON=y
+CONFIG_IA64_PALINFO=y
+CONFIG_ACPI_DEALLOCATE_IRQ=y
+
+#
+# Firmware Drivers
+#
+CONFIG_EFI_VARS=y
+# CONFIG_EFI_PCDP is not set
+CONFIG_BINFMT_ELF=y
+# CONFIG_BINFMT_MISC is not set
+
+#
+# Power management and ACPI
+#
+CONFIG_ACPI=y
+
+#
+# ACPI (Advanced Configuration and Power Interface) Support
+#
+CONFIG_ACPI_BOOT=y
+CONFIG_ACPI_INTERPRETER=y
+# CONFIG_ACPI_BUTTON is not set
+CONFIG_ACPI_VIDEO=m
+# CONFIG_ACPI_FAN is not set
+# CONFIG_ACPI_PROCESSOR is not set
+CONFIG_ACPI_NUMA=y
+CONFIG_ACPI_BLACKLIST_YEAR=0
+# CONFIG_ACPI_DEBUG is not set
+CONFIG_ACPI_BUS=y
+CONFIG_ACPI_POWER=y
+CONFIG_ACPI_PCI=y
+CONFIG_ACPI_SYSTEM=y
+# CONFIG_ACPI_CONTAINER is not set
+
+#
+# Bus options (PCI, PCMCIA)
+#
+CONFIG_PCI=y
+CONFIG_PCI_DOMAINS=y
+# CONFIG_PCI_MSI is not set
+CONFIG_PCI_LEGACY_PROC=y
+CONFIG_PCI_NAMES=y
+
+#
+# PCI Hotplug Support
+#
+CONFIG_HOTPLUG_PCI=y
+# CONFIG_HOTPLUG_PCI_FAKE is not set
+# CONFIG_HOTPLUG_PCI_ACPI is not set
+# CONFIG_HOTPLUG_PCI_CPCI is not set
+# CONFIG_HOTPLUG_PCI_PCIE is not set
+# CONFIG_HOTPLUG_PCI_SHPC is not set
+CONFIG_HOTPLUG_PCI_SGI=y
+
+#
+# PCCARD (PCMCIA/CardBus) support
+#
+# CONFIG_PCCARD is not set
+
+#
+# PC-card bridges
+#
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+CONFIG_FW_LOADER=m
+# CONFIG_DEBUG_DRIVER is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+# CONFIG_MTD is not set
+
+#
+# Parallel port support
+#
+# CONFIG_PARPORT is not set
+
+#
+# Plug and Play support
+#
+# CONFIG_PNP is not set
+
+#
+# Block devices
+#
+# CONFIG_BLK_CPQ_DA is not set
+# CONFIG_BLK_CPQ_CISS_DA is not set
+# CONFIG_BLK_DEV_DAC960 is not set
+# CONFIG_BLK_DEV_UMEM is not set
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_NBD=m
+# CONFIG_BLK_DEV_SX8 is not set
+# CONFIG_BLK_DEV_UB is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=4096
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CDROM_PKTCDVD is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_ATA_OVER_ETH=m
+
+#
+# ATA/ATAPI/MFM/RLL support
+#
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDE=y
+
+#
+# Please see Documentation/ide.txt for help/info on IDE drives
+#
+# CONFIG_BLK_DEV_IDE_SATA is not set
+CONFIG_BLK_DEV_IDEDISK=y
+# CONFIG_IDEDISK_MULTI_MODE is not set
+CONFIG_BLK_DEV_IDECD=y
+# CONFIG_BLK_DEV_IDETAPE is not set
+# CONFIG_BLK_DEV_IDEFLOPPY is not set
+# CONFIG_BLK_DEV_IDESCSI is not set
+# CONFIG_IDE_TASK_IOCTL is not set
+
+#
+# IDE chipset support/bugfixes
+#
+CONFIG_IDE_GENERIC=y
+CONFIG_BLK_DEV_IDEPCI=y
+# CONFIG_IDEPCI_SHARE_IRQ is not set
+# CONFIG_BLK_DEV_OFFBOARD is not set
+# CONFIG_BLK_DEV_GENERIC is not set
+# CONFIG_BLK_DEV_OPTI621 is not set
+CONFIG_BLK_DEV_IDEDMA_PCI=y
+# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
+CONFIG_IDEDMA_PCI_AUTO=y
+# CONFIG_IDEDMA_ONLYDISK is not set
+# CONFIG_BLK_DEV_AEC62XX is not set
+# CONFIG_BLK_DEV_ALI15X3 is not set
+# CONFIG_BLK_DEV_AMD74XX is not set
+# CONFIG_BLK_DEV_CMD64X is not set
+# CONFIG_BLK_DEV_TRIFLEX is not set
+# CONFIG_BLK_DEV_CY82C693 is not set
+# CONFIG_BLK_DEV_CS5520 is not set
+# CONFIG_BLK_DEV_CS5530 is not set
+# CONFIG_BLK_DEV_HPT34X is not set
+# CONFIG_BLK_DEV_HPT366 is not set
+# CONFIG_BLK_DEV_SC1200 is not set
+# CONFIG_BLK_DEV_PIIX is not set
+# CONFIG_BLK_DEV_NS87415 is not set
+# CONFIG_BLK_DEV_PDC202XX_OLD is not set
+# CONFIG_BLK_DEV_PDC202XX_NEW is not set
+# CONFIG_BLK_DEV_SVWKS is not set
+CONFIG_BLK_DEV_SGIIOC4=y
+# CONFIG_BLK_DEV_SIIMAGE is not set
+# CONFIG_BLK_DEV_SLC90E66 is not set
+# CONFIG_BLK_DEV_TRM290 is not set
+# CONFIG_BLK_DEV_VIA82CXXX is not set
+# CONFIG_IDE_ARM is not set
+CONFIG_BLK_DEV_IDEDMA=y
+# CONFIG_IDEDMA_IVB is not set
+CONFIG_IDEDMA_AUTO=y
+# CONFIG_BLK_DEV_HD is not set
+
+#
+# SCSI device support
+#
+CONFIG_SCSI=y
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+# CONFIG_CHR_DEV_OSST is not set
+CONFIG_BLK_DEV_SR=m
+# CONFIG_BLK_DEV_SR_VENDOR is not set
+CONFIG_CHR_DEV_SG=m
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+CONFIG_SCSI_CONSTANTS=y
+# CONFIG_SCSI_LOGGING is not set
+
+#
+# SCSI Transport Attributes
+#
+CONFIG_SCSI_SPI_ATTRS=y
+CONFIG_SCSI_FC_ATTRS=y
+# CONFIG_SCSI_ISCSI_ATTRS is not set
+
+#
+# SCSI low-level drivers
+#
+# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
+# CONFIG_SCSI_3W_9XXX is not set
+# CONFIG_SCSI_ACARD is not set
+# CONFIG_SCSI_AACRAID is not set
+# CONFIG_SCSI_AIC7XXX is not set
+# CONFIG_SCSI_AIC7XXX_OLD is not set
+# CONFIG_SCSI_AIC79XX is not set
+# CONFIG_MEGARAID_NEWGEN is not set
+# CONFIG_MEGARAID_LEGACY is not set
+CONFIG_SCSI_SATA=y
+# CONFIG_SCSI_SATA_AHCI is not set
+# CONFIG_SCSI_SATA_SVW is not set
+# CONFIG_SCSI_ATA_PIIX is not set
+# CONFIG_SCSI_SATA_NV is not set
+# CONFIG_SCSI_SATA_PROMISE is not set
+# CONFIG_SCSI_SATA_SX4 is not set
+# CONFIG_SCSI_SATA_SIL is not set
+# CONFIG_SCSI_SATA_SIS is not set
+# CONFIG_SCSI_SATA_ULI is not set
+# CONFIG_SCSI_SATA_VIA is not set
+CONFIG_SCSI_SATA_VITESSE=y
+# CONFIG_SCSI_BUSLOGIC is not set
+# CONFIG_SCSI_DMX3191D is not set
+# CONFIG_SCSI_EATA is not set
+# CONFIG_SCSI_EATA_PIO is not set
+# CONFIG_SCSI_FUTURE_DOMAIN is not set
+# CONFIG_SCSI_GDTH is not set
+# CONFIG_SCSI_IPS is not set
+# CONFIG_SCSI_INITIO is not set
+# CONFIG_SCSI_INIA100 is not set
+# CONFIG_SCSI_SYM53C8XX_2 is not set
+# CONFIG_SCSI_IPR is not set
+# CONFIG_SCSI_QLOGIC_ISP is not set
+# CONFIG_SCSI_QLOGIC_FC is not set
+CONFIG_SCSI_QLOGIC_1280=y
+# CONFIG_SCSI_QLOGIC_1280_1040 is not set
+CONFIG_SCSI_QLA2XXX=y
+# CONFIG_SCSI_QLA21XX is not set
+CONFIG_SCSI_QLA22XX=y
+CONFIG_SCSI_QLA2300=y
+CONFIG_SCSI_QLA2322=y
+# CONFIG_SCSI_QLA6312 is not set
+# CONFIG_SCSI_DC395x is not set
+# CONFIG_SCSI_DC390T is not set
+# CONFIG_SCSI_DEBUG is not set
+
+#
+# Multi-device support (RAID and LVM)
+#
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=y
+CONFIG_MD_RAID0=y
+CONFIG_MD_RAID1=y
+# CONFIG_MD_RAID10 is not set
+CONFIG_MD_RAID5=y
+# CONFIG_MD_RAID6 is not set
+CONFIG_MD_MULTIPATH=y
+# CONFIG_MD_FAULTY is not set
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+
+#
+# Fusion MPT device support
+#
+CONFIG_FUSION=y
+CONFIG_FUSION_MAX_SGE=128
+CONFIG_FUSION_CTL=m
+
+#
+# IEEE 1394 (FireWire) support
+#
+# CONFIG_IEEE1394 is not set
+
+#
+# I2O device support
+#
+# CONFIG_I2O is not set
+
+#
+# Networking support
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+CONFIG_PACKET_MMAP=y
+CONFIG_NETLINK_DEV=y
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+# CONFIG_IP_ADVANCED_ROUTER is not set
+# CONFIG_IP_PNP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_IP_MROUTE is not set
+# CONFIG_ARPD is not set
+CONFIG_SYN_COOKIES=y
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_TUNNEL is not set
+CONFIG_IP_TCPDIAG=y
+# CONFIG_IP_TCPDIAG_IPV6 is not set
+CONFIG_IPV6=m
+# CONFIG_IPV6_PRIVACY is not set
+# CONFIG_INET6_AH is not set
+# CONFIG_INET6_ESP is not set
+# CONFIG_INET6_IPCOMP is not set
+# CONFIG_INET6_TUNNEL is not set
+# CONFIG_IPV6_TUNNEL is not set
+# CONFIG_NETFILTER is not set
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
+# CONFIG_NET_SCHED is not set
+# CONFIG_NET_CLS_ROUTE is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+CONFIG_NETPOLL=y
+# CONFIG_NETPOLL_RX is not set
+# CONFIG_NETPOLL_TRAP is not set
+CONFIG_NET_POLL_CONTROLLER=y
+# CONFIG_HAMRADIO is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+CONFIG_NETDEVICES=y
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+# CONFIG_ETHERTAP is not set
+
+#
+# ARCnet devices
+#
+# CONFIG_ARCNET is not set
+
+#
+# Ethernet (10 or 100Mbit)
+#
+# CONFIG_NET_ETHERNET is not set
+
+#
+# Ethernet (1000 Mbit)
+#
+# CONFIG_ACENIC is not set
+# CONFIG_DL2K is not set
+# CONFIG_E1000 is not set
+# CONFIG_NS83820 is not set
+# CONFIG_HAMACHI is not set
+# CONFIG_YELLOWFIN is not set
+# CONFIG_R8169 is not set
+# CONFIG_SK98LIN is not set
+CONFIG_TIGON3=y
+
+#
+# Ethernet (10000 Mbit)
+#
+# CONFIG_IXGB is not set
+CONFIG_S2IO=m
+# CONFIG_S2IO_NAPI is not set
+# CONFIG_2BUFF_MODE is not set
+
+#
+# Token Ring devices
+#
+# CONFIG_TR is not set
+
+#
+# Wireless LAN (non-hamradio)
+#
+# CONFIG_NET_RADIO is not set
+
+#
+# Wan interfaces
+#
+# CONFIG_WAN is not set
+# CONFIG_FDDI is not set
+# CONFIG_HIPPI is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NET_FC is not set
+# CONFIG_SHAPER is not set
+CONFIG_NETCONSOLE=y
+
+#
+# ISDN subsystem
+#
+# CONFIG_ISDN is not set
+
+#
+# Telephony Support
+#
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+
+#
+# Userland interfaces
+#
+CONFIG_INPUT_MOUSEDEV=y
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_TSDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input I/O drivers
+#
+# CONFIG_GAMEPORT is not set
+CONFIG_SOUND_GAMEPORT=y
+# CONFIG_SERIO is not set
+# CONFIG_SERIO_I8042 is not set
+
+#
+# Input Device Drivers
+#
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+CONFIG_SERIAL_NONSTANDARD=y
+# CONFIG_ROCKETPORT is not set
+# CONFIG_CYCLADES is not set
+# CONFIG_MOXA_SMARTIO is not set
+# CONFIG_ISI is not set
+# CONFIG_SYNCLINK is not set
+# CONFIG_SYNCLINKMP is not set
+# CONFIG_N_HDLC is not set
+# CONFIG_STALDRV is not set
+CONFIG_SGI_SNSC=y
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_SERIAL_SGI_L1_CONSOLE=y
+CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+
+#
+# IPMI
+#
+# CONFIG_IPMI_HANDLER is not set
+
+#
+# Watchdog Cards
+#
+# CONFIG_WATCHDOG is not set
+# CONFIG_HW_RANDOM is not set
+CONFIG_EFI_RTC=y
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+# CONFIG_AGP is not set
+# CONFIG_DRM is not set
+CONFIG_RAW_DRIVER=m
+# CONFIG_HPET is not set
+CONFIG_MAX_RAW_DEVS=256
+CONFIG_MMTIMER=y
+
+#
+# I2C support
+#
+# CONFIG_I2C is not set
+
+#
+# Dallas's 1-wire bus
+#
+# CONFIG_W1 is not set
+
+#
+# Misc devices
+#
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+
+#
+# Digital Video Broadcasting Devices
+#
+# CONFIG_DVB is not set
+
+#
+# Graphics support
+#
+# CONFIG_FB is not set
+
+#
+# Console display driver support
+#
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_DUMMY_CONSOLE=y
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+
+#
+# USB support
+#
+CONFIG_USB=m
+# CONFIG_USB_DEBUG is not set
+
+#
+# Miscellaneous USB options
+#
+# CONFIG_USB_DEVICEFS is not set
+# CONFIG_USB_BANDWIDTH is not set
+# CONFIG_USB_DYNAMIC_MINORS is not set
+# CONFIG_USB_OTG is not set
+CONFIG_USB_ARCH_HAS_HCD=y
+CONFIG_USB_ARCH_HAS_OHCI=y
+
+#
+# USB Host Controller Drivers
+#
+CONFIG_USB_EHCI_HCD=m
+# CONFIG_USB_EHCI_SPLIT_ISO is not set
+# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
+CONFIG_USB_OHCI_HCD=m
+CONFIG_USB_UHCI_HCD=m
+# CONFIG_USB_SL811_HCD is not set
+
+#
+# USB Device Class drivers
+#
+# CONFIG_USB_BLUETOOTH_TTY is not set
+# CONFIG_USB_ACM is not set
+# CONFIG_USB_PRINTER is not set
+
+#
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+#
+# CONFIG_USB_STORAGE is not set
+
+#
+# USB Input Devices
+#
+CONFIG_USB_HID=m
+CONFIG_USB_HIDINPUT=y
+# CONFIG_HID_FF is not set
+# CONFIG_USB_HIDDEV is not set
+
+#
+# USB HID Boot Protocol drivers
+#
+# CONFIG_USB_KBD is not set
+# CONFIG_USB_MOUSE is not set
+# CONFIG_USB_AIPTEK is not set
+# CONFIG_USB_WACOM is not set
+# CONFIG_USB_KBTAB is not set
+# CONFIG_USB_POWERMATE is not set
+# CONFIG_USB_MTOUCH is not set
+# CONFIG_USB_EGALAX is not set
+# CONFIG_USB_XPAD is not set
+# CONFIG_USB_ATI_REMOTE is not set
+
+#
+# USB Imaging devices
+#
+# CONFIG_USB_MDC800 is not set
+# CONFIG_USB_MICROTEK is not set
+
+#
+# USB Multimedia devices
+#
+# CONFIG_USB_DABUSB is not set
+
+#
+# Video4Linux support is needed for USB Multimedia device support
+#
+
+#
+# USB Network Adapters
+#
+# CONFIG_USB_CATC is not set
+# CONFIG_USB_KAWETH is not set
+# CONFIG_USB_PEGASUS is not set
+# CONFIG_USB_RTL8150 is not set
+# CONFIG_USB_USBNET is not set
+
+#
+# USB port drivers
+#
+
+#
+# USB Serial Converter support
+#
+# CONFIG_USB_SERIAL is not set
+
+#
+# USB Miscellaneous drivers
+#
+# CONFIG_USB_EMI62 is not set
+# CONFIG_USB_EMI26 is not set
+# CONFIG_USB_AUERSWALD is not set
+# CONFIG_USB_RIO500 is not set
+# CONFIG_USB_LEGOTOWER is not set
+# CONFIG_USB_LCD is not set
+# CONFIG_USB_LED is not set
+# CONFIG_USB_CYTHERM is not set
+# CONFIG_USB_PHIDGETKIT is not set
+# CONFIG_USB_PHIDGETSERVO is not set
+
+#
+# USB ATM/DSL drivers
+#
+
+#
+# USB Gadget Support
+#
+# CONFIG_USB_GADGET is not set
+
+#
+# MMC/SD Card support
+#
+# CONFIG_MMC is not set
+
+#
+# InfiniBand support
+#
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_MTHCA=m
+# CONFIG_INFINIBAND_MTHCA_DEBUG is not set
+CONFIG_INFINIBAND_IPOIB=m
+# CONFIG_INFINIBAND_IPOIB_DEBUG is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_XATTR=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_JBD=y
+# CONFIG_JBD_DEBUG is not set
+CONFIG_FS_MBCACHE=y
+CONFIG_REISERFS_FS=y
+# CONFIG_REISERFS_CHECK is not set
+# CONFIG_REISERFS_PROC_INFO is not set
+CONFIG_REISERFS_FS_XATTR=y
+CONFIG_REISERFS_FS_POSIX_ACL=y
+CONFIG_REISERFS_FS_SECURITY=y
+# CONFIG_JFS_FS is not set
+CONFIG_FS_POSIX_ACL=y
+CONFIG_XFS_FS=y
+CONFIG_XFS_RT=y
+CONFIG_XFS_QUOTA=y
+# CONFIG_XFS_SECURITY is not set
+CONFIG_XFS_POSIX_ACL=y
+# CONFIG_MINIX_FS is not set
+# CONFIG_ROMFS_FS is not set
+CONFIG_QUOTA=y
+# CONFIG_QFMT_V1 is not set
+# CONFIG_QFMT_V2 is not set
+CONFIG_QUOTACTL=y
+CONFIG_DNOTIFY=y
+CONFIG_AUTOFS_FS=m
+CONFIG_AUTOFS4_FS=m
+
+#
+# CD-ROM/DVD Filesystems
+#
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+# CONFIG_ZISOFS is not set
+CONFIG_UDF_FS=m
+CONFIG_UDF_NLS=y
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=y
+# CONFIG_MSDOS_FS is not set
+CONFIG_VFAT_FS=y
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_SYSFS=y
+# CONFIG_DEVFS_FS is not set
+# CONFIG_DEVPTS_FS_XATTR is not set
+CONFIG_TMPFS=y
+CONFIG_TMPFS_XATTR=y
+CONFIG_TMPFS_SECURITY=y
+CONFIG_HUGETLBFS=y
+CONFIG_HUGETLB_PAGE=y
+CONFIG_RAMFS=y
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Network File Systems
+#
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_DIRECTIO=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3=y
+CONFIG_NFSD_V4=y
+CONFIG_NFSD_TCP=y
+CONFIG_LOCKD=m
+CONFIG_LOCKD_V4=y
+CONFIG_EXPORTFS=m
+CONFIG_SUNRPC=m
+CONFIG_SUNRPC_GSS=m
+CONFIG_RPCSEC_GSS_KRB5=m
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+CONFIG_SMB_FS=m
+# CONFIG_SMB_NLS_DEFAULT is not set
+CONFIG_CIFS=m
+# CONFIG_CIFS_STATS is not set
+# CONFIG_CIFS_XATTR is not set
+# CONFIG_CIFS_EXPERIMENTAL is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+# CONFIG_OSF_PARTITION is not set
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+# CONFIG_MAC_PARTITION is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_BSD_DISKLABEL is not set
+# CONFIG_MINIX_SUBPARTITION is not set
+# CONFIG_SOLARIS_X86_PARTITION is not set
+# CONFIG_UNIXWARE_DISKLABEL is not set
+# CONFIG_LDM_PARTITION is not set
+CONFIG_SGI_PARTITION=y
+# CONFIG_ULTRIX_PARTITION is not set
+# CONFIG_SUN_PARTITION is not set
+CONFIG_EFI_PARTITION=y
+
+#
+# Native Language Support
+#
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_CODEPAGE_437=y
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+# CONFIG_NLS_ASCII is not set
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+CONFIG_NLS_UTF8=y
+
+#
+# Library routines
+#
+# CONFIG_CRC_CCITT is not set
+CONFIG_CRC32=y
+# CONFIG_LIBCRC32C is not set
+CONFIG_ZLIB_INFLATE=m
+CONFIG_ZLIB_DEFLATE=m
+
+#
+# Profiling support
+#
+# CONFIG_PROFILING is not set
+
+#
+# Kernel hacking
+#
+CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+CONFIG_DEBUG_INFO=y
+# CONFIG_DEBUG_FS is not set
+CONFIG_IA64_GRANULE_16MB=y
+# CONFIG_IA64_GRANULE_64MB is not set
+# CONFIG_IA64_PRINT_HAZARDS is not set
+# CONFIG_DISABLE_VHPT is not set
+# CONFIG_IA64_DEBUG_CMPXCHG is not set
+# CONFIG_IA64_DEBUG_IRQ is not set
+CONFIG_SYSVIPC_COMPAT=y
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+
+#
+# Cryptographic options
+#
+CONFIG_CRYPTO=y
+CONFIG_CRYPTO_HMAC=y
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_MD4 is not set
+CONFIG_CRYPTO_MD5=m
+CONFIG_CRYPTO_SHA1=m
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_WP512 is not set
+CONFIG_CRYPTO_DES=m
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+CONFIG_CRYPTO_DEFLATE=m
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Hardware crypto devices
+#
diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig
new file mode 100644
index 00000000000..99830e8fc9b
--- /dev/null
+++ b/arch/ia64/configs/tiger_defconfig
@@ -0,0 +1,1098 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.11-rc2
+# Sat Jan 22 11:17:02 2005
+#
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_CLEAN_COMPILE=y
+CONFIG_LOCK_KERNEL=y
+
+#
+# General setup
+#
+CONFIG_LOCALVERSION=""
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_BSD_PROCESS_ACCT is not set
+CONFIG_SYSCTL=y
+# CONFIG_AUDIT is not set
+CONFIG_LOG_BUF_SHIFT=20
+CONFIG_HOTPLUG=y
+CONFIG_KOBJECT_UEVENT=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+# CONFIG_EMBEDDED is not set
+CONFIG_KALLSYMS=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_SHMEM=y
+CONFIG_CC_ALIGN_FUNCTIONS=0
+CONFIG_CC_ALIGN_LABELS=0
+CONFIG_CC_ALIGN_LOOPS=0
+CONFIG_CC_ALIGN_JUMPS=0
+# CONFIG_TINY_SHMEM is not set
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+CONFIG_OBSOLETE_MODPARM=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_KMOD=y
+CONFIG_STOP_MACHINE=y
+
+#
+# Processor type and features
+#
+CONFIG_IA64=y
+CONFIG_64BIT=y
+CONFIG_MMU=y
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_TIME_INTERPOLATION=y
+CONFIG_EFI=y
+CONFIG_GENERIC_IOMAP=y
+# CONFIG_IA64_GENERIC is not set
+CONFIG_IA64_DIG=y
+# CONFIG_IA64_HP_ZX1 is not set
+# CONFIG_IA64_HP_ZX1_SWIOTLB is not set
+# CONFIG_IA64_SGI_SN2 is not set
+# CONFIG_IA64_HP_SIM is not set
+# CONFIG_ITANIUM is not set
+CONFIG_MCKINLEY=y
+# CONFIG_IA64_PAGE_SIZE_4KB is not set
+# CONFIG_IA64_PAGE_SIZE_8KB is not set
+CONFIG_IA64_PAGE_SIZE_16KB=y
+# CONFIG_IA64_PAGE_SIZE_64KB is not set
+CONFIG_IA64_L1_CACHE_SHIFT=7
+# CONFIG_NUMA is not set
+CONFIG_VIRTUAL_MEM_MAP=y
+CONFIG_HOLES_IN_ZONE=y
+CONFIG_IA64_CYCLONE=y
+CONFIG_IOSAPIC=y
+CONFIG_FORCE_MAX_ZONEORDER=18
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+# CONFIG_PREEMPT is not set
+CONFIG_HAVE_DEC_LOCK=y
+CONFIG_IA32_SUPPORT=y
+CONFIG_COMPAT=y
+CONFIG_IA64_MCA_RECOVERY=y
+CONFIG_PERFMON=y
+CONFIG_IA64_PALINFO=y
+CONFIG_ACPI_DEALLOCATE_IRQ=y
+
+#
+# Firmware Drivers
+#
+CONFIG_EFI_VARS=y
+CONFIG_EFI_PCDP=y
+CONFIG_BINFMT_ELF=y
+CONFIG_BINFMT_MISC=m
+
+#
+# Power management and ACPI
+#
+CONFIG_PM=y
+CONFIG_ACPI=y
+
+#
+# ACPI (Advanced Configuration and Power Interface) Support
+#
+CONFIG_ACPI_BOOT=y
+CONFIG_ACPI_INTERPRETER=y
+CONFIG_ACPI_BUTTON=m
+# CONFIG_ACPI_VIDEO is not set
+CONFIG_ACPI_FAN=m
+CONFIG_ACPI_PROCESSOR=m
+# CONFIG_ACPI_HOTPLUG_CPU is not set
+CONFIG_ACPI_THERMAL=m
+CONFIG_ACPI_BLACKLIST_YEAR=0
+# CONFIG_ACPI_DEBUG is not set
+CONFIG_ACPI_BUS=y
+CONFIG_ACPI_POWER=y
+CONFIG_ACPI_PCI=y
+CONFIG_ACPI_SYSTEM=y
+# CONFIG_ACPI_CONTAINER is not set
+
+#
+# Bus options (PCI, PCMCIA)
+#
+CONFIG_PCI=y
+CONFIG_PCI_DOMAINS=y
+# CONFIG_PCI_MSI is not set
+CONFIG_PCI_LEGACY_PROC=y
+CONFIG_PCI_NAMES=y
+
+#
+# PCI Hotplug Support
+#
+CONFIG_HOTPLUG_PCI=m
+# CONFIG_HOTPLUG_PCI_FAKE is not set
+CONFIG_HOTPLUG_PCI_ACPI=m
+# CONFIG_HOTPLUG_PCI_ACPI_IBM is not set
+# CONFIG_HOTPLUG_PCI_CPCI is not set
+# CONFIG_HOTPLUG_PCI_SHPC is not set
+
+#
+# PCCARD (PCMCIA/CardBus) support
+#
+# CONFIG_PCCARD is not set
+
+#
+# PC-card bridges
+#
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+# CONFIG_FW_LOADER is not set
+# CONFIG_DEBUG_DRIVER is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+# CONFIG_MTD is not set
+
+#
+# Parallel port support
+#
+# CONFIG_PARPORT is not set
+
+#
+# Plug and Play support
+#
+# CONFIG_PNP is not set
+
+#
+# Block devices
+#
+# CONFIG_BLK_CPQ_DA is not set
+# CONFIG_BLK_CPQ_CISS_DA is not set
+# CONFIG_BLK_DEV_DAC960 is not set
+# CONFIG_BLK_DEV_UMEM is not set
+# CONFIG_BLK_DEV_COW_COMMON is not set
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_NBD=m
+# CONFIG_BLK_DEV_SX8 is not set
+# CONFIG_BLK_DEV_UB is not set
+CONFIG_BLK_DEV_RAM=m
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=4096
+CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CDROM_PKTCDVD is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+# CONFIG_ATA_OVER_ETH is not set
+
+#
+# ATA/ATAPI/MFM/RLL support
+#
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDE=y
+
+#
+# Please see Documentation/ide.txt for help/info on IDE drives
+#
+# CONFIG_BLK_DEV_IDE_SATA is not set
+CONFIG_BLK_DEV_IDEDISK=y
+# CONFIG_IDEDISK_MULTI_MODE is not set
+CONFIG_BLK_DEV_IDECD=y
+# CONFIG_BLK_DEV_IDETAPE is not set
+CONFIG_BLK_DEV_IDEFLOPPY=y
+CONFIG_BLK_DEV_IDESCSI=m
+# CONFIG_IDE_TASK_IOCTL is not set
+
+#
+# IDE chipset support/bugfixes
+#
+CONFIG_IDE_GENERIC=y
+CONFIG_BLK_DEV_IDEPCI=y
+# CONFIG_IDEPCI_SHARE_IRQ is not set
+# CONFIG_BLK_DEV_OFFBOARD is not set
+CONFIG_BLK_DEV_GENERIC=y
+# CONFIG_BLK_DEV_OPTI621 is not set
+CONFIG_BLK_DEV_IDEDMA_PCI=y
+# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
+CONFIG_IDEDMA_PCI_AUTO=y
+# CONFIG_IDEDMA_ONLYDISK is not set
+# CONFIG_BLK_DEV_AEC62XX is not set
+# CONFIG_BLK_DEV_ALI15X3 is not set
+# CONFIG_BLK_DEV_AMD74XX is not set
+CONFIG_BLK_DEV_CMD64X=y
+# CONFIG_BLK_DEV_TRIFLEX is not set
+# CONFIG_BLK_DEV_CY82C693 is not set
+# CONFIG_BLK_DEV_CS5520 is not set
+# CONFIG_BLK_DEV_CS5530 is not set
+# CONFIG_BLK_DEV_HPT34X is not set
+# CONFIG_BLK_DEV_HPT366 is not set
+# CONFIG_BLK_DEV_SC1200 is not set
+CONFIG_BLK_DEV_PIIX=y
+# CONFIG_BLK_DEV_NS87415 is not set
+# CONFIG_BLK_DEV_PDC202XX_OLD is not set
+# CONFIG_BLK_DEV_PDC202XX_NEW is not set
+# CONFIG_BLK_DEV_SVWKS is not set
+# CONFIG_BLK_DEV_SIIMAGE is not set
+# CONFIG_BLK_DEV_SLC90E66 is not set
+# CONFIG_BLK_DEV_TRM290 is not set
+# CONFIG_BLK_DEV_VIA82CXXX is not set
+# CONFIG_IDE_ARM is not set
+CONFIG_BLK_DEV_IDEDMA=y
+# CONFIG_IDEDMA_IVB is not set
+CONFIG_IDEDMA_AUTO=y
+# CONFIG_BLK_DEV_HD is not set
+
+#
+# SCSI device support
+#
+CONFIG_SCSI=y
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+# CONFIG_CHR_DEV_OSST is not set
+CONFIG_BLK_DEV_SR=m
+# CONFIG_BLK_DEV_SR_VENDOR is not set
+CONFIG_CHR_DEV_SG=m
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+# CONFIG_SCSI_CONSTANTS is not set
+# CONFIG_SCSI_LOGGING is not set
+
+#
+# SCSI Transport Attributes
+#
+CONFIG_SCSI_SPI_ATTRS=y
+CONFIG_SCSI_FC_ATTRS=y
+# CONFIG_SCSI_ISCSI_ATTRS is not set
+
+#
+# SCSI low-level drivers
+#
+# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
+# CONFIG_SCSI_3W_9XXX is not set
+# CONFIG_SCSI_ACARD is not set
+# CONFIG_SCSI_AACRAID is not set
+# CONFIG_SCSI_AIC7XXX is not set
+# CONFIG_SCSI_AIC7XXX_OLD is not set
+# CONFIG_SCSI_AIC79XX is not set
+# CONFIG_MEGARAID_NEWGEN is not set
+# CONFIG_MEGARAID_LEGACY is not set
+# CONFIG_SCSI_SATA is not set
+# CONFIG_SCSI_BUSLOGIC is not set
+# CONFIG_SCSI_DMX3191D is not set
+# CONFIG_SCSI_EATA is not set
+# CONFIG_SCSI_EATA_PIO is not set
+# CONFIG_SCSI_FUTURE_DOMAIN is not set
+# CONFIG_SCSI_GDTH is not set
+# CONFIG_SCSI_IPS is not set
+# CONFIG_SCSI_INITIO is not set
+# CONFIG_SCSI_INIA100 is not set
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1
+CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
+CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
+# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
+# CONFIG_SCSI_IPR is not set
+# CONFIG_SCSI_QLOGIC_ISP is not set
+CONFIG_SCSI_QLOGIC_FC=y
+# CONFIG_SCSI_QLOGIC_FC_FIRMWARE is not set
+CONFIG_SCSI_QLOGIC_1280=y
+# CONFIG_SCSI_QLOGIC_1280_1040 is not set
+CONFIG_SCSI_QLA2XXX=y
+CONFIG_SCSI_QLA21XX=m
+CONFIG_SCSI_QLA22XX=m
+CONFIG_SCSI_QLA2300=m
+CONFIG_SCSI_QLA2322=m
+# CONFIG_SCSI_QLA6312 is not set
+# CONFIG_SCSI_DC395x is not set
+# CONFIG_SCSI_DC390T is not set
+# CONFIG_SCSI_DEBUG is not set
+
+#
+# Multi-device support (RAID and LVM)
+#
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=m
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+# CONFIG_MD_RAID10 is not set
+CONFIG_MD_RAID5=m
+CONFIG_MD_RAID6=m
+CONFIG_MD_MULTIPATH=m
+# CONFIG_MD_FAULTY is not set
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+
+#
+# Fusion MPT device support
+#
+CONFIG_FUSION=y
+CONFIG_FUSION_MAX_SGE=40
+# CONFIG_FUSION_CTL is not set
+
+#
+# IEEE 1394 (FireWire) support
+#
+# CONFIG_IEEE1394 is not set
+
+#
+# I2O device support
+#
+# CONFIG_I2O is not set
+
+#
+# Networking support
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+CONFIG_NETLINK_DEV=y
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+# CONFIG_IP_ADVANCED_ROUTER is not set
+# CONFIG_IP_PNP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_IP_MROUTE is not set
+CONFIG_ARPD=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_TUNNEL is not set
+CONFIG_IP_TCPDIAG=y
+# CONFIG_IP_TCPDIAG_IPV6 is not set
+# CONFIG_IPV6 is not set
+# CONFIG_NETFILTER is not set
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
+# CONFIG_NET_SCHED is not set
+# CONFIG_NET_CLS_ROUTE is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+CONFIG_NETPOLL=y
+# CONFIG_NETPOLL_RX is not set
+# CONFIG_NETPOLL_TRAP is not set
+CONFIG_NET_POLL_CONTROLLER=y
+# CONFIG_HAMRADIO is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=m
+# CONFIG_BONDING is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+# CONFIG_ETHERTAP is not set
+
+#
+# ARCnet devices
+#
+# CONFIG_ARCNET is not set
+
+#
+# Ethernet (10 or 100Mbit)
+#
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=m
+# CONFIG_HAPPYMEAL is not set
+# CONFIG_SUNGEM is not set
+# CONFIG_NET_VENDOR_3COM is not set
+
+#
+# Tulip family network device support
+#
+CONFIG_NET_TULIP=y
+# CONFIG_DE2104X is not set
+CONFIG_TULIP=m
+# CONFIG_TULIP_MWI is not set
+# CONFIG_TULIP_MMIO is not set
+# CONFIG_TULIP_NAPI is not set
+# CONFIG_DE4X5 is not set
+# CONFIG_WINBOND_840 is not set
+# CONFIG_DM9102 is not set
+# CONFIG_HP100 is not set
+CONFIG_NET_PCI=y
+# CONFIG_PCNET32 is not set
+# CONFIG_AMD8111_ETH is not set
+# CONFIG_ADAPTEC_STARFIRE is not set
+# CONFIG_B44 is not set
+# CONFIG_FORCEDETH is not set
+# CONFIG_DGRS is not set
+CONFIG_EEPRO100=m
+CONFIG_E100=m
+# CONFIG_E100_NAPI is not set
+# CONFIG_FEALNX is not set
+# CONFIG_NATSEMI is not set
+# CONFIG_NE2K_PCI is not set
+# CONFIG_8139CP is not set
+# CONFIG_8139TOO is not set
+# CONFIG_SIS900 is not set
+# CONFIG_EPIC100 is not set
+# CONFIG_SUNDANCE is not set
+# CONFIG_VIA_RHINE is not set
+
+#
+# Ethernet (1000 Mbit)
+#
+# CONFIG_ACENIC is not set
+# CONFIG_DL2K is not set
+CONFIG_E1000=y
+# CONFIG_E1000_NAPI is not set
+# CONFIG_NS83820 is not set
+# CONFIG_HAMACHI is not set
+# CONFIG_YELLOWFIN is not set
+# CONFIG_R8169 is not set
+# CONFIG_SK98LIN is not set
+# CONFIG_VIA_VELOCITY is not set
+CONFIG_TIGON3=y
+
+#
+# Ethernet (10000 Mbit)
+#
+# CONFIG_IXGB is not set
+# CONFIG_S2IO is not set
+
+#
+# Token Ring devices
+#
+# CONFIG_TR is not set
+
+#
+# Wireless LAN (non-hamradio)
+#
+# CONFIG_NET_RADIO is not set
+
+#
+# Wan interfaces
+#
+# CONFIG_WAN is not set
+# CONFIG_FDDI is not set
+# CONFIG_HIPPI is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NET_FC is not set
+# CONFIG_SHAPER is not set
+CONFIG_NETCONSOLE=y
+
+#
+# ISDN subsystem
+#
+# CONFIG_ISDN is not set
+
+#
+# Telephony Support
+#
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+
+#
+# Userland interfaces
+#
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_INPUT_MOUSEDEV_PSAUX=y
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_TSDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input I/O drivers
+#
+CONFIG_GAMEPORT=m
+CONFIG_SOUND_GAMEPORT=m
+# CONFIG_GAMEPORT_NS558 is not set
+# CONFIG_GAMEPORT_L4 is not set
+# CONFIG_GAMEPORT_EMU10K1 is not set
+# CONFIG_GAMEPORT_VORTEX is not set
+# CONFIG_GAMEPORT_FM801 is not set
+# CONFIG_GAMEPORT_CS461X is not set
+CONFIG_SERIO=y
+CONFIG_SERIO_I8042=y
+# CONFIG_SERIO_SERPORT is not set
+# CONFIG_SERIO_CT82C710 is not set
+# CONFIG_SERIO_PCIPS2 is not set
+CONFIG_SERIO_LIBPS2=y
+# CONFIG_SERIO_RAW is not set
+
+#
+# Input Device Drivers
+#
+CONFIG_INPUT_KEYBOARD=y
+CONFIG_KEYBOARD_ATKBD=y
+# CONFIG_KEYBOARD_SUNKBD is not set
+# CONFIG_KEYBOARD_LKKBD is not set
+# CONFIG_KEYBOARD_XTKBD is not set
+# CONFIG_KEYBOARD_NEWTON is not set
+CONFIG_INPUT_MOUSE=y
+CONFIG_MOUSE_PS2=y
+# CONFIG_MOUSE_SERIAL is not set
+# CONFIG_MOUSE_VSXXXAA is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+CONFIG_SERIAL_NONSTANDARD=y
+# CONFIG_ROCKETPORT is not set
+# CONFIG_CYCLADES is not set
+# CONFIG_MOXA_SMARTIO is not set
+# CONFIG_ISI is not set
+# CONFIG_SYNCLINK is not set
+# CONFIG_SYNCLINKMP is not set
+# CONFIG_N_HDLC is not set
+# CONFIG_STALDRV is not set
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_ACPI=y
+CONFIG_SERIAL_8250_NR_UARTS=6
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+# CONFIG_SERIAL_8250_DETECT_IRQ is not set
+# CONFIG_SERIAL_8250_MULTIPORT is not set
+# CONFIG_SERIAL_8250_RSA is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+
+#
+# IPMI
+#
+# CONFIG_IPMI_HANDLER is not set
+
+#
+# Watchdog Cards
+#
+# CONFIG_WATCHDOG is not set
+# CONFIG_HW_RANDOM is not set
+CONFIG_EFI_RTC=y
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+CONFIG_AGP=m
+CONFIG_AGP_I460=m
+CONFIG_DRM=m
+CONFIG_DRM_TDFX=m
+CONFIG_DRM_R128=m
+CONFIG_DRM_RADEON=m
+CONFIG_DRM_MGA=m
+CONFIG_DRM_SIS=m
+CONFIG_RAW_DRIVER=m
+CONFIG_HPET=y
+# CONFIG_HPET_RTC_IRQ is not set
+CONFIG_HPET_MMAP=y
+CONFIG_MAX_RAW_DEVS=256
+
+#
+# I2C support
+#
+# CONFIG_I2C is not set
+
+#
+# Dallas's 1-wire bus
+#
+# CONFIG_W1 is not set
+
+#
+# Misc devices
+#
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+
+#
+# Digital Video Broadcasting Devices
+#
+# CONFIG_DVB is not set
+
+#
+# Graphics support
+#
+# CONFIG_FB is not set
+
+#
+# Console display driver support
+#
+CONFIG_VGA_CONSOLE=y
+CONFIG_DUMMY_CONSOLE=y
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+
+#
+# USB support
+#
+CONFIG_USB=y
+# CONFIG_USB_DEBUG is not set
+
+#
+# Miscellaneous USB options
+#
+CONFIG_USB_DEVICEFS=y
+# CONFIG_USB_BANDWIDTH is not set
+# CONFIG_USB_DYNAMIC_MINORS is not set
+# CONFIG_USB_SUSPEND is not set
+# CONFIG_USB_OTG is not set
+CONFIG_USB_ARCH_HAS_HCD=y
+CONFIG_USB_ARCH_HAS_OHCI=y
+
+#
+# USB Host Controller Drivers
+#
+CONFIG_USB_EHCI_HCD=m
+# CONFIG_USB_EHCI_SPLIT_ISO is not set
+# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
+CONFIG_USB_OHCI_HCD=m
+CONFIG_USB_UHCI_HCD=y
+# CONFIG_USB_SL811_HCD is not set
+
+#
+# USB Device Class drivers
+#
+# CONFIG_USB_BLUETOOTH_TTY is not set
+# CONFIG_USB_ACM is not set
+# CONFIG_USB_PRINTER is not set
+
+#
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+#
+CONFIG_USB_STORAGE=m
+# CONFIG_USB_STORAGE_DEBUG is not set
+# CONFIG_USB_STORAGE_RW_DETECT is not set
+# CONFIG_USB_STORAGE_DATAFAB is not set
+# CONFIG_USB_STORAGE_FREECOM is not set
+# CONFIG_USB_STORAGE_ISD200 is not set
+# CONFIG_USB_STORAGE_DPCM is not set
+# CONFIG_USB_STORAGE_HP8200e is not set
+# CONFIG_USB_STORAGE_SDDR09 is not set
+# CONFIG_USB_STORAGE_SDDR55 is not set
+# CONFIG_USB_STORAGE_JUMPSHOT is not set
+
+#
+# USB Input Devices
+#
+CONFIG_USB_HID=y
+CONFIG_USB_HIDINPUT=y
+# CONFIG_HID_FF is not set
+# CONFIG_USB_HIDDEV is not set
+# CONFIG_USB_AIPTEK is not set
+# CONFIG_USB_WACOM is not set
+# CONFIG_USB_KBTAB is not set
+# CONFIG_USB_POWERMATE is not set
+# CONFIG_USB_MTOUCH is not set
+# CONFIG_USB_EGALAX is not set
+# CONFIG_USB_XPAD is not set
+# CONFIG_USB_ATI_REMOTE is not set
+
+#
+# USB Imaging devices
+#
+# CONFIG_USB_MDC800 is not set
+# CONFIG_USB_MICROTEK is not set
+
+#
+# USB Multimedia devices
+#
+# CONFIG_USB_DABUSB is not set
+
+#
+# Video4Linux support is needed for USB Multimedia device support
+#
+
+#
+# USB Network Adapters
+#
+# CONFIG_USB_CATC is not set
+# CONFIG_USB_KAWETH is not set
+# CONFIG_USB_PEGASUS is not set
+# CONFIG_USB_RTL8150 is not set
+# CONFIG_USB_USBNET is not set
+
+#
+# USB port drivers
+#
+
+#
+# USB Serial Converter support
+#
+# CONFIG_USB_SERIAL is not set
+
+#
+# USB Miscellaneous drivers
+#
+# CONFIG_USB_EMI62 is not set
+# CONFIG_USB_EMI26 is not set
+# CONFIG_USB_AUERSWALD is not set
+# CONFIG_USB_RIO500 is not set
+# CONFIG_USB_LEGOTOWER is not set
+# CONFIG_USB_LCD is not set
+# CONFIG_USB_LED is not set
+# CONFIG_USB_CYTHERM is not set
+# CONFIG_USB_PHIDGETKIT is not set
+# CONFIG_USB_PHIDGETSERVO is not set
+# CONFIG_USB_IDMOUSE is not set
+# CONFIG_USB_TEST is not set
+
+#
+# USB ATM/DSL drivers
+#
+
+#
+# USB Gadget Support
+#
+# CONFIG_USB_GADGET is not set
+
+#
+# MMC/SD Card support
+#
+# CONFIG_MMC is not set
+
+#
+# InfiniBand support
+#
+# CONFIG_INFINIBAND is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_XATTR=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_JBD=y
+# CONFIG_JBD_DEBUG is not set
+CONFIG_FS_MBCACHE=y
+CONFIG_REISERFS_FS=y
+# CONFIG_REISERFS_CHECK is not set
+# CONFIG_REISERFS_PROC_INFO is not set
+CONFIG_REISERFS_FS_XATTR=y
+CONFIG_REISERFS_FS_POSIX_ACL=y
+CONFIG_REISERFS_FS_SECURITY=y
+# CONFIG_JFS_FS is not set
+CONFIG_FS_POSIX_ACL=y
+CONFIG_XFS_FS=y
+# CONFIG_XFS_RT is not set
+# CONFIG_XFS_QUOTA is not set
+# CONFIG_XFS_SECURITY is not set
+# CONFIG_XFS_POSIX_ACL is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_QUOTA is not set
+CONFIG_DNOTIFY=y
+CONFIG_AUTOFS_FS=y
+CONFIG_AUTOFS4_FS=y
+
+#
+# CD-ROM/DVD Filesystems
+#
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+# CONFIG_ZISOFS is not set
+CONFIG_UDF_FS=m
+CONFIG_UDF_NLS=y
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=y
+# CONFIG_MSDOS_FS is not set
+CONFIG_VFAT_FS=y
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
+CONFIG_NTFS_FS=m
+# CONFIG_NTFS_DEBUG is not set
+# CONFIG_NTFS_RW is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_SYSFS=y
+# CONFIG_DEVFS_FS is not set
+# CONFIG_DEVPTS_FS_XATTR is not set
+CONFIG_TMPFS=y
+CONFIG_TMPFS_XATTR=y
+CONFIG_TMPFS_SECURITY=y
+CONFIG_HUGETLBFS=y
+CONFIG_HUGETLB_PAGE=y
+CONFIG_RAMFS=y
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Network File Systems
+#
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_DIRECTIO=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3=y
+CONFIG_NFSD_V4=y
+CONFIG_NFSD_TCP=y
+CONFIG_LOCKD=m
+CONFIG_LOCKD_V4=y
+CONFIG_EXPORTFS=m
+CONFIG_SUNRPC=m
+CONFIG_SUNRPC_GSS=m
+CONFIG_RPCSEC_GSS_KRB5=m
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+CONFIG_SMB_FS=m
+CONFIG_SMB_NLS_DEFAULT=y
+CONFIG_SMB_NLS_REMOTE="cp437"
+CONFIG_CIFS=m
+# CONFIG_CIFS_STATS is not set
+# CONFIG_CIFS_XATTR is not set
+# CONFIG_CIFS_EXPERIMENTAL is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+# CONFIG_OSF_PARTITION is not set
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+# CONFIG_MAC_PARTITION is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_BSD_DISKLABEL is not set
+# CONFIG_MINIX_SUBPARTITION is not set
+# CONFIG_SOLARIS_X86_PARTITION is not set
+# CONFIG_UNIXWARE_DISKLABEL is not set
+# CONFIG_LDM_PARTITION is not set
+CONFIG_SGI_PARTITION=y
+# CONFIG_ULTRIX_PARTITION is not set
+# CONFIG_SUN_PARTITION is not set
+CONFIG_EFI_PARTITION=y
+
+#
+# Native Language Support
+#
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_737=m
+CONFIG_NLS_CODEPAGE_775=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_CODEPAGE_852=m
+CONFIG_NLS_CODEPAGE_855=m
+CONFIG_NLS_CODEPAGE_857=m
+CONFIG_NLS_CODEPAGE_860=m
+CONFIG_NLS_CODEPAGE_861=m
+CONFIG_NLS_CODEPAGE_862=m
+CONFIG_NLS_CODEPAGE_863=m
+CONFIG_NLS_CODEPAGE_864=m
+CONFIG_NLS_CODEPAGE_865=m
+CONFIG_NLS_CODEPAGE_866=m
+CONFIG_NLS_CODEPAGE_869=m
+CONFIG_NLS_CODEPAGE_936=m
+CONFIG_NLS_CODEPAGE_950=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_CODEPAGE_949=m
+CONFIG_NLS_CODEPAGE_874=m
+CONFIG_NLS_ISO8859_8=m
+CONFIG_NLS_CODEPAGE_1250=m
+CONFIG_NLS_CODEPAGE_1251=m
+# CONFIG_NLS_ASCII is not set
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_KOI8_R=m
+CONFIG_NLS_KOI8_U=m
+CONFIG_NLS_UTF8=m
+
+#
+# Library routines
+#
+# CONFIG_CRC_CCITT is not set
+CONFIG_CRC32=y
+# CONFIG_LIBCRC32C is not set
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_IRQ_PROBE=y
+
+#
+# Profiling support
+#
+# CONFIG_PROFILING is not set
+
+#
+# Kernel hacking
+#
+CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_FS is not set
+CONFIG_IA64_GRANULE_16MB=y
+# CONFIG_IA64_GRANULE_64MB is not set
+# CONFIG_IA64_PRINT_HAZARDS is not set
+# CONFIG_DISABLE_VHPT is not set
+# CONFIG_IA64_DEBUG_CMPXCHG is not set
+# CONFIG_IA64_DEBUG_IRQ is not set
+CONFIG_SYSVIPC_COMPAT=y
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+
+#
+# Cryptographic options
+#
+CONFIG_CRYPTO=y
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_MD4 is not set
+CONFIG_CRYPTO_MD5=m
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_WP512 is not set
+CONFIG_CRYPTO_DES=m
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Hardware crypto devices
+#
diff --git a/arch/ia64/configs/zx1_defconfig b/arch/ia64/configs/zx1_defconfig
new file mode 100644
index 00000000000..21d6f9bab5e
--- /dev/null
+++ b/arch/ia64/configs/zx1_defconfig
@@ -0,0 +1,1273 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.10
+# Wed Dec 29 09:05:48 2004
+#
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+# CONFIG_CLEAN_COMPILE is not set
+CONFIG_BROKEN=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_LOCK_KERNEL=y
+
+#
+# General setup
+#
+CONFIG_LOCALVERSION=""
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+# CONFIG_POSIX_MQUEUE is not set
+CONFIG_BSD_PROCESS_ACCT=y
+# CONFIG_BSD_PROCESS_ACCT_V3 is not set
+CONFIG_SYSCTL=y
+# CONFIG_AUDIT is not set
+CONFIG_LOG_BUF_SHIFT=17
+CONFIG_HOTPLUG=y
+CONFIG_KOBJECT_UEVENT=y
+# CONFIG_IKCONFIG is not set
+# CONFIG_EMBEDDED is not set
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_SHMEM=y
+CONFIG_CC_ALIGN_FUNCTIONS=0
+CONFIG_CC_ALIGN_LABELS=0
+CONFIG_CC_ALIGN_LOOPS=0
+CONFIG_CC_ALIGN_JUMPS=0
+# CONFIG_TINY_SHMEM is not set
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+# CONFIG_MODULE_UNLOAD is not set
+CONFIG_OBSOLETE_MODPARM=y
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+# CONFIG_KMOD is not set
+
+#
+# Processor type and features
+#
+CONFIG_IA64=y
+CONFIG_64BIT=y
+CONFIG_MMU=y
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_TIME_INTERPOLATION=y
+CONFIG_EFI=y
+CONFIG_GENERIC_IOMAP=y
+# CONFIG_IA64_GENERIC is not set
+# CONFIG_IA64_DIG is not set
+CONFIG_IA64_HP_ZX1=y
+# CONFIG_IA64_SGI_SN2 is not set
+# CONFIG_IA64_HP_SIM is not set
+# CONFIG_ITANIUM is not set
+CONFIG_MCKINLEY=y
+# CONFIG_IA64_PAGE_SIZE_4KB is not set
+# CONFIG_IA64_PAGE_SIZE_8KB is not set
+CONFIG_IA64_PAGE_SIZE_16KB=y
+# CONFIG_IA64_PAGE_SIZE_64KB is not set
+CONFIG_IA64_L1_CACHE_SHIFT=7
+# CONFIG_NUMA is not set
+CONFIG_VIRTUAL_MEM_MAP=y
+# CONFIG_IA64_CYCLONE is not set
+CONFIG_IOSAPIC=y
+CONFIG_FORCE_MAX_ZONEORDER=18
+CONFIG_SMP=y
+CONFIG_NR_CPUS=16
+# CONFIG_HOTPLUG_CPU is not set
+# CONFIG_PREEMPT is not set
+CONFIG_HAVE_DEC_LOCK=y
+CONFIG_IA32_SUPPORT=y
+CONFIG_COMPAT=y
+CONFIG_IA64_MCA_RECOVERY=y
+CONFIG_PERFMON=y
+CONFIG_IA64_PALINFO=y
+
+#
+# Firmware Drivers
+#
+CONFIG_EFI_VARS=y
+CONFIG_EFI_PCDP=y
+CONFIG_BINFMT_ELF=y
+CONFIG_BINFMT_MISC=y
+
+#
+# Power management and ACPI
+#
+CONFIG_PM=y
+CONFIG_ACPI=y
+
+#
+# ACPI (Advanced Configuration and Power Interface) Support
+#
+CONFIG_ACPI_BOOT=y
+CONFIG_ACPI_INTERPRETER=y
+CONFIG_ACPI_BUTTON=y
+CONFIG_ACPI_VIDEO=m
+CONFIG_ACPI_FAN=y
+CONFIG_ACPI_PROCESSOR=y
+CONFIG_ACPI_THERMAL=y
+CONFIG_ACPI_BLACKLIST_YEAR=0
+# CONFIG_ACPI_DEBUG is not set
+CONFIG_ACPI_BUS=y
+CONFIG_ACPI_POWER=y
+CONFIG_ACPI_PCI=y
+CONFIG_ACPI_SYSTEM=y
+
+#
+# Bus options (PCI, PCMCIA)
+#
+CONFIG_PCI=y
+CONFIG_PCI_DOMAINS=y
+# CONFIG_PCI_MSI is not set
+CONFIG_PCI_LEGACY_PROC=y
+CONFIG_PCI_NAMES=y
+
+#
+# PCI Hotplug Support
+#
+CONFIG_HOTPLUG_PCI=y
+# CONFIG_HOTPLUG_PCI_FAKE is not set
+CONFIG_HOTPLUG_PCI_ACPI=y
+# CONFIG_HOTPLUG_PCI_ACPI_IBM is not set
+# CONFIG_HOTPLUG_PCI_CPCI is not set
+# CONFIG_HOTPLUG_PCI_PCIE is not set
+# CONFIG_HOTPLUG_PCI_SHPC is not set
+
+#
+# PCCARD (PCMCIA/CardBus) support
+#
+# CONFIG_PCCARD is not set
+
+#
+# PC-card bridges
+#
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+# CONFIG_FW_LOADER is not set
+# CONFIG_DEBUG_DRIVER is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+# CONFIG_MTD is not set
+
+#
+# Parallel port support
+#
+# CONFIG_PARPORT is not set
+
+#
+# Plug and Play support
+#
+# CONFIG_PNP is not set
+
+#
+# Block devices
+#
+# CONFIG_BLK_CPQ_DA is not set
+# CONFIG_BLK_CPQ_CISS_DA is not set
+# CONFIG_BLK_DEV_DAC960 is not set
+# CONFIG_BLK_DEV_UMEM is not set
+CONFIG_BLK_DEV_LOOP=y
+# CONFIG_BLK_DEV_CRYPTOLOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+# CONFIG_BLK_DEV_SX8 is not set
+# CONFIG_BLK_DEV_UB is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=4096
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CDROM_PKTCDVD is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+
+#
+# ATA/ATAPI/MFM/RLL support
+#
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDE=y
+
+#
+# Please see Documentation/ide.txt for help/info on IDE drives
+#
+# CONFIG_BLK_DEV_IDE_SATA is not set
+CONFIG_BLK_DEV_IDEDISK=y
+# CONFIG_IDEDISK_MULTI_MODE is not set
+CONFIG_BLK_DEV_IDECD=y
+# CONFIG_BLK_DEV_IDETAPE is not set
+# CONFIG_BLK_DEV_IDEFLOPPY is not set
+# CONFIG_BLK_DEV_IDESCSI is not set
+# CONFIG_IDE_TASK_IOCTL is not set
+
+#
+# IDE chipset support/bugfixes
+#
+CONFIG_IDE_GENERIC=y
+CONFIG_BLK_DEV_IDEPCI=y
+CONFIG_IDEPCI_SHARE_IRQ=y
+# CONFIG_BLK_DEV_OFFBOARD is not set
+CONFIG_BLK_DEV_GENERIC=y
+# CONFIG_BLK_DEV_OPTI621 is not set
+CONFIG_BLK_DEV_IDEDMA_PCI=y
+# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
+# CONFIG_IDEDMA_PCI_AUTO is not set
+# CONFIG_BLK_DEV_AEC62XX is not set
+# CONFIG_BLK_DEV_ALI15X3 is not set
+# CONFIG_BLK_DEV_AMD74XX is not set
+CONFIG_BLK_DEV_CMD64X=y
+# CONFIG_BLK_DEV_TRIFLEX is not set
+# CONFIG_BLK_DEV_CY82C693 is not set
+# CONFIG_BLK_DEV_CS5520 is not set
+# CONFIG_BLK_DEV_CS5530 is not set
+# CONFIG_BLK_DEV_HPT34X is not set
+# CONFIG_BLK_DEV_HPT366 is not set
+# CONFIG_BLK_DEV_SC1200 is not set
+# CONFIG_BLK_DEV_PIIX is not set
+# CONFIG_BLK_DEV_NS87415 is not set
+# CONFIG_BLK_DEV_PDC202XX_OLD is not set
+# CONFIG_BLK_DEV_PDC202XX_NEW is not set
+# CONFIG_BLK_DEV_SVWKS is not set
+# CONFIG_BLK_DEV_SIIMAGE is not set
+# CONFIG_BLK_DEV_SLC90E66 is not set
+# CONFIG_BLK_DEV_TRM290 is not set
+# CONFIG_BLK_DEV_VIA82CXXX is not set
+# CONFIG_IDE_ARM is not set
+CONFIG_BLK_DEV_IDEDMA=y
+# CONFIG_IDEDMA_IVB is not set
+# CONFIG_IDEDMA_AUTO is not set
+# CONFIG_BLK_DEV_HD is not set
+
+#
+# SCSI device support
+#
+CONFIG_SCSI=y
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=y
+CONFIG_CHR_DEV_OSST=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_CHR_DEV_SG=y
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+CONFIG_SCSI_MULTI_LUN=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+
+#
+# SCSI Transport Attributes
+#
+CONFIG_SCSI_SPI_ATTRS=y
+# CONFIG_SCSI_FC_ATTRS is not set
+
+#
+# SCSI low-level drivers
+#
+# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
+# CONFIG_SCSI_3W_9XXX is not set
+# CONFIG_SCSI_ACARD is not set
+# CONFIG_SCSI_AACRAID is not set
+# CONFIG_SCSI_AIC7XXX is not set
+# CONFIG_SCSI_AIC7XXX_OLD is not set
+# CONFIG_SCSI_AIC79XX is not set
+# CONFIG_SCSI_ADVANSYS is not set
+# CONFIG_MEGARAID_NEWGEN is not set
+# CONFIG_MEGARAID_LEGACY is not set
+# CONFIG_SCSI_SATA is not set
+# CONFIG_SCSI_BUSLOGIC is not set
+# CONFIG_SCSI_CPQFCTS is not set
+# CONFIG_SCSI_DMX3191D is not set
+# CONFIG_SCSI_EATA is not set
+# CONFIG_SCSI_EATA_PIO is not set
+# CONFIG_SCSI_FUTURE_DOMAIN is not set
+# CONFIG_SCSI_GDTH is not set
+# CONFIG_SCSI_IPS is not set
+# CONFIG_SCSI_INITIO is not set
+# CONFIG_SCSI_INIA100 is not set
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1
+CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
+CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
+# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
+# CONFIG_SCSI_IPR is not set
+# CONFIG_SCSI_PCI2000 is not set
+# CONFIG_SCSI_PCI2220I is not set
+# CONFIG_SCSI_QLOGIC_ISP is not set
+# CONFIG_SCSI_QLOGIC_FC is not set
+CONFIG_SCSI_QLOGIC_1280=y
+# CONFIG_SCSI_QLOGIC_1280_1040 is not set
+CONFIG_SCSI_QLA2XXX=y
+# CONFIG_SCSI_QLA21XX is not set
+# CONFIG_SCSI_QLA22XX is not set
+# CONFIG_SCSI_QLA2300 is not set
+# CONFIG_SCSI_QLA2322 is not set
+# CONFIG_SCSI_QLA6312 is not set
+# CONFIG_SCSI_QLA6322 is not set
+# CONFIG_SCSI_DC395x is not set
+# CONFIG_SCSI_DC390T is not set
+# CONFIG_SCSI_DEBUG is not set
+
+#
+# Multi-device support (RAID and LVM)
+#
+# CONFIG_MD is not set
+
+#
+# Fusion MPT device support
+#
+CONFIG_FUSION=y
+CONFIG_FUSION_MAX_SGE=40
+# CONFIG_FUSION_CTL is not set
+
+#
+# IEEE 1394 (FireWire) support
+#
+# CONFIG_IEEE1394 is not set
+
+#
+# I2O device support
+#
+# CONFIG_I2O is not set
+
+#
+# Networking support
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+# CONFIG_NETLINK_DEV is not set
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+# CONFIG_IP_ADVANCED_ROUTER is not set
+# CONFIG_IP_PNP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_IP_MROUTE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_TUNNEL is not set
+# CONFIG_IP_TCPDIAG is not set
+# CONFIG_IP_TCPDIAG_IPV6 is not set
+
+#
+# IP: Virtual Server Configuration
+#
+# CONFIG_IP_VS is not set
+# CONFIG_IPV6 is not set
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_DEBUG is not set
+
+#
+# IP: Netfilter Configuration
+#
+# CONFIG_IP_NF_CONNTRACK is not set
+# CONFIG_IP_NF_CONNTRACK_MARK is not set
+# CONFIG_IP_NF_QUEUE is not set
+# CONFIG_IP_NF_IPTABLES is not set
+CONFIG_IP_NF_ARPTABLES=y
+# CONFIG_IP_NF_ARPFILTER is not set
+# CONFIG_IP_NF_ARP_MANGLE is not set
+# CONFIG_IP_NF_COMPAT_IPCHAINS is not set
+# CONFIG_IP_NF_COMPAT_IPFWADM is not set
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
+# CONFIG_NET_SCHED is not set
+# CONFIG_NET_CLS_ROUTE is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=y
+# CONFIG_BONDING is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+
+#
+# ARCnet devices
+#
+# CONFIG_ARCNET is not set
+
+#
+# Ethernet (10 or 100Mbit)
+#
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=y
+# CONFIG_HAPPYMEAL is not set
+# CONFIG_SUNGEM is not set
+# CONFIG_NET_VENDOR_3COM is not set
+
+#
+# Tulip family network device support
+#
+CONFIG_NET_TULIP=y
+# CONFIG_DE2104X is not set
+CONFIG_TULIP=y
+CONFIG_TULIP_MWI=y
+CONFIG_TULIP_MMIO=y
+CONFIG_TULIP_NAPI=y
+CONFIG_TULIP_NAPI_HW_MITIGATION=y
+# CONFIG_DE4X5 is not set
+# CONFIG_WINBOND_840 is not set
+# CONFIG_DM9102 is not set
+# CONFIG_HP100 is not set
+CONFIG_NET_PCI=y
+# CONFIG_PCNET32 is not set
+# CONFIG_AMD8111_ETH is not set
+# CONFIG_ADAPTEC_STARFIRE is not set
+# CONFIG_B44 is not set
+# CONFIG_FORCEDETH is not set
+# CONFIG_DGRS is not set
+# CONFIG_EEPRO100 is not set
+CONFIG_E100=y
+# CONFIG_E100_NAPI is not set
+# CONFIG_FEALNX is not set
+# CONFIG_NATSEMI is not set
+# CONFIG_NE2K_PCI is not set
+# CONFIG_8139CP is not set
+# CONFIG_8139TOO is not set
+# CONFIG_SIS900 is not set
+# CONFIG_EPIC100 is not set
+# CONFIG_SUNDANCE is not set
+# CONFIG_VIA_RHINE is not set
+
+#
+# Ethernet (1000 Mbit)
+#
+# CONFIG_ACENIC is not set
+# CONFIG_DL2K is not set
+CONFIG_E1000=y
+# CONFIG_E1000_NAPI is not set
+# CONFIG_NS83820 is not set
+# CONFIG_HAMACHI is not set
+# CONFIG_YELLOWFIN is not set
+# CONFIG_R8169 is not set
+# CONFIG_SK98LIN is not set
+# CONFIG_VIA_VELOCITY is not set
+CONFIG_TIGON3=y
+
+#
+# Ethernet (10000 Mbit)
+#
+# CONFIG_IXGB is not set
+# CONFIG_S2IO is not set
+
+#
+# Token Ring devices
+#
+# CONFIG_TR is not set
+
+#
+# Wireless LAN (non-hamradio)
+#
+# CONFIG_NET_RADIO is not set
+
+#
+# Wan interfaces
+#
+# CONFIG_WAN is not set
+# CONFIG_FDDI is not set
+# CONFIG_HIPPI is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NET_FC is not set
+# CONFIG_SHAPER is not set
+# CONFIG_NETCONSOLE is not set
+
+#
+# ISDN subsystem
+#
+# CONFIG_ISDN is not set
+
+#
+# Telephony Support
+#
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+
+#
+# Userland interfaces
+#
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_INPUT_MOUSEDEV_PSAUX=y
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
+CONFIG_INPUT_JOYDEV=y
+# CONFIG_INPUT_TSDEV is not set
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input I/O drivers
+#
+# CONFIG_GAMEPORT is not set
+CONFIG_SOUND_GAMEPORT=y
+CONFIG_SERIO=y
+# CONFIG_SERIO_I8042 is not set
+# CONFIG_SERIO_SERPORT is not set
+# CONFIG_SERIO_CT82C710 is not set
+# CONFIG_SERIO_PCIPS2 is not set
+# CONFIG_SERIO_RAW is not set
+
+#
+# Input Device Drivers
+#
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_ACPI=y
+CONFIG_SERIAL_8250_NR_UARTS=8
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+# CONFIG_SERIAL_8250_DETECT_IRQ is not set
+# CONFIG_SERIAL_8250_MULTIPORT is not set
+# CONFIG_SERIAL_8250_RSA is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+
+#
+# IPMI
+#
+# CONFIG_IPMI_HANDLER is not set
+
+#
+# Watchdog Cards
+#
+# CONFIG_WATCHDOG is not set
+# CONFIG_HW_RANDOM is not set
+CONFIG_EFI_RTC=y
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+CONFIG_AGP=y
+CONFIG_AGP_HP_ZX1=y
+CONFIG_DRM=y
+# CONFIG_DRM_TDFX is not set
+# CONFIG_DRM_GAMMA is not set
+# CONFIG_DRM_R128 is not set
+CONFIG_DRM_RADEON=y
+# CONFIG_DRM_MGA is not set
+# CONFIG_DRM_SIS is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_HPET is not set
+
+#
+# I2C support
+#
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+
+#
+# I2C Algorithms
+#
+CONFIG_I2C_ALGOBIT=y
+CONFIG_I2C_ALGOPCF=y
+# CONFIG_I2C_ALGOPCA is not set
+
+#
+# I2C Hardware Bus support
+#
+# CONFIG_I2C_ALI1535 is not set
+# CONFIG_I2C_ALI1563 is not set
+# CONFIG_I2C_ALI15X3 is not set
+# CONFIG_I2C_AMD756 is not set
+# CONFIG_I2C_AMD8111 is not set
+# CONFIG_I2C_I801 is not set
+# CONFIG_I2C_I810 is not set
+# CONFIG_I2C_ISA is not set
+# CONFIG_I2C_NFORCE2 is not set
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_PROSAVAGE is not set
+# CONFIG_I2C_SAVAGE4 is not set
+# CONFIG_SCx200_ACB is not set
+# CONFIG_I2C_SIS5595 is not set
+# CONFIG_I2C_SIS630 is not set
+# CONFIG_I2C_SIS96X is not set
+# CONFIG_I2C_STUB is not set
+# CONFIG_I2C_VIA is not set
+# CONFIG_I2C_VIAPRO is not set
+# CONFIG_I2C_VOODOO3 is not set
+# CONFIG_I2C_PCA_ISA is not set
+
+#
+# Hardware Sensors Chip support
+#
+# CONFIG_I2C_SENSOR is not set
+# CONFIG_SENSORS_ADM1021 is not set
+# CONFIG_SENSORS_ADM1025 is not set
+# CONFIG_SENSORS_ADM1026 is not set
+# CONFIG_SENSORS_ADM1031 is not set
+# CONFIG_SENSORS_ASB100 is not set
+# CONFIG_SENSORS_DS1621 is not set
+# CONFIG_SENSORS_FSCHER is not set
+# CONFIG_SENSORS_GL518SM is not set
+# CONFIG_SENSORS_IT87 is not set
+# CONFIG_SENSORS_LM63 is not set
+# CONFIG_SENSORS_LM75 is not set
+# CONFIG_SENSORS_LM77 is not set
+# CONFIG_SENSORS_LM78 is not set
+# CONFIG_SENSORS_LM80 is not set
+# CONFIG_SENSORS_LM83 is not set
+# CONFIG_SENSORS_LM85 is not set
+# CONFIG_SENSORS_LM87 is not set
+# CONFIG_SENSORS_LM90 is not set
+# CONFIG_SENSORS_MAX1619 is not set
+# CONFIG_SENSORS_PC87360 is not set
+# CONFIG_SENSORS_SMSC47M1 is not set
+# CONFIG_SENSORS_VIA686A is not set
+# CONFIG_SENSORS_W83781D is not set
+# CONFIG_SENSORS_W83L785TS is not set
+# CONFIG_SENSORS_W83627HF is not set
+
+#
+# Other I2C Chip support
+#
+# CONFIG_SENSORS_EEPROM is not set
+# CONFIG_SENSORS_PCF8574 is not set
+# CONFIG_SENSORS_PCF8591 is not set
+# CONFIG_SENSORS_RTC8564 is not set
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_I2C_DEBUG_CHIP is not set
+
+#
+# Dallas's 1-wire bus
+#
+# CONFIG_W1 is not set
+
+#
+# Misc devices
+#
+
+#
+# Multimedia devices
+#
+CONFIG_VIDEO_DEV=y
+
+#
+# Video For Linux
+#
+
+#
+# Video Adapters
+#
+# CONFIG_VIDEO_BT848 is not set
+# CONFIG_VIDEO_CPIA is not set
+# CONFIG_VIDEO_SAA5246A is not set
+# CONFIG_VIDEO_SAA5249 is not set
+# CONFIG_TUNER_3036 is not set
+# CONFIG_VIDEO_STRADIS is not set
+# CONFIG_VIDEO_ZORAN is not set
+# CONFIG_VIDEO_ZR36120 is not set
+# CONFIG_VIDEO_SAA7134 is not set
+# CONFIG_VIDEO_MXB is not set
+# CONFIG_VIDEO_DPC is not set
+# CONFIG_VIDEO_HEXIUM_ORION is not set
+# CONFIG_VIDEO_HEXIUM_GEMINI is not set
+# CONFIG_VIDEO_CX88 is not set
+# CONFIG_VIDEO_OVCAMCHIP is not set
+
+#
+# Radio Adapters
+#
+# CONFIG_RADIO_GEMTEK_PCI is not set
+# CONFIG_RADIO_MAXIRADIO is not set
+# CONFIG_RADIO_MAESTRO is not set
+
+#
+# Digital Video Broadcasting Devices
+#
+# CONFIG_DVB is not set
+
+#
+# Graphics support
+#
+CONFIG_FB=y
+CONFIG_FB_MODE_HELPERS=y
+# CONFIG_FB_TILEBLITTING is not set
+# CONFIG_FB_CIRRUS is not set
+# CONFIG_FB_PM2 is not set
+# CONFIG_FB_CYBER2000 is not set
+# CONFIG_FB_ASILIANT is not set
+# CONFIG_FB_IMSTT is not set
+# CONFIG_FB_RIVA is not set
+# CONFIG_FB_MATROX is not set
+# CONFIG_FB_RADEON_OLD is not set
+CONFIG_FB_RADEON=y
+CONFIG_FB_RADEON_I2C=y
+CONFIG_FB_RADEON_DEBUG=y
+# CONFIG_FB_ATY128 is not set
+# CONFIG_FB_ATY is not set
+# CONFIG_FB_SAVAGE is not set
+# CONFIG_FB_SIS is not set
+# CONFIG_FB_NEOMAGIC is not set
+# CONFIG_FB_KYRO is not set
+# CONFIG_FB_3DFX is not set
+# CONFIG_FB_VOODOO1 is not set
+# CONFIG_FB_TRIDENT is not set
+# CONFIG_FB_PM3 is not set
+# CONFIG_FB_VIRTUAL is not set
+
+#
+# Console display driver support
+#
+CONFIG_VGA_CONSOLE=y
+CONFIG_DUMMY_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+# CONFIG_FONTS is not set
+CONFIG_FONT_8x8=y
+CONFIG_FONT_8x16=y
+
+#
+# Logo configuration
+#
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+CONFIG_LOGO_LINUX_CLUT224=y
+
+#
+# Sound
+#
+CONFIG_SOUND=y
+
+#
+# Advanced Linux Sound Architecture
+#
+CONFIG_SND=y
+CONFIG_SND_TIMER=y
+CONFIG_SND_PCM=y
+CONFIG_SND_HWDEP=y
+CONFIG_SND_RAWMIDI=y
+CONFIG_SND_SEQUENCER=y
+# CONFIG_SND_SEQ_DUMMY is not set
+CONFIG_SND_OSSEMUL=y
+CONFIG_SND_MIXER_OSS=y
+CONFIG_SND_PCM_OSS=y
+CONFIG_SND_SEQUENCER_OSS=y
+# CONFIG_SND_VERBOSE_PRINTK is not set
+# CONFIG_SND_DEBUG is not set
+
+#
+# Generic devices
+#
+CONFIG_SND_MPU401_UART=y
+CONFIG_SND_OPL3_LIB=y
+# CONFIG_SND_DUMMY is not set
+# CONFIG_SND_VIRMIDI is not set
+# CONFIG_SND_MTPAV is not set
+# CONFIG_SND_SERIAL_U16550 is not set
+# CONFIG_SND_MPU401 is not set
+
+#
+# PCI devices
+#
+CONFIG_SND_AC97_CODEC=y
+# CONFIG_SND_ALI5451 is not set
+# CONFIG_SND_ATIIXP is not set
+# CONFIG_SND_ATIIXP_MODEM is not set
+# CONFIG_SND_AU8810 is not set
+# CONFIG_SND_AU8820 is not set
+# CONFIG_SND_AU8830 is not set
+# CONFIG_SND_AZT3328 is not set
+# CONFIG_SND_BT87X is not set
+# CONFIG_SND_CS46XX is not set
+# CONFIG_SND_CS4281 is not set
+# CONFIG_SND_EMU10K1 is not set
+# CONFIG_SND_KORG1212 is not set
+# CONFIG_SND_MIXART is not set
+# CONFIG_SND_NM256 is not set
+# CONFIG_SND_RME32 is not set
+# CONFIG_SND_RME96 is not set
+# CONFIG_SND_RME9652 is not set
+# CONFIG_SND_HDSP is not set
+# CONFIG_SND_TRIDENT is not set
+# CONFIG_SND_YMFPCI is not set
+# CONFIG_SND_ALS4000 is not set
+# CONFIG_SND_CMIPCI is not set
+# CONFIG_SND_ENS1370 is not set
+# CONFIG_SND_ENS1371 is not set
+# CONFIG_SND_ES1938 is not set
+# CONFIG_SND_ES1968 is not set
+# CONFIG_SND_MAESTRO3 is not set
+CONFIG_SND_FM801=y
+CONFIG_SND_FM801_TEA575X=y
+# CONFIG_SND_ICE1712 is not set
+# CONFIG_SND_ICE1724 is not set
+# CONFIG_SND_INTEL8X0 is not set
+# CONFIG_SND_INTEL8X0M is not set
+# CONFIG_SND_SONICVIBES is not set
+# CONFIG_SND_VIA82XX is not set
+# CONFIG_SND_VX222 is not set
+
+#
+# USB devices
+#
+# CONFIG_SND_USB_AUDIO is not set
+# CONFIG_SND_USB_USX2Y is not set
+
+#
+# Open Sound System
+#
+# CONFIG_SOUND_PRIME is not set
+
+#
+# USB support
+#
+CONFIG_USB=y
+# CONFIG_USB_DEBUG is not set
+
+#
+# Miscellaneous USB options
+#
+# CONFIG_USB_DEVICEFS is not set
+CONFIG_USB_BANDWIDTH=y
+# CONFIG_USB_DYNAMIC_MINORS is not set
+# CONFIG_USB_SUSPEND is not set
+# CONFIG_USB_OTG is not set
+CONFIG_USB_ARCH_HAS_HCD=y
+CONFIG_USB_ARCH_HAS_OHCI=y
+
+#
+# USB Host Controller Drivers
+#
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_SPLIT_ISO is not set
+# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_UHCI_HCD=y
+# CONFIG_USB_SL811_HCD is not set
+
+#
+# USB Device Class drivers
+#
+# CONFIG_USB_AUDIO is not set
+# CONFIG_USB_BLUETOOTH_TTY is not set
+# CONFIG_USB_MIDI is not set
+# CONFIG_USB_ACM is not set
+# CONFIG_USB_PRINTER is not set
+
+#
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+#
+CONFIG_USB_STORAGE=y
+# CONFIG_USB_STORAGE_DEBUG is not set
+# CONFIG_USB_STORAGE_RW_DETECT is not set
+# CONFIG_USB_STORAGE_DATAFAB is not set
+# CONFIG_USB_STORAGE_FREECOM is not set
+# CONFIG_USB_STORAGE_ISD200 is not set
+# CONFIG_USB_STORAGE_DPCM is not set
+# CONFIG_USB_STORAGE_HP8200e is not set
+# CONFIG_USB_STORAGE_SDDR09 is not set
+# CONFIG_USB_STORAGE_SDDR55 is not set
+# CONFIG_USB_STORAGE_JUMPSHOT is not set
+
+#
+# USB Input Devices
+#
+CONFIG_USB_HID=y
+CONFIG_USB_HIDINPUT=y
+# CONFIG_HID_FF is not set
+CONFIG_USB_HIDDEV=y
+# CONFIG_USB_AIPTEK is not set
+# CONFIG_USB_WACOM is not set
+# CONFIG_USB_KBTAB is not set
+# CONFIG_USB_POWERMATE is not set
+# CONFIG_USB_MTOUCH is not set
+# CONFIG_USB_EGALAX is not set
+# CONFIG_USB_XPAD is not set
+# CONFIG_USB_ATI_REMOTE is not set
+
+#
+# USB Imaging devices
+#
+# CONFIG_USB_MDC800 is not set
+# CONFIG_USB_MICROTEK is not set
+# CONFIG_USB_HPUSBSCSI is not set
+
+#
+# USB Multimedia devices
+#
+# CONFIG_USB_DABUSB is not set
+# CONFIG_USB_VICAM is not set
+# CONFIG_USB_DSBR is not set
+# CONFIG_USB_IBMCAM is not set
+# CONFIG_USB_KONICAWC is not set
+# CONFIG_USB_OV511 is not set
+# CONFIG_USB_SE401 is not set
+# CONFIG_USB_SN9C102 is not set
+# CONFIG_USB_STV680 is not set
+
+#
+# USB Network Adapters
+#
+# CONFIG_USB_CATC is not set
+# CONFIG_USB_KAWETH is not set
+# CONFIG_USB_PEGASUS is not set
+# CONFIG_USB_RTL8150 is not set
+# CONFIG_USB_USBNET is not set
+
+#
+# USB port drivers
+#
+
+#
+# USB Serial Converter support
+#
+# CONFIG_USB_SERIAL is not set
+
+#
+# USB Miscellaneous drivers
+#
+# CONFIG_USB_EMI62 is not set
+# CONFIG_USB_EMI26 is not set
+# CONFIG_USB_TIGL is not set
+# CONFIG_USB_AUERSWALD is not set
+# CONFIG_USB_RIO500 is not set
+# CONFIG_USB_LEGOTOWER is not set
+# CONFIG_USB_LCD is not set
+# CONFIG_USB_LED is not set
+# CONFIG_USB_CYTHERM is not set
+# CONFIG_USB_PHIDGETKIT is not set
+# CONFIG_USB_PHIDGETSERVO is not set
+
+#
+# USB ATM/DSL drivers
+#
+
+#
+# USB Gadget Support
+#
+# CONFIG_USB_GADGET is not set
+
+#
+# MMC/SD Card support
+#
+# CONFIG_MMC is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+# CONFIG_EXT2_FS_POSIX_ACL is not set
+# CONFIG_EXT2_FS_SECURITY is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_XATTR=y
+# CONFIG_EXT3_FS_POSIX_ACL is not set
+# CONFIG_EXT3_FS_SECURITY is not set
+CONFIG_JBD=y
+# CONFIG_JBD_DEBUG is not set
+CONFIG_FS_MBCACHE=y
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_XFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_QUOTA is not set
+CONFIG_DNOTIFY=y
+CONFIG_AUTOFS_FS=y
+# CONFIG_AUTOFS4_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+# CONFIG_ZISOFS is not set
+CONFIG_UDF_FS=y
+CONFIG_UDF_NLS=y
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_SYSFS=y
+# CONFIG_DEVFS_FS is not set
+# CONFIG_DEVPTS_FS_XATTR is not set
+CONFIG_TMPFS=y
+CONFIG_TMPFS_XATTR=y
+CONFIG_TMPFS_SECURITY=y
+CONFIG_HUGETLBFS=y
+CONFIG_HUGETLB_PAGE=y
+CONFIG_RAMFS=y
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Network File Systems
+#
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+CONFIG_NFS_V4=y
+# CONFIG_NFS_DIRECTIO is not set
+CONFIG_NFSD=y
+CONFIG_NFSD_V3=y
+# CONFIG_NFSD_V4 is not set
+# CONFIG_NFSD_TCP is not set
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+CONFIG_EXPORTFS=y
+CONFIG_SUNRPC=y
+CONFIG_SUNRPC_GSS=y
+CONFIG_RPCSEC_GSS_KRB5=y
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+# CONFIG_OSF_PARTITION is not set
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+# CONFIG_MAC_PARTITION is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_BSD_DISKLABEL is not set
+# CONFIG_MINIX_SUBPARTITION is not set
+# CONFIG_SOLARIS_X86_PARTITION is not set
+# CONFIG_UNIXWARE_DISKLABEL is not set
+# CONFIG_LDM_PARTITION is not set
+# CONFIG_SGI_PARTITION is not set
+# CONFIG_ULTRIX_PARTITION is not set
+# CONFIG_SUN_PARTITION is not set
+CONFIG_EFI_PARTITION=y
+
+#
+# Native Language Support
+#
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_737=y
+CONFIG_NLS_CODEPAGE_775=y
+CONFIG_NLS_CODEPAGE_850=y
+CONFIG_NLS_CODEPAGE_852=y
+CONFIG_NLS_CODEPAGE_855=y
+CONFIG_NLS_CODEPAGE_857=y
+CONFIG_NLS_CODEPAGE_860=y
+CONFIG_NLS_CODEPAGE_861=y
+CONFIG_NLS_CODEPAGE_862=y
+CONFIG_NLS_CODEPAGE_863=y
+CONFIG_NLS_CODEPAGE_864=y
+CONFIG_NLS_CODEPAGE_865=y
+CONFIG_NLS_CODEPAGE_866=y
+CONFIG_NLS_CODEPAGE_869=y
+CONFIG_NLS_CODEPAGE_936=y
+CONFIG_NLS_CODEPAGE_950=y
+CONFIG_NLS_CODEPAGE_932=y
+CONFIG_NLS_CODEPAGE_949=y
+CONFIG_NLS_CODEPAGE_874=y
+CONFIG_NLS_ISO8859_8=y
+# CONFIG_NLS_CODEPAGE_1250 is not set
+CONFIG_NLS_CODEPAGE_1251=y
+# CONFIG_NLS_ASCII is not set
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_ISO8859_2=y
+CONFIG_NLS_ISO8859_3=y
+CONFIG_NLS_ISO8859_4=y
+CONFIG_NLS_ISO8859_5=y
+CONFIG_NLS_ISO8859_6=y
+CONFIG_NLS_ISO8859_7=y
+CONFIG_NLS_ISO8859_9=y
+CONFIG_NLS_ISO8859_13=y
+CONFIG_NLS_ISO8859_14=y
+CONFIG_NLS_ISO8859_15=y
+CONFIG_NLS_KOI8_R=y
+CONFIG_NLS_KOI8_U=y
+CONFIG_NLS_UTF8=y
+
+#
+# Library routines
+#
+# CONFIG_CRC_CCITT is not set
+CONFIG_CRC32=y
+# CONFIG_LIBCRC32C is not set
+
+#
+# Profiling support
+#
+# CONFIG_PROFILING is not set
+
+#
+# Kernel hacking
+#
+CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_INFO is not set
+CONFIG_IA64_GRANULE_16MB=y
+# CONFIG_IA64_GRANULE_64MB is not set
+CONFIG_IA64_PRINT_HAZARDS=y
+# CONFIG_DISABLE_VHPT is not set
+# CONFIG_IA64_DEBUG_CMPXCHG is not set
+# CONFIG_IA64_DEBUG_IRQ is not set
+CONFIG_SYSVIPC_COMPAT=y
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+
+#
+# Cryptographic options
+#
+CONFIG_CRYPTO=y
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_MD4 is not set
+CONFIG_CRYPTO_MD5=y
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_WP512 is not set
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Hardware crypto devices
+#
diff --git a/arch/ia64/defconfig b/arch/ia64/defconfig
new file mode 100644
index 00000000000..7539e83bf05
--- /dev/null
+++ b/arch/ia64/defconfig
@@ -0,0 +1,1199 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.10
+# Thu Jan 6 11:13:13 2005
+#
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_CLEAN_COMPILE=y
+CONFIG_LOCK_KERNEL=y
+
+#
+# General setup
+#
+CONFIG_LOCALVERSION=""
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_BSD_PROCESS_ACCT is not set
+CONFIG_SYSCTL=y
+# CONFIG_AUDIT is not set
+CONFIG_LOG_BUF_SHIFT=20
+CONFIG_HOTPLUG=y
+CONFIG_KOBJECT_UEVENT=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+# CONFIG_EMBEDDED is not set
+CONFIG_KALLSYMS=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_SHMEM=y
+CONFIG_CC_ALIGN_FUNCTIONS=0
+CONFIG_CC_ALIGN_LABELS=0
+CONFIG_CC_ALIGN_LOOPS=0
+CONFIG_CC_ALIGN_JUMPS=0
+# CONFIG_TINY_SHMEM is not set
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+CONFIG_OBSOLETE_MODPARM=y
+CONFIG_MODVERSIONS=y
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_KMOD=y
+CONFIG_STOP_MACHINE=y
+
+#
+# Processor type and features
+#
+CONFIG_IA64=y
+CONFIG_64BIT=y
+CONFIG_MMU=y
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_TIME_INTERPOLATION=y
+CONFIG_EFI=y
+CONFIG_GENERIC_IOMAP=y
+CONFIG_IA64_GENERIC=y
+# CONFIG_IA64_DIG is not set
+# CONFIG_IA64_HP_ZX1 is not set
+# CONFIG_IA64_SGI_SN2 is not set
+# CONFIG_IA64_HP_SIM is not set
+# CONFIG_ITANIUM is not set
+CONFIG_MCKINLEY=y
+# CONFIG_IA64_PAGE_SIZE_4KB is not set
+# CONFIG_IA64_PAGE_SIZE_8KB is not set
+CONFIG_IA64_PAGE_SIZE_16KB=y
+# CONFIG_IA64_PAGE_SIZE_64KB is not set
+CONFIG_IA64_L1_CACHE_SHIFT=7
+CONFIG_NUMA=y
+CONFIG_VIRTUAL_MEM_MAP=y
+CONFIG_DISCONTIGMEM=y
+CONFIG_IA64_CYCLONE=y
+CONFIG_IOSAPIC=y
+CONFIG_FORCE_MAX_ZONEORDER=18
+CONFIG_SMP=y
+CONFIG_NR_CPUS=512
+CONFIG_HOTPLUG_CPU=y
+# CONFIG_PREEMPT is not set
+CONFIG_HAVE_DEC_LOCK=y
+CONFIG_IA32_SUPPORT=y
+CONFIG_COMPAT=y
+CONFIG_IA64_MCA_RECOVERY=y
+CONFIG_PERFMON=y
+CONFIG_IA64_PALINFO=y
+CONFIG_ACPI_DEALLOCATE_IRQ=y
+
+#
+# Firmware Drivers
+#
+CONFIG_EFI_VARS=y
+CONFIG_EFI_PCDP=y
+CONFIG_BINFMT_ELF=y
+CONFIG_BINFMT_MISC=m
+
+#
+# Power management and ACPI
+#
+CONFIG_PM=y
+CONFIG_ACPI=y
+
+#
+# ACPI (Advanced Configuration and Power Interface) Support
+#
+CONFIG_ACPI_BOOT=y
+CONFIG_ACPI_INTERPRETER=y
+CONFIG_ACPI_BUTTON=m
+CONFIG_ACPI_VIDEO=m
+CONFIG_ACPI_FAN=m
+CONFIG_ACPI_PROCESSOR=m
+CONFIG_ACPI_HOTPLUG_CPU=y
+CONFIG_ACPI_THERMAL=m
+CONFIG_ACPI_NUMA=y
+CONFIG_ACPI_BLACKLIST_YEAR=0
+# CONFIG_ACPI_DEBUG is not set
+CONFIG_ACPI_BUS=y
+CONFIG_ACPI_POWER=y
+CONFIG_ACPI_PCI=y
+CONFIG_ACPI_SYSTEM=y
+CONFIG_ACPI_CONTAINER=m
+
+#
+# Bus options (PCI, PCMCIA)
+#
+CONFIG_PCI=y
+CONFIG_PCI_DOMAINS=y
+# CONFIG_PCI_MSI is not set
+CONFIG_PCI_LEGACY_PROC=y
+CONFIG_PCI_NAMES=y
+
+#
+# PCI Hotplug Support
+#
+CONFIG_HOTPLUG_PCI=m
+# CONFIG_HOTPLUG_PCI_FAKE is not set
+CONFIG_HOTPLUG_PCI_ACPI=m
+# CONFIG_HOTPLUG_PCI_ACPI_IBM is not set
+# CONFIG_HOTPLUG_PCI_CPCI is not set
+# CONFIG_HOTPLUG_PCI_PCIE is not set
+# CONFIG_HOTPLUG_PCI_SHPC is not set
+
+#
+# PCCARD (PCMCIA/CardBus) support
+#
+# CONFIG_PCCARD is not set
+
+#
+# PC-card bridges
+#
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+# CONFIG_FW_LOADER is not set
+# CONFIG_DEBUG_DRIVER is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+# CONFIG_MTD is not set
+
+#
+# Parallel port support
+#
+# CONFIG_PARPORT is not set
+
+#
+# Plug and Play support
+#
+# CONFIG_PNP is not set
+
+#
+# Block devices
+#
+# CONFIG_BLK_CPQ_DA is not set
+# CONFIG_BLK_CPQ_CISS_DA is not set
+# CONFIG_BLK_DEV_DAC960 is not set
+# CONFIG_BLK_DEV_UMEM is not set
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_NBD=m
+# CONFIG_BLK_DEV_SX8 is not set
+# CONFIG_BLK_DEV_UB is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=4096
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CDROM_PKTCDVD is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+
+#
+# ATA/ATAPI/MFM/RLL support
+#
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDE=y
+
+#
+# Please see Documentation/ide.txt for help/info on IDE drives
+#
+# CONFIG_BLK_DEV_IDE_SATA is not set
+CONFIG_BLK_DEV_IDEDISK=y
+# CONFIG_IDEDISK_MULTI_MODE is not set
+CONFIG_BLK_DEV_IDECD=y
+# CONFIG_BLK_DEV_IDETAPE is not set
+CONFIG_BLK_DEV_IDEFLOPPY=y
+CONFIG_BLK_DEV_IDESCSI=m
+# CONFIG_IDE_TASK_IOCTL is not set
+
+#
+# IDE chipset support/bugfixes
+#
+CONFIG_IDE_GENERIC=y
+CONFIG_BLK_DEV_IDEPCI=y
+# CONFIG_IDEPCI_SHARE_IRQ is not set
+# CONFIG_BLK_DEV_OFFBOARD is not set
+CONFIG_BLK_DEV_GENERIC=y
+# CONFIG_BLK_DEV_OPTI621 is not set
+CONFIG_BLK_DEV_IDEDMA_PCI=y
+# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
+CONFIG_IDEDMA_PCI_AUTO=y
+# CONFIG_IDEDMA_ONLYDISK is not set
+# CONFIG_BLK_DEV_AEC62XX is not set
+# CONFIG_BLK_DEV_ALI15X3 is not set
+# CONFIG_BLK_DEV_AMD74XX is not set
+CONFIG_BLK_DEV_CMD64X=y
+# CONFIG_BLK_DEV_TRIFLEX is not set
+# CONFIG_BLK_DEV_CY82C693 is not set
+# CONFIG_BLK_DEV_CS5520 is not set
+# CONFIG_BLK_DEV_CS5530 is not set
+# CONFIG_BLK_DEV_HPT34X is not set
+# CONFIG_BLK_DEV_HPT366 is not set
+# CONFIG_BLK_DEV_SC1200 is not set
+CONFIG_BLK_DEV_PIIX=y
+# CONFIG_BLK_DEV_NS87415 is not set
+# CONFIG_BLK_DEV_PDC202XX_OLD is not set
+# CONFIG_BLK_DEV_PDC202XX_NEW is not set
+# CONFIG_BLK_DEV_SVWKS is not set
+CONFIG_BLK_DEV_SGIIOC4=y
+# CONFIG_BLK_DEV_SIIMAGE is not set
+# CONFIG_BLK_DEV_SLC90E66 is not set
+# CONFIG_BLK_DEV_TRM290 is not set
+# CONFIG_BLK_DEV_VIA82CXXX is not set
+# CONFIG_IDE_ARM is not set
+CONFIG_BLK_DEV_IDEDMA=y
+# CONFIG_IDEDMA_IVB is not set
+CONFIG_IDEDMA_AUTO=y
+# CONFIG_BLK_DEV_HD is not set
+
+#
+# SCSI device support
+#
+CONFIG_SCSI=y
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+# CONFIG_CHR_DEV_OSST is not set
+CONFIG_BLK_DEV_SR=m
+# CONFIG_BLK_DEV_SR_VENDOR is not set
+CONFIG_CHR_DEV_SG=m
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+# CONFIG_SCSI_CONSTANTS is not set
+# CONFIG_SCSI_LOGGING is not set
+
+#
+# SCSI Transport Attributes
+#
+CONFIG_SCSI_SPI_ATTRS=y
+CONFIG_SCSI_FC_ATTRS=y
+# CONFIG_SCSI_ISCSI_ATTRS is not set
+
+#
+# SCSI low-level drivers
+#
+# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
+# CONFIG_SCSI_3W_9XXX is not set
+# CONFIG_SCSI_ACARD is not set
+# CONFIG_SCSI_AACRAID is not set
+# CONFIG_SCSI_AIC7XXX is not set
+# CONFIG_SCSI_AIC7XXX_OLD is not set
+# CONFIG_SCSI_AIC79XX is not set
+# CONFIG_MEGARAID_NEWGEN is not set
+# CONFIG_MEGARAID_LEGACY is not set
+# CONFIG_SCSI_SATA is not set
+# CONFIG_SCSI_BUSLOGIC is not set
+# CONFIG_SCSI_DMX3191D is not set
+# CONFIG_SCSI_EATA is not set
+# CONFIG_SCSI_EATA_PIO is not set
+# CONFIG_SCSI_FUTURE_DOMAIN is not set
+# CONFIG_SCSI_GDTH is not set
+# CONFIG_SCSI_IPS is not set
+# CONFIG_SCSI_INITIO is not set
+# CONFIG_SCSI_INIA100 is not set
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1
+CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
+CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
+# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
+# CONFIG_SCSI_IPR is not set
+# CONFIG_SCSI_QLOGIC_ISP is not set
+CONFIG_SCSI_QLOGIC_FC=y
+# CONFIG_SCSI_QLOGIC_FC_FIRMWARE is not set
+CONFIG_SCSI_QLOGIC_1280=y
+# CONFIG_SCSI_QLOGIC_1280_1040 is not set
+CONFIG_SCSI_QLA2XXX=y
+CONFIG_SCSI_QLA21XX=m
+CONFIG_SCSI_QLA22XX=m
+CONFIG_SCSI_QLA2300=m
+CONFIG_SCSI_QLA2322=m
+# CONFIG_SCSI_QLA6312 is not set
+# CONFIG_SCSI_DC395x is not set
+# CONFIG_SCSI_DC390T is not set
+# CONFIG_SCSI_DEBUG is not set
+
+#
+# Multi-device support (RAID and LVM)
+#
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=m
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+# CONFIG_MD_RAID10 is not set
+CONFIG_MD_RAID5=m
+CONFIG_MD_RAID6=m
+CONFIG_MD_MULTIPATH=m
+# CONFIG_MD_FAULTY is not set
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+
+#
+# Fusion MPT device support
+#
+CONFIG_FUSION=y
+CONFIG_FUSION_MAX_SGE=40
+# CONFIG_FUSION_CTL is not set
+
+#
+# IEEE 1394 (FireWire) support
+#
+# CONFIG_IEEE1394 is not set
+
+#
+# I2O device support
+#
+# CONFIG_I2O is not set
+
+#
+# Networking support
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+CONFIG_NETLINK_DEV=y
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+# CONFIG_IP_ADVANCED_ROUTER is not set
+# CONFIG_IP_PNP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_IP_MROUTE is not set
+CONFIG_ARPD=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_TUNNEL is not set
+CONFIG_IP_TCPDIAG=y
+# CONFIG_IP_TCPDIAG_IPV6 is not set
+# CONFIG_IPV6 is not set
+# CONFIG_NETFILTER is not set
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
+# CONFIG_NET_SCHED is not set
+# CONFIG_NET_CLS_ROUTE is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+CONFIG_NETPOLL=y
+# CONFIG_NETPOLL_RX is not set
+# CONFIG_NETPOLL_TRAP is not set
+CONFIG_NET_POLL_CONTROLLER=y
+# CONFIG_HAMRADIO is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=m
+# CONFIG_BONDING is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+# CONFIG_ETHERTAP is not set
+
+#
+# ARCnet devices
+#
+# CONFIG_ARCNET is not set
+
+#
+# Ethernet (10 or 100Mbit)
+#
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=m
+# CONFIG_HAPPYMEAL is not set
+# CONFIG_SUNGEM is not set
+# CONFIG_NET_VENDOR_3COM is not set
+
+#
+# Tulip family network device support
+#
+CONFIG_NET_TULIP=y
+# CONFIG_DE2104X is not set
+CONFIG_TULIP=m
+# CONFIG_TULIP_MWI is not set
+# CONFIG_TULIP_MMIO is not set
+# CONFIG_TULIP_NAPI is not set
+# CONFIG_DE4X5 is not set
+# CONFIG_WINBOND_840 is not set
+# CONFIG_DM9102 is not set
+# CONFIG_HP100 is not set
+CONFIG_NET_PCI=y
+# CONFIG_PCNET32 is not set
+# CONFIG_AMD8111_ETH is not set
+# CONFIG_ADAPTEC_STARFIRE is not set
+# CONFIG_B44 is not set
+# CONFIG_FORCEDETH is not set
+# CONFIG_DGRS is not set
+CONFIG_EEPRO100=m
+# CONFIG_EEPRO100_PIO is not set
+CONFIG_E100=m
+# CONFIG_E100_NAPI is not set
+# CONFIG_FEALNX is not set
+# CONFIG_NATSEMI is not set
+# CONFIG_NE2K_PCI is not set
+# CONFIG_8139CP is not set
+# CONFIG_8139TOO is not set
+# CONFIG_SIS900 is not set
+# CONFIG_EPIC100 is not set
+# CONFIG_SUNDANCE is not set
+# CONFIG_VIA_RHINE is not set
+
+#
+# Ethernet (1000 Mbit)
+#
+# CONFIG_ACENIC is not set
+# CONFIG_DL2K is not set
+CONFIG_E1000=y
+# CONFIG_E1000_NAPI is not set
+# CONFIG_NS83820 is not set
+# CONFIG_HAMACHI is not set
+# CONFIG_YELLOWFIN is not set
+# CONFIG_R8169 is not set
+# CONFIG_SK98LIN is not set
+# CONFIG_VIA_VELOCITY is not set
+CONFIG_TIGON3=y
+
+#
+# Ethernet (10000 Mbit)
+#
+# CONFIG_IXGB is not set
+# CONFIG_S2IO is not set
+
+#
+# Token Ring devices
+#
+# CONFIG_TR is not set
+
+#
+# Wireless LAN (non-hamradio)
+#
+# CONFIG_NET_RADIO is not set
+
+#
+# Wan interfaces
+#
+# CONFIG_WAN is not set
+# CONFIG_FDDI is not set
+# CONFIG_HIPPI is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NET_FC is not set
+# CONFIG_SHAPER is not set
+CONFIG_NETCONSOLE=y
+
+#
+# ISDN subsystem
+#
+# CONFIG_ISDN is not set
+
+#
+# Telephony Support
+#
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+
+#
+# Userland interfaces
+#
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_INPUT_MOUSEDEV_PSAUX=y
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_TSDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input I/O drivers
+#
+CONFIG_GAMEPORT=m
+CONFIG_SOUND_GAMEPORT=m
+# CONFIG_GAMEPORT_NS558 is not set
+# CONFIG_GAMEPORT_L4 is not set
+# CONFIG_GAMEPORT_EMU10K1 is not set
+# CONFIG_GAMEPORT_VORTEX is not set
+# CONFIG_GAMEPORT_FM801 is not set
+# CONFIG_GAMEPORT_CS461x is not set
+CONFIG_SERIO=y
+CONFIG_SERIO_I8042=y
+# CONFIG_SERIO_SERPORT is not set
+# CONFIG_SERIO_CT82C710 is not set
+# CONFIG_SERIO_PCIPS2 is not set
+CONFIG_SERIO_LIBPS2=y
+# CONFIG_SERIO_RAW is not set
+
+#
+# Input Device Drivers
+#
+CONFIG_INPUT_KEYBOARD=y
+CONFIG_KEYBOARD_ATKBD=y
+# CONFIG_KEYBOARD_SUNKBD is not set
+# CONFIG_KEYBOARD_LKKBD is not set
+# CONFIG_KEYBOARD_XTKBD is not set
+# CONFIG_KEYBOARD_NEWTON is not set
+CONFIG_INPUT_MOUSE=y
+CONFIG_MOUSE_PS2=y
+# CONFIG_MOUSE_SERIAL is not set
+# CONFIG_MOUSE_VSXXXAA is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+CONFIG_SERIAL_NONSTANDARD=y
+# CONFIG_ROCKETPORT is not set
+# CONFIG_CYCLADES is not set
+# CONFIG_MOXA_SMARTIO is not set
+# CONFIG_SYNCLINK is not set
+# CONFIG_SYNCLINKMP is not set
+# CONFIG_N_HDLC is not set
+# CONFIG_STALDRV is not set
+CONFIG_SGI_SNSC=y
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_ACPI=y
+CONFIG_SERIAL_8250_NR_UARTS=6
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+# CONFIG_SERIAL_8250_DETECT_IRQ is not set
+# CONFIG_SERIAL_8250_MULTIPORT is not set
+# CONFIG_SERIAL_8250_RSA is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_SERIAL_SGI_L1_CONSOLE=y
+CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+
+#
+# IPMI
+#
+# CONFIG_IPMI_HANDLER is not set
+
+#
+# Watchdog Cards
+#
+# CONFIG_WATCHDOG is not set
+# CONFIG_HW_RANDOM is not set
+CONFIG_EFI_RTC=y
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+CONFIG_AGP=m
+CONFIG_AGP_I460=m
+CONFIG_AGP_HP_ZX1=m
+CONFIG_DRM=m
+CONFIG_DRM_TDFX=m
+CONFIG_DRM_R128=m
+CONFIG_DRM_RADEON=m
+CONFIG_DRM_MGA=m
+CONFIG_DRM_SIS=m
+CONFIG_RAW_DRIVER=m
+CONFIG_HPET=y
+# CONFIG_HPET_RTC_IRQ is not set
+CONFIG_HPET_MMAP=y
+CONFIG_MAX_RAW_DEVS=256
+CONFIG_MMTIMER=y
+
+#
+# I2C support
+#
+# CONFIG_I2C is not set
+
+#
+# Dallas's 1-wire bus
+#
+# CONFIG_W1 is not set
+
+#
+# Misc devices
+#
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+
+#
+# Digital Video Broadcasting Devices
+#
+# CONFIG_DVB is not set
+
+#
+# Graphics support
+#
+# CONFIG_FB is not set
+
+#
+# Console display driver support
+#
+CONFIG_VGA_CONSOLE=y
+CONFIG_DUMMY_CONSOLE=y
+
+#
+# Sound
+#
+CONFIG_SOUND=m
+
+#
+# Advanced Linux Sound Architecture
+#
+CONFIG_SND=m
+CONFIG_SND_TIMER=m
+CONFIG_SND_PCM=m
+CONFIG_SND_HWDEP=m
+CONFIG_SND_RAWMIDI=m
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_OSSEMUL=y
+CONFIG_SND_MIXER_OSS=m
+CONFIG_SND_PCM_OSS=m
+CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_VERBOSE_PRINTK=y
+# CONFIG_SND_DEBUG is not set
+
+#
+# Generic devices
+#
+CONFIG_SND_MPU401_UART=m
+CONFIG_SND_OPL3_LIB=m
+CONFIG_SND_DUMMY=m
+CONFIG_SND_VIRMIDI=m
+CONFIG_SND_MTPAV=m
+CONFIG_SND_SERIAL_U16550=m
+CONFIG_SND_MPU401=m
+
+#
+# PCI devices
+#
+CONFIG_SND_AC97_CODEC=m
+# CONFIG_SND_ALI5451 is not set
+# CONFIG_SND_ATIIXP is not set
+# CONFIG_SND_ATIIXP_MODEM is not set
+# CONFIG_SND_AU8810 is not set
+# CONFIG_SND_AU8820 is not set
+# CONFIG_SND_AU8830 is not set
+# CONFIG_SND_AZT3328 is not set
+# CONFIG_SND_BT87X is not set
+CONFIG_SND_CS46XX=m
+CONFIG_SND_CS46XX_NEW_DSP=y
+CONFIG_SND_CS4281=m
+CONFIG_SND_EMU10K1=m
+# CONFIG_SND_KORG1212 is not set
+# CONFIG_SND_MIXART is not set
+# CONFIG_SND_NM256 is not set
+# CONFIG_SND_RME32 is not set
+# CONFIG_SND_RME96 is not set
+# CONFIG_SND_RME9652 is not set
+# CONFIG_SND_HDSP is not set
+# CONFIG_SND_TRIDENT is not set
+# CONFIG_SND_YMFPCI is not set
+# CONFIG_SND_ALS4000 is not set
+# CONFIG_SND_CMIPCI is not set
+# CONFIG_SND_ENS1370 is not set
+# CONFIG_SND_ENS1371 is not set
+# CONFIG_SND_ES1938 is not set
+# CONFIG_SND_ES1968 is not set
+# CONFIG_SND_MAESTRO3 is not set
+CONFIG_SND_FM801=m
+# CONFIG_SND_FM801_TEA575X is not set
+# CONFIG_SND_ICE1712 is not set
+# CONFIG_SND_ICE1724 is not set
+# CONFIG_SND_INTEL8X0 is not set
+# CONFIG_SND_INTEL8X0M is not set
+# CONFIG_SND_SONICVIBES is not set
+# CONFIG_SND_VIA82XX is not set
+# CONFIG_SND_VX222 is not set
+
+#
+# USB devices
+#
+# CONFIG_SND_USB_AUDIO is not set
+# CONFIG_SND_USB_USX2Y is not set
+
+#
+# Open Sound System
+#
+# CONFIG_SOUND_PRIME is not set
+
+#
+# USB support
+#
+CONFIG_USB=m
+# CONFIG_USB_DEBUG is not set
+
+#
+# Miscellaneous USB options
+#
+CONFIG_USB_DEVICEFS=y
+# CONFIG_USB_BANDWIDTH is not set
+# CONFIG_USB_DYNAMIC_MINORS is not set
+# CONFIG_USB_SUSPEND is not set
+# CONFIG_USB_OTG is not set
+CONFIG_USB_ARCH_HAS_HCD=y
+CONFIG_USB_ARCH_HAS_OHCI=y
+
+#
+# USB Host Controller Drivers
+#
+CONFIG_USB_EHCI_HCD=m
+# CONFIG_USB_EHCI_SPLIT_ISO is not set
+# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
+CONFIG_USB_OHCI_HCD=m
+CONFIG_USB_UHCI_HCD=m
+# CONFIG_USB_SL811_HCD is not set
+
+#
+# USB Device Class drivers
+#
+# CONFIG_USB_AUDIO is not set
+# CONFIG_USB_BLUETOOTH_TTY is not set
+# CONFIG_USB_MIDI is not set
+# CONFIG_USB_ACM is not set
+# CONFIG_USB_PRINTER is not set
+
+#
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+#
+CONFIG_USB_STORAGE=m
+# CONFIG_USB_STORAGE_DEBUG is not set
+# CONFIG_USB_STORAGE_RW_DETECT is not set
+# CONFIG_USB_STORAGE_DATAFAB is not set
+# CONFIG_USB_STORAGE_FREECOM is not set
+# CONFIG_USB_STORAGE_ISD200 is not set
+# CONFIG_USB_STORAGE_DPCM is not set
+# CONFIG_USB_STORAGE_HP8200e is not set
+# CONFIG_USB_STORAGE_SDDR09 is not set
+# CONFIG_USB_STORAGE_SDDR55 is not set
+# CONFIG_USB_STORAGE_JUMPSHOT is not set
+
+#
+# USB Input Devices
+#
+CONFIG_USB_HID=m
+CONFIG_USB_HIDINPUT=y
+# CONFIG_HID_FF is not set
+# CONFIG_USB_HIDDEV is not set
+
+#
+# USB HID Boot Protocol drivers
+#
+# CONFIG_USB_KBD is not set
+# CONFIG_USB_MOUSE is not set
+# CONFIG_USB_AIPTEK is not set
+# CONFIG_USB_WACOM is not set
+# CONFIG_USB_KBTAB is not set
+# CONFIG_USB_POWERMATE is not set
+# CONFIG_USB_MTOUCH is not set
+# CONFIG_USB_EGALAX is not set
+# CONFIG_USB_XPAD is not set
+# CONFIG_USB_ATI_REMOTE is not set
+
+#
+# USB Imaging devices
+#
+# CONFIG_USB_MDC800 is not set
+# CONFIG_USB_MICROTEK is not set
+
+#
+# USB Multimedia devices
+#
+# CONFIG_USB_DABUSB is not set
+
+#
+# Video4Linux support is needed for USB Multimedia device support
+#
+
+#
+# USB Network Adapters
+#
+# CONFIG_USB_CATC is not set
+# CONFIG_USB_KAWETH is not set
+# CONFIG_USB_PEGASUS is not set
+# CONFIG_USB_RTL8150 is not set
+# CONFIG_USB_USBNET is not set
+
+#
+# USB port drivers
+#
+
+#
+# USB Serial Converter support
+#
+# CONFIG_USB_SERIAL is not set
+
+#
+# USB Miscellaneous drivers
+#
+# CONFIG_USB_EMI62 is not set
+# CONFIG_USB_EMI26 is not set
+# CONFIG_USB_TIGL is not set
+# CONFIG_USB_AUERSWALD is not set
+# CONFIG_USB_RIO500 is not set
+# CONFIG_USB_LEGOTOWER is not set
+# CONFIG_USB_LCD is not set
+# CONFIG_USB_LED is not set
+# CONFIG_USB_CYTHERM is not set
+# CONFIG_USB_PHIDGETKIT is not set
+# CONFIG_USB_PHIDGETSERVO is not set
+# CONFIG_USB_TEST is not set
+
+#
+# USB ATM/DSL drivers
+#
+
+#
+# USB Gadget Support
+#
+# CONFIG_USB_GADGET is not set
+
+#
+# MMC/SD Card support
+#
+# CONFIG_MMC is not set
+
+#
+# InfiniBand support
+#
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_MTHCA=m
+# CONFIG_INFINIBAND_MTHCA_DEBUG is not set
+CONFIG_INFINIBAND_IPOIB=m
+# CONFIG_INFINIBAND_IPOIB_DEBUG is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_XATTR=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_JBD=y
+# CONFIG_JBD_DEBUG is not set
+CONFIG_FS_MBCACHE=y
+CONFIG_REISERFS_FS=y
+# CONFIG_REISERFS_CHECK is not set
+# CONFIG_REISERFS_PROC_INFO is not set
+CONFIG_REISERFS_FS_XATTR=y
+CONFIG_REISERFS_FS_POSIX_ACL=y
+CONFIG_REISERFS_FS_SECURITY=y
+# CONFIG_JFS_FS is not set
+CONFIG_FS_POSIX_ACL=y
+CONFIG_XFS_FS=y
+# CONFIG_XFS_RT is not set
+# CONFIG_XFS_QUOTA is not set
+# CONFIG_XFS_SECURITY is not set
+# CONFIG_XFS_POSIX_ACL is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_QUOTA is not set
+CONFIG_DNOTIFY=y
+CONFIG_AUTOFS_FS=y
+CONFIG_AUTOFS4_FS=y
+
+#
+# CD-ROM/DVD Filesystems
+#
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+# CONFIG_ZISOFS is not set
+CONFIG_UDF_FS=m
+CONFIG_UDF_NLS=y
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=y
+# CONFIG_MSDOS_FS is not set
+CONFIG_VFAT_FS=y
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
+CONFIG_NTFS_FS=m
+# CONFIG_NTFS_DEBUG is not set
+# CONFIG_NTFS_RW is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_SYSFS=y
+# CONFIG_DEVFS_FS is not set
+# CONFIG_DEVPTS_FS_XATTR is not set
+CONFIG_TMPFS=y
+CONFIG_TMPFS_XATTR=y
+CONFIG_TMPFS_SECURITY=y
+CONFIG_HUGETLBFS=y
+CONFIG_HUGETLB_PAGE=y
+CONFIG_RAMFS=y
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Network File Systems
+#
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_DIRECTIO=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3=y
+CONFIG_NFSD_V4=y
+CONFIG_NFSD_TCP=y
+CONFIG_LOCKD=m
+CONFIG_LOCKD_V4=y
+CONFIG_EXPORTFS=m
+CONFIG_SUNRPC=m
+CONFIG_SUNRPC_GSS=m
+CONFIG_RPCSEC_GSS_KRB5=m
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+CONFIG_SMB_FS=m
+CONFIG_SMB_NLS_DEFAULT=y
+CONFIG_SMB_NLS_REMOTE="cp437"
+CONFIG_CIFS=m
+# CONFIG_CIFS_STATS is not set
+# CONFIG_CIFS_XATTR is not set
+# CONFIG_CIFS_EXPERIMENTAL is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+# CONFIG_OSF_PARTITION is not set
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+# CONFIG_MAC_PARTITION is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_BSD_DISKLABEL is not set
+# CONFIG_MINIX_SUBPARTITION is not set
+# CONFIG_SOLARIS_X86_PARTITION is not set
+# CONFIG_UNIXWARE_DISKLABEL is not set
+# CONFIG_LDM_PARTITION is not set
+CONFIG_SGI_PARTITION=y
+# CONFIG_ULTRIX_PARTITION is not set
+# CONFIG_SUN_PARTITION is not set
+CONFIG_EFI_PARTITION=y
+
+#
+# Native Language Support
+#
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_737=m
+CONFIG_NLS_CODEPAGE_775=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_CODEPAGE_852=m
+CONFIG_NLS_CODEPAGE_855=m
+CONFIG_NLS_CODEPAGE_857=m
+CONFIG_NLS_CODEPAGE_860=m
+CONFIG_NLS_CODEPAGE_861=m
+CONFIG_NLS_CODEPAGE_862=m
+CONFIG_NLS_CODEPAGE_863=m
+CONFIG_NLS_CODEPAGE_864=m
+CONFIG_NLS_CODEPAGE_865=m
+CONFIG_NLS_CODEPAGE_866=m
+CONFIG_NLS_CODEPAGE_869=m
+CONFIG_NLS_CODEPAGE_936=m
+CONFIG_NLS_CODEPAGE_950=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_CODEPAGE_949=m
+CONFIG_NLS_CODEPAGE_874=m
+CONFIG_NLS_ISO8859_8=m
+CONFIG_NLS_CODEPAGE_1250=m
+CONFIG_NLS_CODEPAGE_1251=m
+# CONFIG_NLS_ASCII is not set
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_KOI8_R=m
+CONFIG_NLS_KOI8_U=m
+CONFIG_NLS_UTF8=m
+
+#
+# Library routines
+#
+# CONFIG_CRC_CCITT is not set
+CONFIG_CRC32=y
+# CONFIG_LIBCRC32C is not set
+
+#
+# HP Simulator drivers
+#
+# CONFIG_HP_SIMETH is not set
+# CONFIG_HP_SIMSERIAL is not set
+# CONFIG_HP_SIMSCSI is not set
+
+#
+# Profiling support
+#
+# CONFIG_PROFILING is not set
+
+#
+# Kernel hacking
+#
+CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_INFO is not set
+CONFIG_IA64_GRANULE_16MB=y
+# CONFIG_IA64_GRANULE_64MB is not set
+# CONFIG_IA64_PRINT_HAZARDS is not set
+# CONFIG_DISABLE_VHPT is not set
+# CONFIG_IA64_DEBUG_CMPXCHG is not set
+# CONFIG_IA64_DEBUG_IRQ is not set
+CONFIG_SYSVIPC_COMPAT=y
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+
+#
+# Cryptographic options
+#
+CONFIG_CRYPTO=y
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_MD4 is not set
+CONFIG_CRYPTO_MD5=m
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_WP512 is not set
+CONFIG_CRYPTO_DES=m
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Hardware crypto devices
+#
diff --git a/arch/ia64/dig/Makefile b/arch/ia64/dig/Makefile
new file mode 100644
index 00000000000..971cd7870dd
--- /dev/null
+++ b/arch/ia64/dig/Makefile
@@ -0,0 +1,9 @@
+#
+# ia64/platform/dig/Makefile
+#
+# Copyright (C) 1999 Silicon Graphics, Inc.
+# Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com)
+#
+
+obj-y := setup.o
+obj-$(CONFIG_IA64_GENERIC) += machvec.o
diff --git a/arch/ia64/dig/machvec.c b/arch/ia64/dig/machvec.c
new file mode 100644
index 00000000000..0c55bdafb47
--- /dev/null
+++ b/arch/ia64/dig/machvec.c
@@ -0,0 +1,3 @@
+#define MACHVEC_PLATFORM_NAME dig
+#define MACHVEC_PLATFORM_HEADER <asm/machvec_dig.h>
+#include <asm/machvec_init.h>
diff --git a/arch/ia64/dig/setup.c b/arch/ia64/dig/setup.c
new file mode 100644
index 00000000000..d58003f1ad0
--- /dev/null
+++ b/arch/ia64/dig/setup.c
@@ -0,0 +1,86 @@
+/*
+ * Platform dependent support for DIG64 platforms.
+ *
+ * Copyright (C) 1999 Intel Corp.
+ * Copyright (C) 1999, 2001 Hewlett-Packard Co
+ * Copyright (C) 1999, 2001, 2003 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com>
+ */
+#include <linux/config.h>
+
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/kdev_t.h>
+#include <linux/string.h>
+#include <linux/tty.h>
+#include <linux/console.h>
+#include <linux/timex.h>
+#include <linux/sched.h>
+#include <linux/root_dev.h>
+
+#include <asm/io.h>
+#include <asm/machvec.h>
+#include <asm/system.h>
+
+/*
+ * This is here so we can use the CMOS detection in ide-probe.c to
+ * determine what drives are present. In theory, we don't need this
+ * as the auto-detection could be done via ide-probe.c:do_probe() but
+ * in practice that would be much slower, which is painful when
+ * running in the simulator. Note that passing zeroes in DRIVE_INFO
+ * is sufficient (the IDE driver will autodetect the drive geometry).
+ */
+char drive_info[4*16];
+
+void __init
+dig_setup (char **cmdline_p)
+{
+	/*
+	 * Start from the 80x25 fallback geometry (font height 16); it is
+	 * overwritten below when the boot parameters describe a console.
+	 */
+	unsigned int orig_x = 0, orig_y = 0;
+	unsigned int num_cols = 80, num_rows = 25, font_height = 16;
+
+	/*
+	 * The root device defaults to /dev/sda2, on the assumption that
+	 * partition 1 of physical disk 1 is the EFI partition and partition 2
+	 * of the same disk is the Linux root disk.
+	 */
+	ROOT_DEV = Root_SDA2;
+
+#ifdef CONFIG_SMP
+	init_smp_config();
+#endif
+
+	memset(&screen_info, 0, sizeof(screen_info));
+
+	if (ia64_boot_param->console_info.num_rows
+	    && ia64_boot_param->console_info.num_cols) {
+		/* Both dimensions are non-zero: take the geometry as passed in. */
+		orig_x = ia64_boot_param->console_info.orig_x;
+		orig_y = ia64_boot_param->console_info.orig_y;
+		num_cols = ia64_boot_param->console_info.num_cols;
+		num_rows = ia64_boot_param->console_info.num_rows;
+		font_height = 400 / num_rows;
+	} else {
+		printk(KERN_WARNING "dig_setup: warning: invalid screen-info, guessing 80x25\n");
+	}
+
+	screen_info.orig_x = orig_x;
+	screen_info.orig_y = orig_y;
+	screen_info.orig_video_cols = num_cols;
+	screen_info.orig_video_lines = num_rows;
+	screen_info.orig_video_points = font_height;
+	screen_info.orig_video_mode = 3; /* XXX fake */
+	screen_info.orig_video_isVGA = 1; /* XXX fake */
+	screen_info.orig_video_ega_bx = 3; /* XXX fake */
+}
+
+/* Empty hook: no work is done here. */
+void __init
+dig_irq_init (void)
+{
+}
diff --git a/arch/ia64/hp/common/Makefile b/arch/ia64/hp/common/Makefile
new file mode 100644
index 00000000000..f61a60057ff
--- /dev/null
+++ b/arch/ia64/hp/common/Makefile
@@ -0,0 +1,10 @@
+#
+# ia64/platform/hp/common/Makefile
+#
+# Copyright (C) 2002 Hewlett Packard
+# Copyright (C) Alex Williamson (alex_williamson@hp.com)
+#
+
+obj-y := sba_iommu.o
+obj-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += hwsw_iommu.o
+obj-$(CONFIG_IA64_GENERIC) += hwsw_iommu.o
diff --git a/arch/ia64/hp/common/hwsw_iommu.c b/arch/ia64/hp/common/hwsw_iommu.c
new file mode 100644
index 00000000000..80f8ef01393
--- /dev/null
+++ b/arch/ia64/hp/common/hwsw_iommu.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2004 Hewlett-Packard Development Company, L.P.
+ * Contributed by David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * This is a pseudo I/O MMU which dispatches to the hardware I/O MMU
+ * whenever possible. We assume that the hardware I/O MMU requires
+ * full 32-bit addressability, as is the case, e.g., for HP zx1-based
+ * systems (there, the I/O MMU window is mapped at 3-4GB). If a
+ * device doesn't provide full 32-bit addressability, we fall back on
+ * the sw I/O TLB. This is good enough to let us support broken
+ * hardware such as soundcards which have a DMA engine that can
+ * address only 28 bits.
+ */
+
+#include <linux/device.h>
+
+#include <asm/machvec.h>
+
+/* swiotlb declarations & definitions: */
+extern void swiotlb_init_with_default_size (size_t size);
+extern ia64_mv_dma_alloc_coherent swiotlb_alloc_coherent;
+extern ia64_mv_dma_free_coherent swiotlb_free_coherent;
+extern ia64_mv_dma_map_single swiotlb_map_single;
+extern ia64_mv_dma_unmap_single swiotlb_unmap_single;
+extern ia64_mv_dma_map_sg swiotlb_map_sg;
+extern ia64_mv_dma_unmap_sg swiotlb_unmap_sg;
+extern ia64_mv_dma_supported swiotlb_dma_supported;
+extern ia64_mv_dma_mapping_error swiotlb_dma_mapping_error;
+
+/* hwiommu declarations & definitions: */
+
+extern ia64_mv_dma_alloc_coherent sba_alloc_coherent;
+extern ia64_mv_dma_free_coherent sba_free_coherent;
+extern ia64_mv_dma_map_single sba_map_single;
+extern ia64_mv_dma_unmap_single sba_unmap_single;
+extern ia64_mv_dma_map_sg sba_map_sg;
+extern ia64_mv_dma_unmap_sg sba_unmap_sg;
+extern ia64_mv_dma_supported sba_dma_supported;
+extern ia64_mv_dma_mapping_error sba_dma_mapping_error;
+
+#define hwiommu_alloc_coherent sba_alloc_coherent
+#define hwiommu_free_coherent sba_free_coherent
+#define hwiommu_map_single sba_map_single
+#define hwiommu_unmap_single sba_unmap_single
+#define hwiommu_map_sg sba_map_sg
+#define hwiommu_unmap_sg sba_unmap_sg
+#define hwiommu_dma_supported sba_dma_supported
+#define hwiommu_dma_mapping_error sba_dma_mapping_error
+#define hwiommu_sync_single_for_cpu machvec_dma_sync_single
+#define hwiommu_sync_sg_for_cpu machvec_dma_sync_sg
+#define hwiommu_sync_single_for_device machvec_dma_sync_single
+#define hwiommu_sync_sg_for_device machvec_dma_sync_sg
+
+
+/*
+ * Note: we need to make the determination of whether or not to use
+ * the sw I/O TLB based purely on the device structure. Anything else
+ * would be unreliable or would be too intrusive.
+ */
+static inline int
+use_swiotlb (struct device *dev)
+{
+	/* No device or no DMA mask to test: report "do not use the sw I/O TLB". */
+	if (!dev || !dev->dma_mask)
+		return 0;
+
+	/* Use the sw I/O TLB only when the hardware I/O MMU rejects the device's mask. */
+	return !hwiommu_dma_supported(dev, *dev->dma_mask);
+}
+
+/* Set up the sw I/O TLB that the hwsw_* entry points dispatch to when use_swiotlb() is true. */
+void
+hwsw_init (void)
+{
+ /* default to a smallish 2MB sw I/O TLB */
+ swiotlb_init_with_default_size (2 * (1<<20));
+}
+
+/* Allocate a coherent buffer from the backend selected by use_swiotlb(dev). */
+void *
+hwsw_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, int flags)
+{
+	if (!use_swiotlb(dev))
+		return hwiommu_alloc_coherent(dev, size, dma_handle, flags);
+
+	return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
+}
+
+/* Release a coherent buffer through the backend selected by use_swiotlb(dev). */
+void
+hwsw_free_coherent (struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle)
+{
+	if (!use_swiotlb(dev)) {
+		hwiommu_free_coherent(dev, size, vaddr, dma_handle);
+		return;
+	}
+
+	swiotlb_free_coherent(dev, size, vaddr, dma_handle);
+}
+
+/* Map one buffer for DMA via the backend selected by use_swiotlb(dev). */
+dma_addr_t
+hwsw_map_single (struct device *dev, void *addr, size_t size, int dir)
+{
+	if (!use_swiotlb(dev))
+		return hwiommu_map_single(dev, addr, size, dir);
+
+	return swiotlb_map_single(dev, addr, size, dir);
+}
+
+/*
+ * Undo a single-buffer mapping via the backend selected by use_swiotlb(dev).
+ * This function is void, so the backend calls are plain statements rather
+ * than "return <void expression>", which ISO C does not permit.
+ */
+void
+hwsw_unmap_single (struct device *dev, dma_addr_t iova, size_t size, int dir)
+{
+	if (use_swiotlb(dev))
+		swiotlb_unmap_single(dev, iova, size, dir);
+	else
+		hwiommu_unmap_single(dev, iova, size, dir);
+}
+
+
+/* Map a scatterlist for DMA via the backend selected by use_swiotlb(dev). */
+int
+hwsw_map_sg (struct device *dev, struct scatterlist *sglist, int nents, int dir)
+{
+	if (!use_swiotlb(dev))
+		return hwiommu_map_sg(dev, sglist, nents, dir);
+
+	return swiotlb_map_sg(dev, sglist, nents, dir);
+}
+
+/*
+ * Undo a scatterlist mapping via the backend selected by use_swiotlb(dev).
+ * This function is void, so the backend calls are plain statements rather
+ * than "return <void expression>", which ISO C does not permit.
+ */
+void
+hwsw_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, int dir)
+{
+	if (use_swiotlb(dev))
+		swiotlb_unmap_sg(dev, sglist, nents, dir);
+	else
+		hwiommu_unmap_sg(dev, sglist, nents, dir);
+}
+
+/* Sync one mapping for CPU access via the backend selected by use_swiotlb(dev). */
+void
+hwsw_sync_single_for_cpu (struct device *dev, dma_addr_t addr, size_t size, int dir)
+{
+	if (!use_swiotlb(dev)) {
+		hwiommu_sync_single_for_cpu(dev, addr, size, dir);
+		return;
+	}
+
+	swiotlb_sync_single_for_cpu(dev, addr, size, dir);
+}
+
+/* Sync a scatterlist for CPU access via the backend selected by use_swiotlb(dev). */
+void
+hwsw_sync_sg_for_cpu (struct device *dev, struct scatterlist *sg, int nelems, int dir)
+{
+	if (!use_swiotlb(dev)) {
+		hwiommu_sync_sg_for_cpu(dev, sg, nelems, dir);
+		return;
+	}
+
+	swiotlb_sync_sg_for_cpu(dev, sg, nelems, dir);
+}
+
+/* Sync one mapping for device access via the backend selected by use_swiotlb(dev). */
+void
+hwsw_sync_single_for_device (struct device *dev, dma_addr_t addr, size_t size, int dir)
+{
+	if (!use_swiotlb(dev)) {
+		hwiommu_sync_single_for_device(dev, addr, size, dir);
+		return;
+	}
+
+	swiotlb_sync_single_for_device(dev, addr, size, dir);
+}
+
+/* Sync a scatterlist for device access via the backend selected by use_swiotlb(dev). */
+void
+hwsw_sync_sg_for_device (struct device *dev, struct scatterlist *sg, int nelems, int dir)
+{
+	if (!use_swiotlb(dev)) {
+		hwiommu_sync_sg_for_device(dev, sg, nelems, dir);
+		return;
+	}
+
+	swiotlb_sync_sg_for_device(dev, sg, nelems, dir);
+}
+
+/*
+ * A mask is supported if the hardware I/O MMU accepts it (result 1);
+ * otherwise the sw I/O TLB's own answer is passed through unchanged.
+ */
+int
+hwsw_dma_supported (struct device *dev, u64 mask)
+{
+	return hwiommu_dma_supported(dev, mask) ? 1 : swiotlb_dma_supported(dev, mask);
+}
+
+/* Report 1 if either backend flags dma_addr as a mapping error, else 0. */
+int
+hwsw_dma_mapping_error (dma_addr_t dma_addr)
+{
+	/* The hardware check runs first; the sw I/O TLB is consulted only if it passes. */
+	if (hwiommu_dma_mapping_error (dma_addr))
+		return 1;
+
+	return swiotlb_dma_mapping_error(dma_addr) != 0;
+}
+
+EXPORT_SYMBOL(hwsw_dma_mapping_error);
+EXPORT_SYMBOL(hwsw_map_single);
+EXPORT_SYMBOL(hwsw_unmap_single);
+EXPORT_SYMBOL(hwsw_map_sg);
+EXPORT_SYMBOL(hwsw_unmap_sg);
+EXPORT_SYMBOL(hwsw_dma_supported);
+EXPORT_SYMBOL(hwsw_alloc_coherent);
+EXPORT_SYMBOL(hwsw_free_coherent);
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
new file mode 100644
index 00000000000..017c9ab5fc1
--- /dev/null
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -0,0 +1,2121 @@
+/*
+** IA64 System Bus Adapter (SBA) I/O MMU manager
+**
+** (c) Copyright 2002-2004 Alex Williamson
+** (c) Copyright 2002-2003 Grant Grundler
+** (c) Copyright 2002-2004 Hewlett-Packard Company
+**
+** Portions (c) 2000 Grant Grundler (from parisc I/O MMU code)
+** Portions (c) 1999 Dave S. Miller (from sparc64 I/O MMU code)
+**
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+**
+** This module initializes the IOC (I/O Controller) found on HP
+** McKinley machines and their successors.
+**
+*/
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/acpi.h>
+#include <linux/efi.h>
+#include <linux/nodemask.h>
+#include <linux/bitops.h> /* hweight64() */
+
+#include <asm/delay.h> /* ia64_get_itc() */
+#include <asm/io.h>
+#include <asm/page.h> /* PAGE_OFFSET */
+#include <asm/dma.h>
+#include <asm/system.h> /* wmb() */
+
+#include <asm/acpi-ext.h>
+
+#define PFX "IOC: "
+
+/*
+** Enable timing of pdir resource map searches. Output in /proc.
+** Disabled by default to optimize performance.
+*/
+#undef PDIR_SEARCH_TIMING
+
+/*
+** This option allows cards capable of 64bit DMA to bypass the IOMMU. If
+** not defined, all DMA will be 32bit and go through the TLB.
+** There's potentially a conflict in the bio merge code with us
+** advertising an iommu, but then bypassing it. Since I/O MMU bypassing
+** appears to give more performance than bio-level virtual merging, we'll
+** do the former for now. NOTE: BYPASS_SG also needs to be undef'd to
+** completely restrict DMA to the IOMMU.
+*/
+#define ALLOW_IOV_BYPASS
+
+/*
+** This option specifically allows/disallows bypassing scatterlists with
+** multiple entries. Coalescing these entries can allow better DMA streaming
+** and in some cases shows better performance than entirely bypassing the
+** IOMMU. Performance increase on the order of 1-2% sequential output/input
+** using bonnie++ on a RAID0 MD device (sym2 & mpt).
+*/
+#undef ALLOW_IOV_BYPASS_SG
+
+/*
+** If a device prefetches beyond the end of a valid pdir entry, it will cause
+** a hard failure, ie. MCA. Version 3.0 and later of the zx1 LBA should
+** disconnect on 4k boundaries and prevent such issues. If the device is
+** particularly aggressive, this option will keep the entire pdir valid such
+** that prefetching will hit a valid address. This could severely impact
+** error containment, and is therefore off by default. The page that is
+** used for spill-over is poisoned, so that should help debugging somewhat.
+*/
+#undef FULL_VALID_PDIR
+
+#define ENABLE_MARK_CLEAN
+
+/*
+** The number of debug flags is a clue - this code is fragile. NOTE: since
+** tightening the use of res_lock the resource bitmap and actual pdir are no
+** longer guaranteed to stay in sync. The sanity checking code isn't going to
+** like that.
+*/
+#undef DEBUG_SBA_INIT
+#undef DEBUG_SBA_RUN
+#undef DEBUG_SBA_RUN_SG
+#undef DEBUG_SBA_RESOURCE
+#undef ASSERT_PDIR_SANITY
+#undef DEBUG_LARGE_SG_ENTRIES
+#undef DEBUG_BYPASS
+
+#if defined(FULL_VALID_PDIR) && defined(ASSERT_PDIR_SANITY)
+#error FULL_VALID_PDIR and ASSERT_PDIR_SANITY are mutually exclusive
+#endif
+
+#define SBA_INLINE __inline__
+/* #define SBA_INLINE */
+
+#ifdef DEBUG_SBA_INIT
+#define DBG_INIT(x...) printk(x)
+#else
+#define DBG_INIT(x...)
+#endif
+
+#ifdef DEBUG_SBA_RUN
+#define DBG_RUN(x...) printk(x)
+#else
+#define DBG_RUN(x...)
+#endif
+
+#ifdef DEBUG_SBA_RUN_SG
+#define DBG_RUN_SG(x...) printk(x)
+#else
+#define DBG_RUN_SG(x...)
+#endif
+
+
+#ifdef DEBUG_SBA_RESOURCE
+#define DBG_RES(x...) printk(x)
+#else
+#define DBG_RES(x...)
+#endif
+
+#ifdef DEBUG_BYPASS
+#define DBG_BYPASS(x...) printk(x)
+#else
+#define DBG_BYPASS(x...)
+#endif
+
+#ifdef ASSERT_PDIR_SANITY
+/*
+ * Print the failed expression with its file and line, then panic.
+ * The body is wrapped in do { } while (0) so that "ASSERT(x);" is exactly
+ * one statement and cannot capture the else of an enclosing unbraced if.
+ */
+#define ASSERT(expr) \
+	do { \
+		if(!(expr)) { \
+			printk( "\n" __FILE__ ":%d: Assertion " #expr " failed!\n",__LINE__); \
+			panic(#expr); \
+		} \
+	} while (0)
+#else
+#define ASSERT(expr)
+#endif
+
+/*
+** The number of pdir entries to "free" before issuing
+** a read to PCOM register to flush out PCOM writes.
+** Interacts with allocation granularity (ie 4 or 8 entries
+** allocated and free'd/purged at a time might make this
+** less interesting).
+*/
+#define DELAYED_RESOURCE_CNT 64
+
+#define ZX1_IOC_ID ((PCI_DEVICE_ID_HP_ZX1_IOC << 16) | PCI_VENDOR_ID_HP)
+#define ZX2_IOC_ID ((PCI_DEVICE_ID_HP_ZX2_IOC << 16) | PCI_VENDOR_ID_HP)
+#define REO_IOC_ID ((PCI_DEVICE_ID_HP_REO_IOC << 16) | PCI_VENDOR_ID_HP)
+#define SX1000_IOC_ID ((PCI_DEVICE_ID_HP_SX1000_IOC << 16) | PCI_VENDOR_ID_HP)
+
+#define ZX1_IOC_OFFSET 0x1000 /* ACPI reports SBA, we want IOC */
+
+#define IOC_FUNC_ID 0x000
+#define IOC_FCLASS 0x008 /* function class, bist, header, rev... */
+#define IOC_IBASE 0x300 /* IO TLB */
+#define IOC_IMASK 0x308
+#define IOC_PCOM 0x310
+#define IOC_TCNFG 0x318
+#define IOC_PDIR_BASE 0x320
+
+#define IOC_ROPE0_CFG 0x500
+#define IOC_ROPE_AO 0x10 /* Allow "Relaxed Ordering" */
+
+
+/* AGP GART driver looks for this */
+#define ZX1_SBA_IOMMU_COOKIE 0x0000badbadc0ffeeUL
+
+/*
+** The zx1 IOC supports 4/8/16/64KB page sizes (see TCNFG register)
+**
+** Some IOCs (sx1000) can run at the above page sizes, but are
+** really only supported using the IOC at a 4k page size.
+**
+** iovp_size could only be greater than PAGE_SIZE if we are
+** confident the drivers really only touch the next physical
+** page iff that driver instance owns it.
+*/
+static unsigned long iovp_size;
+static unsigned long iovp_shift;
+static unsigned long iovp_mask;
+
+/*
+ * State kept for one I/O controller (IOC). The members up to the
+ * "performance path" marker below are the ones the mapping code touches;
+ * the rest is bookkeeping. Instances are chained through @next
+ * (presumably headed by ioc_list -- confirm against the probe code).
+ */
+struct ioc {
+ void __iomem *ioc_hpa; /* I/O MMU base address */
+ char *res_map; /* resource map, bit == pdir entry */
+ u64 *pdir_base; /* physical base address */
+ unsigned long ibase; /* pdir IOV Space base */
+ unsigned long imask; /* pdir IOV Space mask */
+
+ unsigned long *res_hint; /* next avail IOVP - circular search */
+ unsigned long dma_mask;
+ spinlock_t res_lock; /* protects the resource bitmap, but must be held when */
+ /* clearing pdir to prevent races with allocations. */
+ unsigned int res_bitshift; /* from the RIGHT! */
+ unsigned int res_size; /* size of resource map in bytes */
+#ifdef CONFIG_NUMA
+ unsigned int node; /* node where this IOC lives */
+#endif
+#if DELAYED_RESOURCE_CNT > 0
+ spinlock_t saved_lock; /* may want to try to get this on a separate cacheline */
+ /* than res_lock for bigger systems. */
+ int saved_cnt;
+ struct sba_dma_pair {
+ dma_addr_t iova;
+ size_t size;
+ } saved[DELAYED_RESOURCE_CNT];
+#endif
+
+#ifdef PDIR_SEARCH_TIMING
+#define SBA_SEARCH_SAMPLE 0x100
+ unsigned long avg_search[SBA_SEARCH_SAMPLE];
+ unsigned long avg_idx; /* current index into avg_search */
+#endif
+
+ /* Stuff we don't need in performance path */
+ struct ioc *next; /* list of IOC's in system */
+ acpi_handle handle; /* for multiple IOC's */
+ const char *name;
+ unsigned int func_id;
+ unsigned int rev; /* HW revision of chip */
+ u32 iov_size;
+ unsigned int pdir_size; /* in bytes, determined by IOV Space size */
+ struct pci_dev *sac_only_dev;
+};
+
+static struct ioc *ioc_list;
+static int reserve_sba_gart = 1;
+
+static SBA_INLINE void sba_mark_invalid(struct ioc *, dma_addr_t, size_t);
+static SBA_INLINE void sba_free_range(struct ioc *, dma_addr_t, size_t);
+
+#define sba_sg_address(sg) (page_address((sg)->page) + (sg)->offset)
+
+#ifdef FULL_VALID_PDIR
+static u64 prefetch_spill_page;
+#endif
+
+#ifdef CONFIG_PCI
+# define GET_IOC(dev) (((dev)->bus == &pci_bus_type) \
+ ? ((struct ioc *) PCI_CONTROLLER(to_pci_dev(dev))->iommu) : NULL)
+#else
+# define GET_IOC(dev) NULL
+#endif
+
+/*
+** DMA_CHUNK_SIZE is used by the SCSI mid-layer to break up
+** (or rather not merge) DMA's into manageable chunks.
+** On parisc, this is more of a software/tuning constraint
+** than a HW one. I/O MMU allocation algorithms can be
+** faster with smaller sizes (to some degree).
+*/
+#define DMA_CHUNK_SIZE (BITS_PER_LONG*iovp_size)
+
+/*
+ * Round x up to the next multiple of y, where y is a power of two.
+ * Both arguments are parenthesized so that expressions such as "a >> 1"
+ * expand correctly; y is evaluated twice.
+ */
+#define ROUNDUP(x,y) (((x) + ((y)-1)) & ~((y)-1))
+
+/************************************
+** SBA register read and write support
+**
+** BE WARNED: register writes are posted.
+** (ie follow writes which must reach HW with a read)
+**
+*/
+#define READ_REG(addr) __raw_readq(addr)
+#define WRITE_REG(val, addr) __raw_writeq(val, addr)
+
+#ifdef DEBUG_SBA_INIT
+
+/**
+ * sba_dump_tlb - debugging only - print IOMMU operating parameters
+ * @hpa: base address of the IOMMU
+ *
+ * Print @hpa itself followed by the IBASE, IMASK, TCNFG and PDIR_BASE
+ * registers read from it. Only compiled when DEBUG_SBA_INIT is defined,
+ * which is also what makes DBG_INIT() produce output.
+ */
+static void
+sba_dump_tlb(char *hpa)
+{
+ DBG_INIT("IO TLB at 0x%p\n", (void *)hpa);
+ DBG_INIT("IOC_IBASE : %016lx\n", READ_REG(hpa+IOC_IBASE));
+ DBG_INIT("IOC_IMASK : %016lx\n", READ_REG(hpa+IOC_IMASK));
+ DBG_INIT("IOC_TCNFG : %016lx\n", READ_REG(hpa+IOC_TCNFG));
+ DBG_INIT("IOC_PDIR_BASE: %016lx\n", READ_REG(hpa+IOC_PDIR_BASE));
+ DBG_INIT("\n");
+}
+#endif
+
+
+#ifdef ASSERT_PDIR_SANITY
+
+/**
+ * sba_dump_pdir_entry - debugging only - print one IOMMU PDIR entry
+ * @ioc: IO MMU structure which owns the pdir we are interested in.
+ * @msg: text to print on the output line.
+ * @pide: pdir index.
+ *
+ * Print one entry of the IO MMU PDIR in human readable form.
+ * The whole BITS_PER_LONG-entry group containing @pide is printed, with an
+ * arrow marking the line for @pide itself, preceded by the matching
+ * resource-map word.
+ */
+static void
+sba_dump_pdir_entry(struct ioc *ioc, char *msg, uint pide)
+{
+ /* start printing from lowest pde in rval */
+ u64 *ptr = &ioc->pdir_base[pide & ~(BITS_PER_LONG - 1)];
+ /* pide >> 3 is the byte offset of the bit; the mask rounds it down to a whole word */
+ unsigned long *rptr = (unsigned long *) &ioc->res_map[(pide >>3) & -sizeof(unsigned long)];
+ uint rcnt;
+
+ printk(KERN_DEBUG "SBA: %s rp %p bit %d rval 0x%lx\n",
+ msg, rptr, pide & (BITS_PER_LONG - 1), *rptr);
+
+ rcnt = 0;
+ while (rcnt < BITS_PER_LONG) {
+ printk(KERN_DEBUG "%s %2d %p %016Lx\n",
+ (rcnt == (pide & (BITS_PER_LONG - 1)))
+ ? " -->" : " ",
+ rcnt, ptr, (unsigned long long) *ptr );
+ rcnt++;
+ ptr++;
+ }
+ printk(KERN_DEBUG "%s", msg);
+}
+
+
+/**
+ * sba_check_pdir - debugging only - consistency checker
+ * @ioc: IO MMU structure which owns the pdir we are interested in.
+ * @msg: text to print on the output line.
+ *
+ * Verify the resource map and pdir state is consistent
+ *
+ * Walks the resource map 64 bits at a time and compares each bit with
+ * bit 63 of the corresponding pdir entry. Returns 1 after dumping the
+ * first mismatching entry, or 0 if the whole map agrees with the pdir.
+ */
+static int
+sba_check_pdir(struct ioc *ioc, char *msg)
+{
+ u64 *rptr_end = (u64 *) &(ioc->res_map[ioc->res_size]);
+ u64 *rptr = (u64 *) ioc->res_map; /* resource map ptr */
+ u64 *pptr = ioc->pdir_base; /* pdir ptr */
+ uint pide = 0;
+
+ while (rptr < rptr_end) {
+ u64 rval;
+ int rcnt; /* number of bits we might check */
+
+ rval = *rptr;
+ rcnt = 64;
+
+ while (rcnt) {
+ /* Get last byte and highest bit from that */
+ u32 pde = ((u32)((*pptr >> (63)) & 0x1));
+ /* the low bit of rval is the resource-map bit for this pdir entry */
+ if ((rval & 0x1) ^ pde)
+ {
+ /*
+ ** BUMMER! -- res_map != pdir --
+ ** Dump rval and matching pdir entries
+ */
+ sba_dump_pdir_entry(ioc, msg, pide);
+ return(1);
+ }
+ rcnt--;
+ rval >>= 1; /* try the next bit */
+ pptr++;
+ pide++;
+ }
+ rptr++; /* look at next word of res_map */
+ }
+ /* It'd be nice if we always got here :^) */
+ return 0;
+}
+
+
+/**
+ * sba_dump_sg - debugging only - print Scatter-Gather list
+ * @ioc: IO MMU structure which owns the pdir we are interested in.
+ * @startsg: head of the SG list
+ * @nents: number of entries in SG list
+ *
+ * Print every entry of the list (DMA address/length and CPU address) so
+ * it can be checked by hand. The leading number on each line counts down:
+ * it is the number of entries that follow the one being printed.
+ */
+static void
+sba_dump_sg( struct ioc *ioc, struct scatterlist *startsg, int nents)
+{
+	struct scatterlist *sg;
+
+	for (sg = startsg; nents > 0; sg++) {
+		nents--;
+		printk(KERN_DEBUG " %d : DMA %08lx/%05x CPU %p\n", nents,
+			sg->dma_address, sg->dma_length,
+			sba_sg_address(sg));
+	}
+}
+
+/*
+ * Debugging only: dump the whole SG list once for every entry whose CPU
+ * address (as computed by sba_sg_address()) is null.
+ */
+static void
+sba_check_sg( struct ioc *ioc, struct scatterlist *startsg, int nents)
+{
+	int idx;
+
+	for (idx = 0; idx < nents; idx++) {
+		if (sba_sg_address(&startsg[idx]) == 0x0UL)
+			sba_dump_sg(NULL, startsg, nents);
+	}
+}
+
+#endif /* ASSERT_PDIR_SANITY */
+
+
+
+
+/**************************************************************
+*
+* I/O Pdir Resource Management
+*
+* Bits set in the resource map are in use.
+* Each bit can represent a number of pages.
+* LSbs represent lower addresses (IOVA's).
+*
+***************************************************************/
+#define PAGES_PER_RANGE 1 /* could increase this to 4 or 8 if needed */
+
+/*
+ * Convert from IOVP to IOVA and vice versa.
+ * SBA_IOVA ORs the IOC's ibase and a page offset into an IOVP; SBA_IOVP
+ * masks the ibase bits back out. The ioc argument is parenthesized so that
+ * any pointer expression (e.g. "list + 1") can be passed.
+ */
+#define SBA_IOVA(ioc,iovp,offset) (((ioc)->ibase) | (iovp) | (offset))
+#define SBA_IOVP(ioc,iova) ((iova) & ~((ioc)->ibase))
+
+#define PDIR_ENTRY_SIZE sizeof(u64)
+
+#define PDIR_INDEX(iovp) ((iovp)>>iovp_shift)
+
+#define RESMAP_MASK(n) ~(~0UL << (n))
+#define RESMAP_IDX_MASK (sizeof(unsigned long) - 1)
+
+
+/**
+ * For most cases the normal get_order is sufficient, however it limits us
+ * to PAGE_SIZE being the minimum mapping alignment and TC flush granularity.
+ * It only incurs about 1 clock cycle to use this one with the static variable
+ * and makes the code more intuitive.
+ *
+ * @size is in bytes. The result is never negative; callers in this file use
+ * it as a shift count (1UL << order) to obtain a power-of-two number of
+ * iovp_size pages.
+ */
+static SBA_INLINE int
+get_iovp_order (unsigned long size)
+{
+ long double d = size - 1;
+ long order;
+
+ /* NOTE(review): relies on ia64_getf_exp() returning the floating-point exponent field of d -- confirm */
+ order = ia64_getf_exp(d);
+ /* NOTE(review): 0xffff is presumably the exponent bias of that field -- confirm */
+ order = order - iovp_shift - 0xffff + 1;
+ /* sizes that fit in a single IO page would come out negative; clamp to order 0 */
+ if (order < 0)
+ order = 0;
+ return order;
+}
+
+/**
+ * sba_search_bitmap - find free space in IO PDIR resource bitmap
+ * @ioc: IO MMU structure which owns the pdir we are interested in.
+ * @bits_wanted: number of entries we need.
+ *
+ * Find consecutive free bits in resource bitmap.
+ * Each bit represents one entry in the IO Pdir.
+ * Cool perf optimization: search for log2(size) bits at a time.
+ *
+ * The request is rounded up to a power of two. The search starts at
+ * ioc->res_hint / ioc->res_bitshift and only runs forward to the end of the
+ * map. On success the bits are marked busy, the hint is advanced and the
+ * index of the first allocated pdir entry is returned. On failure ~0UL is
+ * returned and the hint is reset to the start of the map, so that a second
+ * call rescans from the beginning. sba_alloc_range() calls this with
+ * ioc->res_lock held.
+ */
+static SBA_INLINE unsigned long
+sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted)
+{
+	unsigned long *res_ptr = ioc->res_hint;
+	unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]);
+	unsigned long pide = ~0UL;
+
+	ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0);
+	ASSERT(res_ptr < res_end);
+
+	/*
+	 * N.B. REO/Grande defect AR2305 can cause TLB fetch timeouts
+	 * if a TLB entry is purged while in use. sba_mark_invalid()
+	 * purges IOTLB entries in power-of-two sizes, so we also
+	 * allocate IOVA space in power-of-two sizes.
+	 */
+	bits_wanted = 1UL << get_iovp_order(bits_wanted << iovp_shift);
+
+	if (likely(bits_wanted == 1)) {
+		unsigned int bitshiftcnt;
+		for(; res_ptr < res_end ; res_ptr++) {
+			if (likely(*res_ptr != ~0UL)) {
+				bitshiftcnt = ffz(*res_ptr);
+				*res_ptr |= (1UL << bitshiftcnt);
+				pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
+				pide <<= 3; /* convert to bit address */
+				pide += bitshiftcnt;
+				/* may become BITS_PER_LONG when bit 63 was taken */
+				ioc->res_bitshift = bitshiftcnt + bits_wanted;
+				goto found_it;
+			}
+		}
+		goto not_found;
+
+	}
+
+	if (likely(bits_wanted <= BITS_PER_LONG/2)) {
+		/*
+		** Search the resource bit map on well-aligned values.
+		** "o" is the alignment.
+		** We need the alignment to invalidate I/O TLB using
+		** SBA HW features in the unmap path.
+		*/
+		unsigned long o = 1 << get_iovp_order(bits_wanted << iovp_shift);
+		uint bitshiftcnt = ROUNDUP(ioc->res_bitshift, o);
+		unsigned long mask, base_mask;
+
+		/*
+		** The previous allocation may have ended on the last bit of
+		** the hinted word, leaving the starting shift at BITS_PER_LONG.
+		** Shifting by the full width of the type is undefined in C,
+		** so step to the next word explicitly instead of relying on
+		** the shift producing an all-zero mask.
+		*/
+		if (bitshiftcnt >= BITS_PER_LONG) {
+			bitshiftcnt = 0;
+			res_ptr++;
+		}
+
+		base_mask = RESMAP_MASK(bits_wanted);
+		mask = base_mask << bitshiftcnt;
+
+		DBG_RES("%s() o %ld %p", __FUNCTION__, o, res_ptr);
+		for(; res_ptr < res_end ; res_ptr++)
+		{
+			DBG_RES(" %p %lx %lx\n", res_ptr, mask, *res_ptr);
+			ASSERT(0 != mask);
+			for (; mask ; mask <<= o, bitshiftcnt += o) {
+				if(0 == ((*res_ptr) & mask)) {
+					*res_ptr |= mask; /* mark resources busy! */
+					pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
+					pide <<= 3; /* convert to bit address */
+					pide += bitshiftcnt;
+					ioc->res_bitshift = bitshiftcnt + bits_wanted;
+					goto found_it;
+				}
+			}
+
+			/* nothing free in this word: restart at bit 0 of the next one */
+			bitshiftcnt = 0;
+			mask = base_mask;
+
+		}
+
+	} else {
+		int qwords, bits, i;
+		unsigned long *end;
+
+		qwords = bits_wanted >> 6; /* /64 */
+		bits = bits_wanted - (qwords * BITS_PER_LONG);
+
+		/* leave room so that res_ptr[qwords] stays inside the map */
+		end = res_end - qwords;
+
+		for (; res_ptr < end; res_ptr++) {
+			for (i = 0 ; i < qwords ; i++) {
+				if (res_ptr[i] != 0)
+					goto next_ptr;
+			}
+			if (bits && res_ptr[i] && (__ffs(res_ptr[i]) < bits))
+				continue;
+
+			/* Found it, mark it */
+			for (i = 0 ; i < qwords ; i++)
+				res_ptr[i] = ~0UL;
+			res_ptr[i] |= RESMAP_MASK(bits);
+
+			pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
+			pide <<= 3; /* convert to bit address */
+			res_ptr += qwords;
+			ioc->res_bitshift = bits;
+			goto found_it;
+next_ptr:
+			;
+		}
+	}
+
+not_found:
+	/* wrap the hint so the caller's retry scans from the start of the map */
+	prefetch(ioc->res_map);
+	ioc->res_hint = (unsigned long *) ioc->res_map;
+	ioc->res_bitshift = 0;
+	return (pide);
+
+found_it:
+	ioc->res_hint = res_ptr;
+	return (pide);
+}
+
+
+/**
+ * sba_alloc_range - find free bits and mark them in IO PDIR resource bitmap
+ * @ioc: IO MMU structure which owns the pdir we are interested in.
+ * @size: number of bytes to create a mapping for; must be a non-zero
+ *	multiple of the IOV page size.
+ *
+ * Given a size, find consecutive unmarked bits and then mark those bits in
+ * the resource bit map.  The search runs under ioc->res_lock and is tried
+ * twice; when delayed freeing is compiled in, the deferred unmaps are
+ * flushed and a third attempt is made.  Panics if no range can be found.
+ *
+ * Returns the pdir index of the first entry of the allocated range.
+ */
+static int
+sba_alloc_range(struct ioc *ioc, size_t size)
+{
+	unsigned int pages_needed = size >> iovp_shift;
+#ifdef PDIR_SEARCH_TIMING
+	unsigned long itc_start;
+#endif
+	unsigned long pide;
+	unsigned long flags;
+
+	ASSERT(pages_needed);
+	ASSERT(0 == (size & ~iovp_mask));
+
+	spin_lock_irqsave(&ioc->res_lock, flags);
+
+#ifdef PDIR_SEARCH_TIMING
+	itc_start = ia64_get_itc();
+#endif
+	/*
+	** "seek and ye shall find"...praying never hurts either...
+	*/
+	pide = sba_search_bitmap(ioc, pages_needed);
+	if (unlikely(pide >= (ioc->res_size << 3))) {
+		pide = sba_search_bitmap(ioc, pages_needed);
+		if (unlikely(pide >= (ioc->res_size << 3))) {
+#if DELAYED_RESOURCE_CNT > 0
+			/*
+			** With delayed resource freeing, we can give this one more shot.  We're
+			** getting close to being in trouble here, so do what we can to make this
+			** one count.
+			**
+			** NOTE(review): saved_lock is taken here while res_lock is held;
+			** sba_unmap_single() nests the two locks the other way round.
+			** Confirm this ordering cannot deadlock.
+			*/
+			spin_lock(&ioc->saved_lock);
+			if (ioc->saved_cnt > 0) {
+				struct sba_dma_pair *d;
+				int cnt = ioc->saved_cnt;
+
+				/*
+				** saved[] holds saved_cnt valid entries, so the last
+				** one lives at index saved_cnt - 1.  Start there and
+				** walk back down to saved[0].
+				*/
+				d = &(ioc->saved[ioc->saved_cnt - 1]);
+
+				while (cnt--) {
+					sba_mark_invalid(ioc, d->iova, d->size);
+					sba_free_range(ioc, d->iova, d->size);
+					d--;
+				}
+				ioc->saved_cnt = 0;
+				READ_REG(ioc->ioc_hpa+IOC_PCOM);	/* flush purges */
+			}
+			spin_unlock(&ioc->saved_lock);
+
+			pide = sba_search_bitmap(ioc, pages_needed);
+			if (unlikely(pide >= (ioc->res_size << 3)))
+				panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n",
+				      ioc->ioc_hpa);
+#else
+			panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n",
+			      ioc->ioc_hpa);
+#endif
+		}
+	}
+
+#ifdef PDIR_SEARCH_TIMING
+	ioc->avg_search[ioc->avg_idx++] = (ia64_get_itc() - itc_start) / pages_needed;
+	ioc->avg_idx &= SBA_SEARCH_SAMPLE - 1;
+#endif
+
+	prefetchw(&(ioc->pdir_base[pide]));
+
+#ifdef ASSERT_PDIR_SANITY
+	/* verify the first enable bit is clear */
+	if(0x00 != ((u8 *) ioc->pdir_base)[pide*PDIR_ENTRY_SIZE + 7]) {
+		sba_dump_pdir_entry(ioc, "sba_search_bitmap() botched it?", pide);
+	}
+#endif
+
+	DBG_RES("%s(%x) %d -> %lx hint %x/%x\n",
+		__FUNCTION__, size, pages_needed, pide,
+		(uint) ((unsigned long) ioc->res_hint - (unsigned long) ioc->res_map),
+		ioc->res_bitshift );
+
+	spin_unlock_irqrestore(&ioc->res_lock, flags);
+
+	return (pide);
+}
+
+
+/**
+ * sba_free_range - unmark bits in IO PDIR resource bitmap
+ * @ioc: IO MMU structure which owns the pdir we are interested in.
+ * @iova: IO virtual address which was previously allocated.
+ * @size: number of bytes covered by the mapping being released
+ *
+ * Clear bits in the ioc's resource map.  The bit count is rounded up to a
+ * power of two first.  Whole words are cleared while more than
+ * BITS_PER_LONG bits remain; the remainder is cleared with a mask.
+ */
+static SBA_INLINE void
+sba_free_range(struct ioc *ioc, dma_addr_t iova, size_t size)
+{
+	unsigned long iovp = SBA_IOVP(ioc, iova);
+	unsigned int pide = PDIR_INDEX(iovp);
+	unsigned int ridx = pide >> 3;	/* convert bit to byte address */
+	unsigned long *res_ptr = (unsigned long *) &((ioc)->res_map[ridx & ~RESMAP_IDX_MASK]);
+	int bits_not_wanted = size >> iovp_shift;
+	unsigned long m;
+
+	/* Round up to power-of-two size: see AR2305 note above */
+	bits_not_wanted = 1UL << get_iovp_order(bits_not_wanted << iovp_shift);
+	for (; bits_not_wanted > 0 ; res_ptr++) {
+
+		if (unlikely(bits_not_wanted > BITS_PER_LONG)) {
+
+			/* these mappings start 64bit aligned */
+			*res_ptr = 0UL;
+			bits_not_wanted -= BITS_PER_LONG;
+			pide += BITS_PER_LONG;
+
+		} else {
+
+			/* 3-bits "bit" address plus 2 (or 3) bits for "byte" == bit in word */
+			m = RESMAP_MASK(bits_not_wanted) << (pide & (BITS_PER_LONG - 1));
+
+			DBG_RES("%s( ,%x,%x) %x/%lx %x %p %lx\n", __FUNCTION__, (uint) iova, size,
+				bits_not_wanted, m, pide, res_ptr, *res_ptr);
+
+			ASSERT(m != 0);
+			ASSERT(bits_not_wanted);
+			ASSERT((*res_ptr & m) == m); /* verify same bits are set */
+			*res_ptr &= ~m;
+
+			/*
+			** Only zero the count once the debug output and the
+			** assertions above have seen the real value; this
+			** also terminates the loop.
+			*/
+			bits_not_wanted = 0;
+		}
+	}
+}
+
+
+/**************************************************************
+*
+* "Dynamic DMA Mapping" support (aka "Coherent I/O")
+*
+***************************************************************/
+
+/**
+ * sba_io_pdir_entry - fill in one IO PDIR entry
+ * @pdir_ptr: pointer to IO PDIR entry
+ * @vba: Virtual CPU address of buffer to map
+ *
+ * SBA Mapping Routine
+ *
+ * Given a virtual address (vba, arg1) sba_io_pdir_entry()
+ * loads the I/O PDIR entry pointed to by pdir_ptr (arg0).
+ * Each IO Pdir entry consists of 8 bytes as shown below
+ * (LSB == bit 0):
+ *
+ * 63                      40                                 11   7        0
+ * +-+---------------------+----------------------------------+----+--------+
+ * |V|          U          |            PPN[39:12]            | U  |   FF   |
+ * +-+---------------------+----------------------------------+----+--------+
+ *
+ * V == Valid Bit
+ * U == Unused
+ * PPN == Physical Page Number
+ *
+ * The physical address fields are filled with the results of virt_to_phys()
+ * on the vba.
+ *
+ * The active (macro) implementation does not call virt_to_phys(): it masks
+ * off the top three bits and the low twelve bits of @vba and sets bit 63.
+ * Unlike the disabled inline variant it does not set the low byte to 0xFF.
+ * NOTE(review): the macro arguments are not parenthesized, so pass only
+ * simple expressions or casts.
+ */
+
+#if 1
+#define sba_io_pdir_entry(pdir_ptr, vba) *pdir_ptr = ((vba & ~0xE000000000000FFFULL) \
+ | 0x8000000000000000ULL)
+#else
+void SBA_INLINE
+sba_io_pdir_entry(u64 *pdir_ptr, unsigned long vba)
+{
+ *pdir_ptr = ((vba & ~0xE000000000000FFFULL) | 0x80000000000000FFULL);
+}
+#endif
+
+#ifdef ENABLE_MARK_CLEAN
+/**
+ * Since DMA is i-cache coherent, any (complete) pages that were written via
+ * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
+ * flush them when they get mapped into an executable vm-area.
+ */
+static void
+mark_clean (void *addr, size_t size)
+{
+	unsigned long limit = (unsigned long) addr + size;
+	unsigned long cur;
+
+	/*
+	 * Start at the first page boundary at or after addr and flag every
+	 * page that lies completely inside [addr, addr + size).
+	 */
+	for (cur = PAGE_ALIGN((unsigned long) addr); cur + PAGE_SIZE <= limit; cur += PAGE_SIZE) {
+		struct page *pg = virt_to_page((void *)cur);
+
+		set_bit(PG_arch_1, &pg->flags);
+	}
+}
+#endif
+
+/**
+ * sba_mark_invalid - invalidate one or more IO PDIR entries
+ * @ioc: IO MMU structure which owns the pdir we are interested in.
+ * @iova: IO Virtual Address mapped earlier
+ * @byte_cnt: number of bytes this mapping covers.
+ *
+ * Marking the IO PDIR entry(ies) as Invalid and invalidate
+ * corresponding IO TLB entry. The PCOM (Purge Command Register)
+ * is to purge stale entries in the IO TLB when unmapping entries.
+ *
+ * The PCOM register supports purging of multiple pages, with a minimum
+ * of 1 page and a maximum of 2GB. Hardware requires the address be
+ * aligned to the size of the range being purged. The size of the range
+ * must be a power of 2. The "Cool perf optimization" in the
+ * allocation routine helps keep that true.
+ *
+ * A single PCOM write at the end covers the whole range.  The callers in
+ * this file follow it with a read of PCOM to flush the purge.
+ */
+static SBA_INLINE void
+sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt)
+{
+ u32 iovp = (u32) SBA_IOVP(ioc,iova);
+
+ int off = PDIR_INDEX(iovp);
+
+ /* Must be non-zero and rounded up */
+ ASSERT(byte_cnt > 0);
+ ASSERT(0 == (byte_cnt & ~iovp_mask));
+
+#ifdef ASSERT_PDIR_SANITY
+ /* Assert first pdir entry is set (the test looks at bits 63..60) */
+ if (!(ioc->pdir_base[off] >> 60)) {
+ sba_dump_pdir_entry(ioc,"sba_mark_invalid()", PDIR_INDEX(iovp));
+ }
+#endif
+
+ if (byte_cnt <= iovp_size)
+ {
+ ASSERT(off < ioc->pdir_size);
+
+ iovp |= iovp_shift; /* set "size" field for PCOM */
+
+#ifndef FULL_VALID_PDIR
+ /*
+ ** clear I/O PDIR entry "valid" bit
+ ** Do NOT clear the rest - save it for debugging.
+ ** We should only clear bits that have previously
+ ** been enabled.
+ */
+ ioc->pdir_base[off] &= ~(0x80000000000000FFULL);
+#else
+ /*
+ ** If we want to maintain the PDIR as valid, put in
+ ** the spill page so devices prefetching won't
+ ** cause a hard fail.
+ */
+ ioc->pdir_base[off] = (0x80000000000000FFULL | prefetch_spill_page);
+#endif
+ } else {
+ u32 t = get_iovp_order(byte_cnt) + iovp_shift;
+
+ iovp |= t;
+ ASSERT(t <= 31); /* 2GB! Max value of "size" field */
+
+ do {
+ /* verify this pdir entry is enabled */
+ ASSERT(ioc->pdir_base[off] >> 63);
+#ifndef FULL_VALID_PDIR
+ /* clear I/O Pdir entry "valid" bit first */
+ ioc->pdir_base[off] &= ~(0x80000000000000FFULL);
+#else
+ ioc->pdir_base[off] = (0x80000000000000FFULL | prefetch_spill_page);
+#endif
+ off++;
+ byte_cnt -= iovp_size;
+ } while (byte_cnt > 0);
+ }
+
+ WRITE_REG(iovp | ioc->ibase, ioc->ioc_hpa+IOC_PCOM);
+}
+
+/**
+ * sba_map_single - map one buffer and return IOVA for DMA
+ * @dev: instance of PCI owned by the driver that's asking.
+ * @addr: driver buffer to map.
+ * @size: number of bytes to map in driver buffer.
+ * @dir: R/W or both.
+ *
+ * Returns the address the device should use for DMA: the physical address
+ * of @addr when the bypass path is compiled in and the device's DMA mask
+ * covers it, otherwise an IOVA backed by newly allocated IO PDIR entries.
+ *
+ * See Documentation/DMA-mapping.txt
+ */
+dma_addr_t
+sba_map_single(struct device *dev, void *addr, size_t size, int dir)
+{
+ struct ioc *ioc;
+ dma_addr_t iovp;
+ dma_addr_t offset;
+ u64 *pdir_start;
+ int pide;
+#ifdef ASSERT_PDIR_SANITY
+ unsigned long flags;
+#endif
+#ifdef ALLOW_IOV_BYPASS
+ unsigned long pci_addr = virt_to_phys(addr);
+#endif
+
+#ifdef ALLOW_IOV_BYPASS
+ ASSERT(to_pci_dev(dev)->dma_mask);
+ /*
+ ** Check if the PCI device can DMA to addr... if so, just return
+ ** its physical address
+ */
+ if (likely((pci_addr & ~to_pci_dev(dev)->dma_mask) == 0)) {
+ /*
+ ** Device is capable of DMA'ing to the buffer...
+ ** just return the PCI address of addr
+ */
+ DBG_BYPASS("sba_map_single() bypass mask/addr: 0x%lx/0x%lx\n",
+ to_pci_dev(dev)->dma_mask, pci_addr);
+ return pci_addr;
+ }
+#endif
+ ioc = GET_IOC(dev);
+ ASSERT(ioc);
+
+ prefetch(ioc->res_hint);
+
+ ASSERT(size > 0);
+ ASSERT(size <= DMA_CHUNK_SIZE);
+
+ /* save offset bits */
+ offset = ((dma_addr_t) (long) addr) & ~iovp_mask;
+
+ /* round up to nearest iovp_size */
+ size = (size + offset + ~iovp_mask) & iovp_mask;
+
+#ifdef ASSERT_PDIR_SANITY
+ spin_lock_irqsave(&ioc->res_lock, flags);
+ if (sba_check_pdir(ioc,"Check before sba_map_single()"))
+ panic("Sanity check failed");
+ spin_unlock_irqrestore(&ioc->res_lock, flags);
+#endif
+
+ pide = sba_alloc_range(ioc, size);
+
+ iovp = (dma_addr_t) pide << iovp_shift;
+
+ DBG_RUN("%s() 0x%p -> 0x%lx\n",
+ __FUNCTION__, addr, (long) iovp | offset);
+
+ pdir_start = &(ioc->pdir_base[pide]);
+
+ while (size > 0) {
+ ASSERT(((u8 *)pdir_start)[7] == 0); /* verify availability */
+ sba_io_pdir_entry(pdir_start, (unsigned long) addr);
+
+ DBG_RUN(" pdir 0x%p %lx\n", pdir_start, *pdir_start);
+
+ addr += iovp_size;
+ size -= iovp_size;
+ pdir_start++;
+ }
+ /* force pdir update */
+ wmb();
+
+ /* form complete address (done by SBA_IOVA() in the return below) */
+#ifdef ASSERT_PDIR_SANITY
+ spin_lock_irqsave(&ioc->res_lock, flags);
+ sba_check_pdir(ioc,"Check after sba_map_single()");
+ spin_unlock_irqrestore(&ioc->res_lock, flags);
+#endif
+ return SBA_IOVA(ioc, iovp, offset);
+}
+
+/**
+ * sba_unmap_single - unmap one IOVA and free resources
+ * @dev: instance of PCI owned by the driver that's asking.
+ * @iova: IOVA of driver buffer previously mapped.
+ * @size: number of bytes mapped in driver buffer.
+ * @dir: R/W or both.
+ *
+ * Addresses outside the IOC's IOVA window are treated as bypass mappings
+ * and need no IOMMU work.  Otherwise the PDIR entries are invalidated and
+ * the resource-map bits released, either immediately or (with delayed
+ * freeing) once DELAYED_RESOURCE_CNT unmaps have been queued.
+ *
+ * See Documentation/DMA-mapping.txt
+ */
+void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir)
+{
+	struct ioc *ioc;
+#if DELAYED_RESOURCE_CNT > 0
+	struct sba_dma_pair *d;
+#endif
+	unsigned long flags;
+	dma_addr_t offset;
+
+	ioc = GET_IOC(dev);
+	ASSERT(ioc);
+
+#ifdef ALLOW_IOV_BYPASS
+	if (likely((iova & ioc->imask) != ioc->ibase)) {
+		/*
+		** Address does not fall w/in IOVA, must be bypassing
+		*/
+		DBG_BYPASS("sba_unmap_single() bypass addr: 0x%lx\n", iova);
+
+#ifdef ENABLE_MARK_CLEAN
+		if (dir == DMA_FROM_DEVICE) {
+			mark_clean(phys_to_virt(iova), size);
+		}
+#endif
+		return;
+	}
+#endif
+	offset = iova & ~iovp_mask;
+
+	DBG_RUN("%s() iovp 0x%lx/%x\n",
+		__FUNCTION__, (long) iova, size);
+
+	iova ^= offset;        /* clear offset bits */
+	size += offset;
+	size = ROUNDUP(size, iovp_size);
+
+#ifdef ENABLE_MARK_CLEAN
+	/*
+	** Read the physical addresses back out of the PDIR *before* the
+	** entries are invalidated and released below.  Afterwards an entry
+	** may hold the spill page (FULL_VALID_PDIR) or already belong to a
+	** new mapping, and the wrong pages would be marked clean.
+	*/
+	if (dir == DMA_FROM_DEVICE) {
+		u32 iovp = (u32) SBA_IOVP(ioc,iova);
+		int off = PDIR_INDEX(iovp);
+		void *addr;
+
+		if (size <= iovp_size) {
+			addr = phys_to_virt(ioc->pdir_base[off] &
+					    ~0xE000000000000FFFULL);
+			mark_clean(addr, size);
+		} else {
+			size_t byte_cnt = size;
+
+			/* size is a multiple of iovp_size, so this ends at 0 */
+			do {
+				addr = phys_to_virt(ioc->pdir_base[off] &
+						    ~0xE000000000000FFFULL);
+				mark_clean(addr, min(byte_cnt, iovp_size));
+				off++;
+				byte_cnt -= iovp_size;
+
+			   } while (byte_cnt > 0);
+		}
+	}
+#endif
+
+#if DELAYED_RESOURCE_CNT > 0
+	spin_lock_irqsave(&ioc->saved_lock, flags);
+	d = &(ioc->saved[ioc->saved_cnt]);
+	d->iova = iova;
+	d->size = size;
+	if (unlikely(++(ioc->saved_cnt) >= DELAYED_RESOURCE_CNT)) {
+		int cnt = ioc->saved_cnt;
+		/* d points at the newest entry; walk back down to saved[0] */
+		spin_lock(&ioc->res_lock);
+		while (cnt--) {
+			sba_mark_invalid(ioc, d->iova, d->size);
+			sba_free_range(ioc, d->iova, d->size);
+			d--;
+		}
+		ioc->saved_cnt = 0;
+		READ_REG(ioc->ioc_hpa+IOC_PCOM);	/* flush purges */
+		spin_unlock(&ioc->res_lock);
+	}
+	spin_unlock_irqrestore(&ioc->saved_lock, flags);
+#else /* DELAYED_RESOURCE_CNT == 0 */
+	spin_lock_irqsave(&ioc->res_lock, flags);
+	sba_mark_invalid(ioc, iova, size);
+	sba_free_range(ioc, iova, size);
+	READ_REG(ioc->ioc_hpa+IOC_PCOM);	/* flush purges */
+	spin_unlock_irqrestore(&ioc->res_lock, flags);
+#endif /* DELAYED_RESOURCE_CNT == 0 */
+}
+
+
+/**
+ * sba_alloc_coherent - allocate/map shared mem for DMA
+ * @dev: instance of PCI owned by the driver that's asking.
+ * @size: number of bytes mapped in driver buffer.
+ * @dma_handle: IOVA of new buffer.
+ * @flags: allocation flags handed to the page allocator.
+ *
+ * The buffer is zeroed before it is returned.  Returns the kernel virtual
+ * address of the buffer, or NULL if the pages could not be allocated, in
+ * which case *@dma_handle is left untouched.
+ *
+ * See Documentation/DMA-mapping.txt
+ */
+void *
+sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, int flags)
+{
+ struct ioc *ioc;
+ void *addr;
+
+ ioc = GET_IOC(dev);
+ ASSERT(ioc);
+
+#ifdef CONFIG_NUMA
+ {
+ struct page *page;
+ page = alloc_pages_node(ioc->node == MAX_NUMNODES ?
+ numa_node_id() : ioc->node, flags,
+ get_order(size));
+
+ if (unlikely(!page))
+ return NULL;
+
+ addr = page_address(page);
+ }
+#else
+ addr = (void *) __get_free_pages(flags, get_order(size));
+#endif
+ if (unlikely(!addr))
+ return NULL;
+
+ memset(addr, 0, size);
+ *dma_handle = virt_to_phys(addr);
+
+#ifdef ALLOW_IOV_BYPASS
+ ASSERT(dev->coherent_dma_mask);
+ /*
+ ** Check if the PCI device can DMA to addr... if so, the physical
+ ** address already stored in *dma_handle is used as is
+ */
+ if (likely((*dma_handle & ~dev->coherent_dma_mask) == 0)) {
+ DBG_BYPASS("sba_alloc_coherent() bypass mask/addr: 0x%lx/0x%lx\n",
+ dev->coherent_dma_mask, *dma_handle);
+
+ return addr;
+ }
+#endif
+
+ /*
+ * If device can't bypass or bypass is disabled, pass the 32bit fake
+ * device to map single to get an iova mapping.
+ */
+ *dma_handle = sba_map_single(&ioc->sac_only_dev->dev, addr, size, 0);
+
+ return addr;
+}
+
+
+/**
+ * sba_free_coherent - free/unmap shared mem for DMA
+ * @dev: instance of PCI owned by the driver that's asking.
+ * @size: number of bytes mapped in driver buffer.
+ * @vaddr: kernel virtual address of the "consistent" buffer.
+ * @dma_handle: IO virtual address of "consistent" buffer.
+ *
+ * Unmaps @dma_handle first, then returns the pages behind @vaddr to the
+ * page allocator.
+ *
+ * See Documentation/DMA-mapping.txt
+ */
+void sba_free_coherent (struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle)
+{
+ sba_unmap_single(dev, dma_handle, size, 0);
+ free_pages((unsigned long) vaddr, get_order(size));
+}
+
+
+/*
+** Since 0 is a valid pdir_base index value, can't use that
+** to determine if a value is valid or not. Use a flag to indicate
+** the SG list entry contains a valid pdir index.
+*/
+#define PIDE_FLAG 0x1UL
+
+#ifdef DEBUG_LARGE_SG_ENTRIES
+int dump_run_sg = 0;
+#endif
+
+
+/**
+ * sba_fill_pdir - write allocated SG entries into IO PDIR
+ * @ioc: IO MMU structure which owns the pdir we are interested in.
+ * @startsg: list of IOVA/size pairs
+ * @nents: number of entries in startsg list
+ *
+ * Take preprocessed SG list and write corresponding entries
+ * in the IO PDIR.
+ *
+ * Returns the number of DMA streams found, i.e. the number of entries
+ * whose dma_address carried PIDE_FLAG.
+ */
+
+static SBA_INLINE int
+sba_fill_pdir(
+ struct ioc *ioc,
+ struct scatterlist *startsg,
+ int nents)
+{
+ struct scatterlist *dma_sg = startsg; /* pointer to current DMA */
+ int n_mappings = 0;
+ u64 *pdirp = NULL;
+ unsigned long dma_offset = 0;
+
+ dma_sg--;
+ while (nents-- > 0) {
+ int cnt = startsg->dma_length;
+ startsg->dma_length = 0;
+
+#ifdef DEBUG_LARGE_SG_ENTRIES
+ if (dump_run_sg)
+ printk(" %2d : %08lx/%05x %p\n",
+ nents, startsg->dma_address, cnt,
+ sba_sg_address(startsg));
+#else
+ DBG_RUN_SG(" %d : %08lx/%05x %p\n",
+ nents, startsg->dma_address, cnt,
+ sba_sg_address(startsg));
+#endif
+ /*
+ ** Look for the start of a new DMA stream
+ **
+ ** NOTE(review): PIDE_FLAG occupies bit 0, which is also the lowest
+ ** bit of the byte offset, so an odd offset would appear to be lost
+ ** when the flag is masked off. Confirm buffers are never odd-aligned.
+ */
+ if (startsg->dma_address & PIDE_FLAG) {
+ u32 pide = startsg->dma_address & ~PIDE_FLAG;
+ dma_offset = (unsigned long) pide & ~iovp_mask;
+ startsg->dma_address = 0;
+ dma_sg++;
+ dma_sg->dma_address = pide | ioc->ibase;
+ pdirp = &(ioc->pdir_base[pide >> iovp_shift]);
+ n_mappings++;
+ }
+
+ /*
+ ** Look for a VCONTIG chunk
+ */
+ if (cnt) {
+ unsigned long vaddr = (unsigned long) sba_sg_address(startsg);
+ ASSERT(pdirp);
+
+ /* Since multiple Vcontig blocks could make up
+ ** one DMA stream, *add* cnt to dma_len.
+ */
+ dma_sg->dma_length += cnt;
+ cnt += dma_offset;
+ dma_offset=0; /* only want offset on first chunk */
+ cnt = ROUNDUP(cnt, iovp_size);
+ do {
+ sba_io_pdir_entry(pdirp, vaddr);
+ vaddr += iovp_size;
+ cnt -= iovp_size;
+ pdirp++;
+ } while (cnt > 0);
+ }
+ startsg++;
+ }
+ /* force pdir update */
+ wmb();
+
+#ifdef DEBUG_LARGE_SG_ENTRIES
+ dump_run_sg = 0;
+#endif
+ return(n_mappings);
+}
+
+
+/*
+** Two address ranges are DMA contiguous *iff* "end of prev" and
+** "start of next" are both on an IOV page boundary.
+**
+** (shift left is a quick trick to mask off upper bits)
+*/
+#define DMA_CONTIG(__X, __Y) \
+ (((((unsigned long) __X) | ((unsigned long) __Y)) << (BITS_PER_LONG - iovp_shift)) == 0UL)
+
+
+/**
+ * sba_coalesce_chunks - preprocess the SG list
+ * @ioc: IO MMU structure which owns the pdir we are interested in.
+ * @startsg: list of IOVA/size pairs
+ * @nents: number of entries in startsg list
+ *
+ * First pass is to walk the SG list and determine where the breaks are
+ * in the DMA stream. Allocates PDIR entries but does not fill them.
+ * Returns the number of DMA chunks.
+ *
+ * Each stream head gets its dma_address set to PIDE_FLAG, the allocated
+ * pdir index shifted by iovp_shift, and the byte offset of the first
+ * buffer; each virtually contiguous chunk head gets its dma_length set.
+ *
+ * Doing the fill separate from the coalescing/allocation keeps the
+ * code simpler. Future enhancement could make one pass through
+ * the sglist do both.
+ */
+static SBA_INLINE int
+sba_coalesce_chunks( struct ioc *ioc,
+ struct scatterlist *startsg,
+ int nents)
+{
+ struct scatterlist *vcontig_sg; /* VCONTIG chunk head */
+ unsigned long vcontig_len; /* len of VCONTIG chunk */
+ unsigned long vcontig_end;
+ struct scatterlist *dma_sg; /* next DMA stream head */
+ unsigned long dma_offset, dma_len; /* start/len of DMA stream */
+ int n_mappings = 0;
+
+ while (nents > 0) {
+ unsigned long vaddr = (unsigned long) sba_sg_address(startsg);
+
+ /*
+ ** Prepare for first/next DMA stream
+ */
+ dma_sg = vcontig_sg = startsg;
+ dma_len = vcontig_len = vcontig_end = startsg->length;
+ vcontig_end += vaddr;
+ dma_offset = vaddr & ~iovp_mask;
+
+ /* PARANOID: clear entries */
+ startsg->dma_address = startsg->dma_length = 0;
+
+ /*
+ ** This loop terminates one iteration "early" since
+ ** it's always looking one "ahead".
+ */
+ while (--nents > 0) {
+ unsigned long vaddr; /* tmp; shadows the outer vaddr */
+
+ startsg++;
+
+ /* PARANOID */
+ startsg->dma_address = startsg->dma_length = 0;
+
+ /* catch brokenness in SCSI layer */
+ ASSERT(startsg->length <= DMA_CHUNK_SIZE);
+
+ /*
+ ** First make sure current dma stream won't
+ ** exceed DMA_CHUNK_SIZE if we coalesce the
+ ** next entry.
+ */
+ if (((dma_len + dma_offset + startsg->length + ~iovp_mask) & iovp_mask)
+ > DMA_CHUNK_SIZE)
+ break;
+
+ /*
+ ** Then look for virtually contiguous blocks.
+ **
+ ** append the next transaction?
+ */
+ vaddr = (unsigned long) sba_sg_address(startsg);
+ if (vcontig_end == vaddr)
+ {
+ vcontig_len += startsg->length;
+ vcontig_end += startsg->length;
+ dma_len += startsg->length;
+ continue;
+ }
+
+#ifdef DEBUG_LARGE_SG_ENTRIES
+ dump_run_sg = (vcontig_len > iovp_size);
+#endif
+
+ /*
+ ** Not virtually contiguous.
+ ** Terminate prev chunk.
+ ** Start a new chunk.
+ **
+ ** Once we start a new VCONTIG chunk, dma_offset
+ ** can't change. And we need the offset from the first
+ ** chunk - not the last one. Ergo successive chunks
+ ** must start on page boundaries and dovetail
+ ** with their predecessor.
+ */
+ vcontig_sg->dma_length = vcontig_len;
+
+ vcontig_sg = startsg;
+ vcontig_len = startsg->length;
+
+ /*
+ ** 3) do the entries end/start on page boundaries?
+ ** Don't update vcontig_end until we've checked.
+ */
+ if (DMA_CONTIG(vcontig_end, vaddr))
+ {
+ vcontig_end = vcontig_len + vaddr;
+ dma_len += vcontig_len;
+ continue;
+ } else {
+ break;
+ }
+ }
+
+ /*
+ ** End of DMA Stream
+ ** Terminate last VCONTIG block.
+ ** Allocate space for DMA stream.
+ */
+ vcontig_sg->dma_length = vcontig_len;
+ dma_len = (dma_len + dma_offset + ~iovp_mask) & iovp_mask;
+ ASSERT(dma_len <= DMA_CHUNK_SIZE);
+ dma_sg->dma_address = (dma_addr_t) (PIDE_FLAG
+ | (sba_alloc_range(ioc, dma_len) << iovp_shift)
+ | dma_offset);
+ n_mappings++;
+ }
+
+ return n_mappings;
+}
+
+
+/**
+ * sba_map_sg - map Scatter/Gather list
+ * @dev: instance of PCI owned by the driver that's asking.
+ * @sglist: array of buffer/length pairs
+ * @nents: number of entries in list
+ * @dir: R/W or both.
+ *
+ * Returns the number of DMA mappings produced: @nents on the bypass path,
+ * 1 for a single-entry list, otherwise the number of coalesced streams.
+ *
+ * See Documentation/DMA-mapping.txt
+ */
+int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, int dir)
+{
+ struct ioc *ioc;
+ int coalesced, filled = 0;
+#ifdef ASSERT_PDIR_SANITY
+ unsigned long flags;
+#endif
+#ifdef ALLOW_IOV_BYPASS_SG
+ struct scatterlist *sg;
+#endif
+
+ DBG_RUN_SG("%s() START %d entries\n", __FUNCTION__, nents);
+ ioc = GET_IOC(dev);
+ ASSERT(ioc);
+
+#ifdef ALLOW_IOV_BYPASS_SG
+ ASSERT(to_pci_dev(dev)->dma_mask);
+ if (likely((ioc->dma_mask & ~to_pci_dev(dev)->dma_mask) == 0)) {
+ for (sg = sglist ; filled < nents ; filled++, sg++){
+ sg->dma_length = sg->length;
+ sg->dma_address = virt_to_phys(sba_sg_address(sg));
+ }
+ return filled;
+ }
+#endif
+ /* Fast path single entry scatterlists. */
+ if (nents == 1) {
+ sglist->dma_length = sglist->length;
+ sglist->dma_address = sba_map_single(dev, sba_sg_address(sglist), sglist->length, dir);
+ return 1;
+ }
+
+#ifdef ASSERT_PDIR_SANITY
+ spin_lock_irqsave(&ioc->res_lock, flags);
+ if (sba_check_pdir(ioc,"Check before sba_map_sg()"))
+ {
+ sba_dump_sg(ioc, sglist, nents);
+ panic("Check before sba_map_sg()");
+ }
+ spin_unlock_irqrestore(&ioc->res_lock, flags);
+#endif
+
+ prefetch(ioc->res_hint);
+
+ /*
+ ** First coalesce the chunks and allocate I/O pdir space
+ **
+ ** If this is one DMA stream, we can properly map using the
+ ** correct virtual address associated with each DMA page.
+ ** w/o this association, we wouldn't have coherent DMA!
+ ** Access to the virtual address is what forces a two pass algorithm.
+ */
+ coalesced = sba_coalesce_chunks(ioc, sglist, nents);
+
+ /*
+ ** Program the I/O Pdir
+ **
+ ** map the virtual addresses to the I/O Pdir
+ ** o dma_address will contain the pdir index
+ ** o dma_len will contain the number of bytes to map
+ ** o address contains the virtual address.
+ */
+ filled = sba_fill_pdir(ioc, sglist, nents);
+
+#ifdef ASSERT_PDIR_SANITY
+ spin_lock_irqsave(&ioc->res_lock, flags);
+ if (sba_check_pdir(ioc,"Check after sba_map_sg()"))
+ {
+ sba_dump_sg(ioc, sglist, nents);
+ panic("Check after sba_map_sg()\n");
+ }
+ spin_unlock_irqrestore(&ioc->res_lock, flags);
+#endif
+
+ ASSERT(coalesced == filled);
+ DBG_RUN_SG("%s() DONE %d mappings\n", __FUNCTION__, filled);
+
+ return filled;
+}
+
+
+/**
+ * sba_unmap_sg - tear down the mappings of a Scatter/Gather list
+ * @dev: instance of PCI owned by the driver that's asking.
+ * @sglist: array of buffer/length pairs
+ * @nents: number of entries in list
+ * @dir: R/W or both.
+ *
+ * Hands each entry to sba_unmap_single(), stopping after @nents entries or
+ * at the first entry whose dma_length is zero, whichever comes first.
+ *
+ * See Documentation/DMA-mapping.txt
+ */
+void sba_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, int dir)
+{
+#ifdef ASSERT_PDIR_SANITY
+	struct ioc *ioc;
+	unsigned long flags;
+#endif
+
+	DBG_RUN_SG("%s() START %d entries,  %p,%x\n",
+		__FUNCTION__, nents, sba_sg_address(sglist), sglist->length);
+
+#ifdef ASSERT_PDIR_SANITY
+	ioc = GET_IOC(dev);
+	ASSERT(ioc);
+
+	spin_lock_irqsave(&ioc->res_lock, flags);
+	sba_check_pdir(ioc,"Check before sba_unmap_sg()");
+	spin_unlock_irqrestore(&ioc->res_lock, flags);
+#endif
+
+	/* a zero dma_length marks the end of the mapped entries */
+	for ( ; nents && sglist->dma_length; sglist++, nents--)
+		sba_unmap_single(dev, sglist->dma_address, sglist->dma_length, dir);
+
+	DBG_RUN_SG("%s() DONE (nents %d)\n", __FUNCTION__,  nents);
+
+#ifdef ASSERT_PDIR_SANITY
+	spin_lock_irqsave(&ioc->res_lock, flags);
+	sba_check_pdir(ioc,"Check after sba_unmap_sg()");
+	spin_unlock_irqrestore(&ioc->res_lock, flags);
+#endif
+
+}
+
+/**************************************************************
+*
+* Initialization and claim
+*
+***************************************************************/
+
+/*
+ * ioc_iova_init - set up the IOVA window and the IO page directory.
+ *
+ * Reads the IOVA base/mask programmed by firmware, programs the IOTLB page
+ * size, allocates and zeroes the IO PDIR, optionally reserves the upper
+ * half of the IOVA space for agpgart, purges the IO TLB and finally turns
+ * translation on.  Panics if the page size is unsupported or an
+ * allocation fails.
+ */
+static void __init
+ioc_iova_init(struct ioc *ioc)
+{
+	int tcnfg;
+	int agp_found = 0;
+	struct pci_dev *device = NULL;
+#ifdef FULL_VALID_PDIR
+	unsigned long index;
+#endif
+
+	/*
+	** Firmware programs the base and size of a "safe IOVA space"
+	** (one that doesn't overlap memory or LMMIO space) in the
+	** IBASE and IMASK registers.
+	*/
+	ioc->ibase = READ_REG(ioc->ioc_hpa + IOC_IBASE) & ~0x1UL;
+	ioc->imask = READ_REG(ioc->ioc_hpa + IOC_IMASK) | 0xFFFFFFFF00000000UL;
+
+	ioc->iov_size = ~ioc->imask + 1;
+
+	DBG_INIT("%s() hpa %p IOV base 0x%lx mask 0x%lx (%dMB)\n",
+		__FUNCTION__, ioc->ioc_hpa, ioc->ibase, ioc->imask,
+		ioc->iov_size >> 20);
+
+	switch (iovp_size) {
+		case  4*1024: tcnfg = 0; break;
+		case  8*1024: tcnfg = 1; break;
+		case 16*1024: tcnfg = 2; break;
+		case 64*1024: tcnfg = 3; break;
+		default:
+			panic(PFX "Unsupported IOTLB page size %ldK",
+				iovp_size >> 10);
+			break;
+	}
+	WRITE_REG(tcnfg, ioc->ioc_hpa + IOC_TCNFG);
+
+	ioc->pdir_size = (ioc->iov_size / iovp_size) * PDIR_ENTRY_SIZE;
+	ioc->pdir_base = (void *) __get_free_pages(GFP_KERNEL,
+						   get_order(ioc->pdir_size));
+	if (!ioc->pdir_base)
+		panic(PFX "Couldn't allocate I/O Page Table\n");
+
+	memset(ioc->pdir_base, 0, ioc->pdir_size);
+
+	DBG_INIT("%s() IOV page size %ldK pdir %p size %x\n", __FUNCTION__,
+		iovp_size >> 10, ioc->pdir_base, ioc->pdir_size);
+
+	ASSERT(ALIGN((unsigned long) ioc->pdir_base, 4*1024) == (unsigned long) ioc->pdir_base);
+	WRITE_REG(virt_to_phys(ioc->pdir_base), ioc->ioc_hpa + IOC_PDIR_BASE);
+
+	/*
+	** If an AGP device is present, only use half of the IOV space
+	** for PCI DMA.  Unfortunately we can't know ahead of time
+	** whether GART support will actually be used, for now we
+	** can just key on an AGP device found in the system.
+	** We program the next pdir index after we stop w/ a key for
+	** the GART code to handshake on.
+	*/
+	for_each_pci_dev(device)
+		agp_found |= pci_find_capability(device, PCI_CAP_ID_AGP);
+
+	if (agp_found && reserve_sba_gart) {
+		printk(KERN_INFO PFX "reserving %dMb of IOVA space at 0x%lx for agpgart\n",
+		      ioc->iov_size/2 >> 20, ioc->ibase + ioc->iov_size/2);
+		ioc->pdir_size /= 2;
+		((u64 *)ioc->pdir_base)[PDIR_INDEX(ioc->iov_size/2)] = ZX1_SBA_IOMMU_COOKIE;
+	}
+#ifdef FULL_VALID_PDIR
+	/*
+	** Check to see if the spill page has been allocated, we don't need more than
+	** one across multiple SBAs.
+	*/
+	if (!prefetch_spill_page) {
+		char *spill_poison = "SBAIOMMU POISON";
+		int poison_size = 16;
+		void *poison_addr, *addr;
+
+		addr = (void *)__get_free_pages(GFP_KERNEL, get_order(iovp_size));
+		if (!addr)
+			panic(PFX "Couldn't allocate PDIR spill page\n");
+
+		/*
+		** Fill the page with the poison pattern.  Both sides of the
+		** bound check are pointers, so no integer/pointer comparison
+		** is involved.
+		*/
+		for (poison_addr = addr; poison_addr < addr + iovp_size; poison_addr += poison_size)
+			memcpy(poison_addr, spill_poison, poison_size);
+
+		prefetch_spill_page = virt_to_phys(addr);
+
+		DBG_INIT("%s() prefetch spill addr: 0x%lx\n", __FUNCTION__, prefetch_spill_page);
+	}
+	/*
+	** Set all the PDIR entries valid w/ the spill page as the target
+	*/
+	for (index = 0 ; index < (ioc->pdir_size / PDIR_ENTRY_SIZE) ; index++)
+		((u64 *)ioc->pdir_base)[index] = (0x80000000000000FF | prefetch_spill_page);
+#endif
+
+	/* Clear I/O TLB of any possible entries */
+	WRITE_REG(ioc->ibase | (get_iovp_order(ioc->iov_size) + iovp_shift), ioc->ioc_hpa + IOC_PCOM);
+	READ_REG(ioc->ioc_hpa + IOC_PCOM);
+
+	/* Enable IOVA translation */
+	WRITE_REG(ioc->ibase | 1, ioc->ioc_hpa + IOC_IBASE);
+	READ_REG(ioc->ioc_hpa + IOC_IBASE);
+}
+
+/*
+ * ioc_resource_init - create the bitmap that tracks IO PDIR allocation.
+ *
+ * The map has one bit per pdir entry.  It is allocated zeroed and the
+ * search hint is pointed at its start.  Panics if the map cannot be
+ * allocated.
+ */
+static void __init
+ioc_resource_init(struct ioc *ioc)
+{
+	spin_lock_init(&ioc->res_lock);
+#if DELAYED_RESOURCE_CNT > 0
+	spin_lock_init(&ioc->saved_lock);
+#endif
+
+	/* number of pdir entries == number of bits needed ... */
+	ioc->res_size = ioc->pdir_size / PDIR_ENTRY_SIZE;
+	/* ... and eight of those bits fit in each res_map byte */
+	ioc->res_size = ioc->res_size >> 3;
+	DBG_INIT("%s() res_size 0x%x\n", __FUNCTION__, ioc->res_size);
+
+	ioc->res_map = (char *) __get_free_pages(GFP_KERNEL,
+						 get_order(ioc->res_size));
+	if (ioc->res_map == NULL)
+		panic(PFX "Couldn't allocate resource map\n");
+
+	memset(ioc->res_map, 0, ioc->res_size);
+
+	/* the circular search for a free IOVP starts at the beginning */
+	ioc->res_hint = (unsigned long *) ioc->res_map;
+
+#ifdef ASSERT_PDIR_SANITY
+	/* Burn the very first entry so that IOVA 0 is never handed out */
+	ioc->res_map[0] = 0x1;
+	ioc->pdir_base[0] = 0x8000000000000000ULL | ZX1_SBA_IOMMU_COOKIE;
+#endif
+#ifdef FULL_VALID_PDIR
+	/* Keep the last entry busy so prefetching cannot run past the IOVA space */
+	ioc->res_map[ioc->res_size - 1] |= 0x80UL;  /* res_map is chars */
+	ioc->pdir_base[(ioc->pdir_size / PDIR_ENTRY_SIZE) - 1] = (0x80000000000000FF
+							      | prefetch_spill_page);
+#endif
+
+	DBG_INIT("%s() res_map %x %p\n", __FUNCTION__,
+		 ioc->res_size, (void *) ioc->res_map);
+}
+
+/*
+ * ioc_sac_init - build the pseudo PCI device used for coherent allocations.
+ *
+ * The device carries a 32-bit DMA mask and a controller whose iommu field
+ * points back at @ioc.  It is stored in ioc->sac_only_dev.  Panics when
+ * either allocation fails.
+ */
+static void __init
+ioc_sac_init(struct ioc *ioc)
+{
+	struct pci_dev *pseudo_dev;
+	struct pci_controller *ctl;
+
+	/*
+	 * pci_alloc_coherent() must return a DMA address which is
+	 * SAC (single address cycle) addressable, so allocate a
+	 * pseudo-device to enforce that.
+	 */
+	pseudo_dev = kmalloc(sizeof(*pseudo_dev), GFP_KERNEL);
+	if (pseudo_dev == NULL)
+		panic(PFX "Couldn't allocate struct pci_dev");
+	memset(pseudo_dev, 0, sizeof(*pseudo_dev));
+
+	ctl = kmalloc(sizeof(*ctl), GFP_KERNEL);
+	if (ctl == NULL)
+		panic(PFX "Couldn't allocate struct pci_controller");
+	memset(ctl, 0, sizeof(*ctl));
+
+	/* link controller -> ioc and device -> controller */
+	ctl->iommu = ioc;
+	pseudo_dev->sysdata = ctl;
+	pseudo_dev->dma_mask = 0xFFFFFFFFUL;
+#ifdef CONFIG_PCI
+	pseudo_dev->dev.bus = &pci_bus_type;
+#endif
+	ioc->sac_only_dev = pseudo_dev;
+}
+
+/*
+ * ioc_zx1_init - zx1-specific setup: checks the IOC revision, narrows the
+ * DMA mask and clears the AO bit in eight consecutive rope config
+ * registers.
+ */
+static void __init
+ioc_zx1_init(struct ioc *ioc)
+{
+	unsigned int rope;
+
+	if (ioc->rev < 0x20)
+		panic(PFX "IOC 2.0 or later required for IOMMU support\n");
+
+	/* 38 bit memory controller + extra bit for range displaced by MMIO */
+	ioc->dma_mask = (0x1UL << 39) - 1;
+
+	/*
+	** Clear ROPE(N)_CONFIG AO bit.
+	** Disables "NT Ordering" (~= !"Relaxed Ordering")
+	** Overrides bit 1 in DMA Hint Sets.
+	** Improves netperf UDP_STREAM by ~10% for tg3 on bcm5701.
+	*/
+	for (rope = 0; rope < 8; rope++) {
+		/* the config registers are spaced eight bytes apart */
+		unsigned int reg = rope * 8;
+		unsigned long cfg;
+
+		cfg = READ_REG(ioc->ioc_hpa + IOC_ROPE0_CFG + reg);
+		cfg &= ~IOC_ROPE_AO;
+		WRITE_REG(cfg, ioc->ioc_hpa + IOC_ROPE0_CFG + reg);
+	}
+}
+
+/* Signature of an optional per-chipset initialization hook. */
+typedef void (initfunc)(struct ioc *);
+
+/* One known IOC type: its function id, a printable name and an init hook. */
+struct ioc_iommu {
+	u32 func_id;
+	char *name;
+	initfunc *init;
+};
+
+/* Table matched against the IOC's function id; a NULL hook means none. */
+static struct ioc_iommu ioc_iommu_info[] __initdata = {
+	{ .func_id = ZX1_IOC_ID,    .name = "zx1",    .init = ioc_zx1_init },
+	{ .func_id = ZX2_IOC_ID,    .name = "zx2",    .init = NULL },
+	{ .func_id = SX1000_IOC_ID, .name = "sx1000", .init = NULL },
+};
+
+/*
+ * ioc_init - allocate and bring up the software state for one IOC.
+ * @hpa: physical address of the IOC register block
+ * @handle: opaque handle stored in the new ioc
+ *
+ * Maps the registers, identifies the chip, runs the matching chipset hook
+ * and then initializes the IOVA space, the resource map and the SAC
+ * pseudo-device.  The new ioc is pushed onto ioc_list.
+ *
+ * Returns the new ioc, or NULL if the structure could not be allocated or
+ * the registers could not be mapped.
+ */
+static struct ioc * __init
+ioc_init(u64 hpa, void *handle)
+{
+	struct ioc *ioc;
+	struct ioc_iommu *info;
+	const size_t unknown_len = 24;	/* room for "Unknown (xxxx:xxxx)" + NUL */
+
+	ioc = kmalloc(sizeof(*ioc), GFP_KERNEL);
+	if (!ioc)
+		return NULL;
+
+	memset(ioc, 0, sizeof(*ioc));
+
+	ioc->handle = handle;
+	ioc->ioc_hpa = ioremap(hpa, 0x1000);
+	if (!ioc->ioc_hpa) {
+		/* nothing can be read from the chip; fail like an OOM does */
+		kfree(ioc);
+		return NULL;
+	}
+
+	/* only publish the ioc once its registers are reachable */
+	ioc->next = ioc_list;
+	ioc_list = ioc;
+
+	ioc->func_id = READ_REG(ioc->ioc_hpa + IOC_FUNC_ID);
+	ioc->rev = READ_REG(ioc->ioc_hpa + IOC_FCLASS) & 0xFFUL;
+	ioc->dma_mask = 0xFFFFFFFFFFFFFFFFUL;	/* conservative */
+
+	for (info = ioc_iommu_info; info < ioc_iommu_info + ARRAY_SIZE(ioc_iommu_info); info++) {
+		if (ioc->func_id == info->func_id) {
+			ioc->name = info->name;
+			if (info->init)
+				(info->init)(ioc);
+		}
+	}
+
+	iovp_size = (1 << iovp_shift);
+	iovp_mask = ~(iovp_size - 1);
+
+	DBG_INIT("%s: PAGE_SIZE %ldK, iovp_size %ldK\n", __FUNCTION__,
+		PAGE_SIZE >> 10, iovp_size >> 10);
+
+	if (!ioc->name) {
+		ioc->name = kmalloc(unknown_len, GFP_KERNEL);
+		if (ioc->name)
+			/* bounded write: never runs past the buffer */
+			snprintf((char *) ioc->name, unknown_len, "Unknown (%04x:%04x)",
+				 ioc->func_id & 0xFFFF, (ioc->func_id >> 16) & 0xFFFF);
+		else
+			ioc->name = "Unknown";
+	}
+
+	ioc_iova_init(ioc);
+	ioc_resource_init(ioc);
+	ioc_sac_init(ioc);
+
+	if ((long) ~iovp_mask > (long) ia64_max_iommu_merge_mask)
+		ia64_max_iommu_merge_mask = ~iovp_mask;
+
+	printk(KERN_INFO PFX
+		"%s %d.%d HPA 0x%lx IOVA space %dMb at 0x%lx\n",
+		ioc->name, (ioc->rev >> 4) & 0xF, ioc->rev & 0xF,
+		hpa, ioc->iov_size >> 20, ioc->ibase);
+
+	return ioc;
+}
+
+
+
+/**************************************************************************
+**
+** SBA initialization code (HW and SW)
+**
+** o identify SBA chip itself
+** o FIXME: initialize DMA hints for reasonable defaults
+**
+**************************************************************************/
+
+#ifdef CONFIG_PROC_FS
+/*
+ * seq_file start callback: return the ioc at index *pos in ioc_list,
+ * or NULL when the list has fewer entries than that.
+ */
+static void *
+ioc_start(struct seq_file *s, loff_t *pos)
+{
+	struct ioc *cur = ioc_list;
+	loff_t remaining = *pos;
+
+	while (cur != NULL) {
+		if (remaining == 0)
+			return cur;
+		remaining--;
+		cur = cur->next;
+	}
+
+	return NULL;
+}
+
+/* seq_file next callback: bump the position and step to the following ioc. */
+static void *
+ioc_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	struct ioc *cur = v;
+
+	*pos += 1;
+	return cur->next;
+}
+
+/* seq_file stop callback: intentionally empty, ioc_start() acquires nothing. */
+static void
+ioc_stop(struct seq_file *s, void *v)
+{
+}
+
+/*
+ * seq_file show callback: print a summary of one ioc (passed as v).
+ *
+ * Reports the chip name and revision, the NUMA node if one was assigned,
+ * the IOVA space and page size, and the PDIR size and usage.  Optional
+ * sections depend on PDIR_SEARCH_TIMING and ALLOW_IOV_BYPASS.
+ * Always returns 0.
+ */
+static int
+ioc_show(struct seq_file *s, void *v)
+{
+ struct ioc *ioc = v;
+ unsigned long *res_ptr = (unsigned long *)ioc->res_map;
+ int i, used = 0;
+
+ seq_printf(s, "Hewlett Packard %s IOC rev %d.%d\n",
+ ioc->name, ((ioc->rev >> 4) & 0xF), (ioc->rev & 0xF));
+#ifdef CONFIG_NUMA
+ /* MAX_NUMNODES marks "no node assigned" (see sba_map_ioc_to_node) */
+ if (ioc->node != MAX_NUMNODES)
+ seq_printf(s, "NUMA node : %d\n", ioc->node);
+#endif
+ seq_printf(s, "IOVA size : %ld MB\n", ((ioc->pdir_size >> 3) * iovp_size)/(1024*1024));
+ seq_printf(s, "IOVA page size : %ld kb\n", iovp_size/1024);
+
+ /* count the set bits in the resource map, one word at a time */
+ for (i = 0; i < (ioc->res_size / sizeof(unsigned long)); ++i, ++res_ptr)
+ used += hweight64(*res_ptr);
+
+ seq_printf(s, "PDIR size : %d entries\n", ioc->pdir_size >> 3);
+ seq_printf(s, "PDIR used : %d entries\n", used);
+
+#ifdef PDIR_SEARCH_TIMING
+ {
+ /* min/avg/max over the SBA_SEARCH_SAMPLE recorded search times */
+ unsigned long i = 0, avg = 0, min, max;
+ min = max = ioc->avg_search[0];
+ for (i = 0; i < SBA_SEARCH_SAMPLE; i++) {
+ avg += ioc->avg_search[i];
+ if (ioc->avg_search[i] > max) max = ioc->avg_search[i];
+ if (ioc->avg_search[i] < min) min = ioc->avg_search[i];
+ }
+ avg /= SBA_SEARCH_SAMPLE;
+ seq_printf(s, "Bitmap search : %ld/%ld/%ld (min/avg/max CPU Cycles/IOVA page)\n",
+ min, avg, max);
+ }
+#endif
+#ifndef ALLOW_IOV_BYPASS
+ seq_printf(s, "IOVA bypass disabled\n");
+#endif
+ return 0;
+}
+
+/* seq_file iterator over ioc_list; each record is rendered by ioc_show(). */
+static struct seq_operations ioc_seq_ops = {
+ .start = ioc_start,
+ .next = ioc_next,
+ .stop = ioc_stop,
+ .show = ioc_show
+};
+
+/* open callback: attach the ioc seq_file iterator to the file. */
+static int
+ioc_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &ioc_seq_ops);
+}
+
+/* File operations for the proc entry created by ioc_proc_init(). */
+static struct file_operations ioc_fops = {
+ .open = ioc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release
+};
+
+/*
+ * Create the "bus/mckinley" proc directory and, inside it, one entry named
+ * after the first ioc on ioc_list, wired to ioc_fops.  Failures to create
+ * either the directory or the entry are silently ignored.
+ */
+static void __init
+ioc_proc_init(void)
+{
+	struct proc_dir_entry *mckinley_dir, *node;
+
+	mckinley_dir = proc_mkdir("bus/mckinley", NULL);
+	if (mckinley_dir != NULL) {
+		node = create_proc_entry(ioc_list->name, 0, mckinley_dir);
+		if (node != NULL)
+			node->proc_fops = &ioc_fops;
+	}
+}
+#endif
+
+/*
+ * Associate a PCI bus with the IOC that encloses it in the ACPI namespace.
+ *
+ * Panics if the bus has no controller data.  Does nothing if an iommu is
+ * already attached or the controller has no ACPI handle.  Otherwise walks
+ * from the controller's handle outward through its ACPI parents until a
+ * handle matches an ioc on ioc_list; warns if none is found.
+ */
+static void
+sba_connect_bus(struct pci_bus *bus)
+{
+	acpi_handle cur, up;
+	acpi_status rc;
+	struct ioc *ioc;
+
+	if (!PCI_CONTROLLER(bus))
+		panic(PFX "no sysdata on bus %d!\n", bus->number);
+
+	/* already connected */
+	if (PCI_CONTROLLER(bus)->iommu)
+		return;
+
+	cur = PCI_CONTROLLER(bus)->acpi_handle;
+	if (!cur)
+		return;
+
+	/*
+	 * The IOC scope encloses PCI root bridges in the ACPI
+	 * namespace, so work our way out until we find an IOC we
+	 * claimed previously.
+	 */
+	for (;;) {
+		for (ioc = ioc_list; ioc; ioc = ioc->next) {
+			if (ioc->handle != cur)
+				continue;
+
+			PCI_CONTROLLER(bus)->iommu = ioc;
+			return;
+		}
+
+		rc = acpi_get_parent(cur, &up);
+		cur = up;
+		if (!ACPI_SUCCESS(rc))
+			break;
+	}
+
+	printk(KERN_WARNING "No IOC for PCI Bus %04x:%02x in ACPI\n", pci_domain_nr(bus), bus->number);
+}
+
+#ifdef CONFIG_NUMA
+/*
+ * Record the NUMA node of an IOC in ioc->node.
+ *
+ * Evaluates the ACPI _PXM object on the IOC's handle, falling back to its
+ * parent, and maps the result through pxm_to_nid_map[].  ioc->node is left
+ * at MAX_NUMNODES if no usable _PXM is found or if the resulting node is
+ * out of range or offline.
+ */
+static void __init
+sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle)
+{
+ struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
+ union acpi_object *obj;
+ acpi_handle phandle;
+ unsigned int node;
+
+ /* default: no node association */
+ ioc->node = MAX_NUMNODES;
+
+ /*
+ * Check for a _PXM on this node first. We don't typically see
+ * one here, so we'll end up getting it from the parent.
+ */
+ if (ACPI_FAILURE(acpi_evaluate_object(handle, "_PXM", NULL, &buffer))) {
+ if (ACPI_FAILURE(acpi_get_parent(handle, &phandle)))
+ return;
+
+ /* Reset the acpi buffer */
+ buffer.length = ACPI_ALLOCATE_BUFFER;
+ buffer.pointer = NULL;
+
+ if (ACPI_FAILURE(acpi_evaluate_object(phandle, "_PXM", NULL,
+ &buffer)))
+ return;
+ }
+
+ if (!buffer.length || !buffer.pointer)
+ return;
+
+ obj = buffer.pointer;
+
+ /* _PXM must be an integer that indexes pxm_to_nid_map[] */
+ if (obj->type != ACPI_TYPE_INTEGER ||
+ obj->integer.value >= MAX_PXM_DOMAINS) {
+ acpi_os_free(buffer.pointer);
+ return;
+ }
+
+ /* read the value before releasing the buffer that holds it */
+ node = pxm_to_nid_map[obj->integer.value];
+ acpi_os_free(buffer.pointer);
+
+ if (node >= MAX_NUMNODES || !node_online(node))
+ return;
+
+ ioc->node = node;
+ return;
+}
+#else
+#define sba_map_ioc_to_node(ioc, handle)
+#endif
+
+/*
+ * ACPI "add" callback for the IOC driver.
+ *
+ * Looks up the device's CSR space, adjusts the register base for HWP0001
+ * devices, settles the IOMMU page size (iovp_shift) if it has not been set
+ * already, and then initializes the IOC via ioc_init().
+ *
+ * Returns 0 on success and 1 on any failure.
+ */
+static int __init
+acpi_sba_ioc_add(struct acpi_device *device)
+{
+ struct ioc *ioc;
+ acpi_status status;
+ u64 hpa, length;
+ struct acpi_buffer buffer;
+ struct acpi_device_info *dev_info;
+
+ status = hp_acpi_csr_space(device->handle, &hpa, &length);
+ if (ACPI_FAILURE(status))
+ return 1;
+
+ /* fetch the object info, which carries the hardware ID checked below */
+ buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER;
+ status = acpi_get_object_info(device->handle, &buffer);
+ if (ACPI_FAILURE(status))
+ return 1;
+ dev_info = buffer.pointer;
+
+ /*
+ * For HWP0001, only SBA appears in ACPI namespace. It encloses the PCI
+ * root bridges, and its CSR space includes the IOC function.
+ */
+ if (strncmp("HWP0001", dev_info->hardware_id.value, 7) == 0) {
+ hpa += ZX1_IOC_OFFSET;
+ /* zx1 based systems default to kernel page size iommu pages */
+ if (!iovp_shift)
+ iovp_shift = min(PAGE_SHIFT, 16);
+ }
+ ACPI_MEM_FREE(dev_info);
+
+ /*
+ * default anything not caught above or specified on cmdline to 4k
+ * iommu page size
+ */
+ if (!iovp_shift)
+ iovp_shift = 12;
+
+ ioc = ioc_init(hpa, device->handle);
+ if (!ioc)
+ return 1;
+
+ /* setup NUMA node association */
+ sba_map_ioc_to_node(ioc, device->handle);
+ return 0;
+}
+
+/* ACPI driver binding the HWP0001 and HWP0004 IDs to acpi_sba_ioc_add(). */
+static struct acpi_driver acpi_sba_ioc_driver = {
+ .name = "IOC IOMMU Driver",
+ .ids = "HWP0001,HWP0004",
+ .ops = {
+ .add = acpi_sba_ioc_add,
+ },
+};
+
+/*
+ * Subsystem init: register the IOC ACPI driver, then (only if at least one
+ * IOC was found) connect every PCI bus to its IOC and create the proc
+ * entry.  Always returns 0.
+ */
+static int __init
+sba_init(void)
+{
+	acpi_bus_register_driver(&acpi_sba_ioc_driver);
+
+	if (ioc_list) {
+#ifdef CONFIG_PCI
+		struct pci_bus *bus;
+
+		for (bus = pci_find_next_bus(NULL); bus != NULL; bus = pci_find_next_bus(bus))
+			sba_connect_bus(bus);
+#endif
+
+#ifdef CONFIG_PROC_FS
+		ioc_proc_init();
+#endif
+	}
+
+	return 0;
+}
+
+subsys_initcall(sba_init); /* must be initialized after ACPI etc., but before any drivers... */
+
+extern void dig_setup(char**);
+/*
+ * MAX_DMA_ADDRESS needs to be setup prior to paging_init to do any good,
+ * so we use the platform_setup hook to fix it up.
+ *
+ * Sets MAX_DMA_ADDRESS to ~0UL and then hands cmdline_p on to dig_setup().
+ */
+void __init
+sba_setup(char **cmdline_p)
+{
+ MAX_DMA_ADDRESS = ~0UL;
+ dig_setup(cmdline_p);
+}
+
+/*
+ * Handler for the "nosbagart" boot option: clears reserve_sba_gart.
+ * The option's value (str) is ignored.  Always returns 1.
+ */
+static int __init
+nosbagart(char *str)
+{
+ reserve_sba_gart = 0;
+ return 1;
+}
+
+/*
+ * Report whether a device DMA mask is usable: returns 1 when every one of
+ * the low 32 bits of mask is set, 0 otherwise.  dev is not examined.
+ */
+int
+sba_dma_supported (struct device *dev, u64 mask)
+{
+	/* any clear bit among the low 32 disqualifies the mask */
+	if ((~mask & 0xFFFFFFFFUL) != 0)
+		return 0;
+
+	return 1;
+}
+
+/* Always returns 0: this implementation never reports a mapping as failed. */
+int
+sba_dma_mapping_error (dma_addr_t dma_addr)
+{
+ return 0;
+}
+
+__setup("nosbagart", nosbagart);
+
+/*
+ * Handler for the "sbapagesize=" boot option.
+ *
+ * Parses the requested IOMMU page size with memparse() and, when it is one
+ * of the supported sizes (4096, 8192, 16384 or 65536 bytes), stores its
+ * log2 in iovp_shift.  Any other value is reported and otherwise ignored,
+ * leaving iovp_shift untouched.  Always returns 1.
+ */
+static int __init
+sba_page_override(char *str)
+{
+	unsigned long page_size;
+
+	page_size = memparse(str, &str);
+	switch (page_size) {
+		case 4096:
+		case 8192:
+		case 16384:
+		case 65536:
+			/* power of two, so the lowest set bit gives log2 */
+			iovp_shift = ffs(page_size) - 1;
+			break;
+		default:
+			/* page_size is unsigned long: print it with %lu, not %ld */
+			printk("%s: unknown/unsupported iommu page size %lu\n",
+			       __FUNCTION__, page_size);
+	}
+
+	return 1;
+}
+
+__setup("sbapagesize=",sba_page_override);
+
+EXPORT_SYMBOL(sba_dma_mapping_error);
+EXPORT_SYMBOL(sba_map_single);
+EXPORT_SYMBOL(sba_unmap_single);
+EXPORT_SYMBOL(sba_map_sg);
+EXPORT_SYMBOL(sba_unmap_sg);
+EXPORT_SYMBOL(sba_dma_supported);
+EXPORT_SYMBOL(sba_alloc_coherent);
+EXPORT_SYMBOL(sba_free_coherent);
diff --git a/arch/ia64/hp/sim/Kconfig b/arch/ia64/hp/sim/Kconfig
new file mode 100644
index 00000000000..18ccb1266e1
--- /dev/null
+++ b/arch/ia64/hp/sim/Kconfig
@@ -0,0 +1,20 @@
+
+# Drivers for the devices emulated by the HP simulator.  The menu is only
+# offered for HP simulator or generic ia64 kernels.
+menu "HP Simulator drivers"
+ depends on IA64_HP_SIM || IA64_GENERIC
+
+config HP_SIMETH
+ bool "Simulated Ethernet "
+
+config HP_SIMSERIAL
+ bool "Simulated serial driver support"
+
+# The console is built on top of the simulated serial driver.
+config HP_SIMSERIAL_CONSOLE
+ bool "Console for HP simulator"
+ depends on HP_SIMSERIAL
+
+config HP_SIMSCSI
+ tristate "Simulated SCSI disk"
+ depends on SCSI
+
+endmenu
+
diff --git a/arch/ia64/hp/sim/Makefile b/arch/ia64/hp/sim/Makefile
new file mode 100644
index 00000000000..d10da47931d
--- /dev/null
+++ b/arch/ia64/hp/sim/Makefile
@@ -0,0 +1,16 @@
+#
+# arch/ia64/hp/sim/Makefile
+#
+# Copyright (C) 2002 Hewlett-Packard Co.
+# David Mosberger-Tang <davidm@hpl.hp.com>
+# Copyright (C) 1999 Silicon Graphics, Inc.
+# Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com)
+#
+
+# Always built: simulator interrupt, setup and system-call support.
+obj-y := hpsim_irq.o hpsim_setup.o hpsim.o
+obj-$(CONFIG_IA64_GENERIC) += hpsim_machvec.o
+
+# Optional simulated devices, one object per Kconfig option.
+obj-$(CONFIG_HP_SIMETH) += simeth.o
+obj-$(CONFIG_HP_SIMSERIAL) += simserial.o
+obj-$(CONFIG_HP_SIMSERIAL_CONSOLE) += hpsim_console.o
+obj-$(CONFIG_HP_SIMSCSI) += simscsi.o
diff --git a/arch/ia64/hp/sim/boot/Makefile b/arch/ia64/hp/sim/boot/Makefile
new file mode 100644
index 00000000000..df6e9968c84
--- /dev/null
+++ b/arch/ia64/hp/sim/boot/Makefile
@@ -0,0 +1,37 @@
+#
+# arch/ia64/hp/sim/boot/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1998, 2003 by David Mosberger-Tang <davidm@hpl.hp.com>
+#
+
+# The bootloader is only a target when building for the HP simulator.
+targets-$(CONFIG_IA64_HP_SIM) += bootloader
+targets := vmlinux.bin vmlinux.gz $(targets-y)
+
+# "cptotop": hard-link (ln -f) the first prerequisite to the target name.
+quiet_cmd_cptotop = LN $@
+ cmd_cptotop = ln -f $< $@
+
+vmlinux.gz: $(obj)/vmlinux.gz $(addprefix $(obj)/,$(targets-y))
+ $(call cmd,cptotop)
+ @echo ' Kernel: $@ is ready'
+
+boot: bootloader
+
+bootloader: $(obj)/bootloader
+ $(call cmd,cptotop)
+
+# vmlinux -> vmlinux.bin (objcopy) -> vmlinux.gz (gzip)
+$(obj)/vmlinux.gz: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,gzip)
+
+$(obj)/vmlinux.bin: vmlinux FORCE
+ $(call if_changed,objcopy)
+
+
+# -T takes the first prerequisite below (bootloader.lds) as the linker script.
+LDFLAGS_bootloader = -static -T
+
+$(obj)/bootloader: $(src)/bootloader.lds $(obj)/bootloader.o $(obj)/boot_head.o $(obj)/fw-emu.o \
+ lib/lib.a arch/ia64/lib/lib.a FORCE
+ $(call if_changed,ld)
diff --git a/arch/ia64/hp/sim/boot/boot_head.S b/arch/ia64/hp/sim/boot/boot_head.S
new file mode 100644
index 00000000000..9364199e563
--- /dev/null
+++ b/arch/ia64/hp/sim/boot/boot_head.S
@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <asm/asmmacro.h>
+
+ /*
+ * Boot loader stack: a 16-byte aligned .bss region whose address _start
+ * loads into sp.
+ * NOTE(review): 16834 looks like a transposition of 16384 (16KB) - confirm.
+ */
+ .bss
+ .align 16
+stack_mem:
+ .skip 16834
+
+ .text
+
+/*
+ * This needs to be defined because lib/string.c:strlcat() calls it in case of error...
+ * The stub does not print anything: it just executes "break 0".
+ */
+GLOBAL_ENTRY(printk)
+ break 0
+END(printk)
+
+/*
+ * Boot loader entry point (ENTRY(_start) in bootloader.lds): sets up gp and
+ * the memory stack pointer, switches to register bank 1 and calls
+ * start_bootloader().
+ */
+GLOBAL_ENTRY(_start)
+	.prologue
+	.save rp, r0
+	.body
+	movl gp = __gp
+	/*
+	 * The ia64 memory stack grows toward lower addresses and the 16 bytes
+	 * at [sp, sp+15] are scratch space a callee may use, so start at the
+	 * top of the 16KB stack area less the scratch area, not at its base
+	 * (which would let the stack run into whatever precedes stack_mem).
+	 */
+	movl sp = stack_mem+16384-16
+	bsw.1
+	br.call.sptk.many rp=start_bootloader
+END(_start)
+
+/*
+ * Set a break point on this function so that symbols are available to set breakpoints in
+ * the kernel being debugged.
+ *
+ * The function itself does nothing but return.
+ */
+GLOBAL_ENTRY(debug_break)
+ br.ret.sptk.many b0
+END(debug_break)
+
+/*
+ * Simulator system call: five inputs (.regstk 5,0,0,0).  The fifth input
+ * (in4) is copied into r15 before "break 0x80001" is issued; in0-in3 stay
+ * in their input registers.
+ */
+GLOBAL_ENTRY(ssc)
+ .regstk 5,0,0,0
+ mov r15=in4
+ break 0x80001
+ br.ret.sptk.many b0
+END(ssc)
+
+/*
+ * Transfer control to the loaded kernel: the first argument is placed in
+ * r28 and the second is used as the branch target.  This is a plain branch,
+ * not a call, so no return address is set up.
+ */
+GLOBAL_ENTRY(jmp_to_kernel)
+ .regstk 2,0,0,0
+ mov r28=in0
+ mov b7=in1
+ br.sptk.few b7
+END(jmp_to_kernel)
+
+
+/*
+ * Minimal PAL emulation.
+ *
+ * The function index is taken from r28.  r8 is preset to -1 and set to 0 by
+ * each emulated call; r9-r11 carry the remaining return values.  Indices in
+ * [256, 512) branch to "stacked" and return at once with r8 = -1.  All
+ * other indices run through the compare chain below, which handles
+ * PAL_PTCE_INFO (6), PAL_FREQ_RATIOS (14), PAL_RSE_INFO (19),
+ * PAL_CACHE_FLUSH (1) and PAL_PERF_MON_INFO (15); anything else falls out
+ * of the chain with r8 still -1.  PAL_PERF_MON_INFO additionally stores its
+ * mask data through the pointer in r29.
+ */
+GLOBAL_ENTRY(pal_emulator_static)
+ mov r8=-1
+ mov r9=256
+ ;;
+ cmp.gtu p6,p7=r9,r28 /* r28 <= 255? */
+(p6) br.cond.sptk.few static
+ ;;
+ mov r9=512
+ ;;
+ cmp.gtu p6,p7=r9,r28
+(p6) br.cond.sptk.few stacked
+ ;;
+static: cmp.eq p6,p7=6,r28 /* PAL_PTCE_INFO */
+(p7) br.cond.sptk.few 1f
+ ;;
+ mov r8=0 /* status = 0 */
+ movl r9=0x100000000 /* tc.base */
+ movl r10=0x0000000200000003 /* count[0], count[1] */
+ movl r11=0x1000000000002000 /* stride[0], stride[1] */
+ br.cond.sptk.few rp
+1: cmp.eq p6,p7=14,r28 /* PAL_FREQ_RATIOS */
+(p7) br.cond.sptk.few 1f
+ mov r8=0 /* status = 0 */
+ movl r9 =0x100000064 /* proc_ratio (1/100) */
+ movl r10=0x100000100 /* bus_ratio<<32 (1/256) */
+ movl r11=0x100000064 /* itc_ratio<<32 (1/100) */
+ ;;
+ /* no explicit return here: falls through the remaining compares to the final "br rp" */
+1: cmp.eq p6,p7=19,r28 /* PAL_RSE_INFO */
+(p7) br.cond.sptk.few 1f
+ mov r8=0 /* status = 0 */
+ mov r9=96 /* num phys stacked */
+ mov r10=0 /* hints */
+ mov r11=0
+ br.cond.sptk.few rp
+1: cmp.eq p6,p7=1,r28 /* PAL_CACHE_FLUSH */
+(p7) br.cond.sptk.few 1f
+ mov r9=ar.lc /* save the loop counter */
+ movl r8=524288 /* flush 512k cache lines of 32 bytes (16MB) */
+ ;;
+ mov ar.lc=r8
+ movl r8=0xe000000000000000
+ ;;
+.loop: fc r8
+ add r8=32,r8
+ br.cloop.sptk.few .loop
+ sync.i
+ ;;
+ srlz.i
+ ;;
+ mov ar.lc=r9 /* restore the loop counter */
+ mov r8=r0 /* status = 0 */
+ ;;
+ /* no explicit return here either: falls through to the final "br rp" */
+1: cmp.eq p6,p7=15,r28 /* PAL_PERF_MON_INFO */
+(p7) br.cond.sptk.few 1f
+ mov r8=0 /* status = 0 */
+ movl r9 =0x08122f04 /* generic=4 width=47 retired=8 cycles=18 */
+ mov r10=0 /* reserved */
+ mov r11=0 /* reserved */
+ mov r16=0xffff /* implemented PMC */
+ mov r17=0x3ffff /* implemented PMD */
+ add r18=8,r29 /* second index */
+ ;;
+ /* r29 and r18 leapfrog in 16-byte steps, so together they fill consecutive 8-byte words */
+ st8 [r29]=r16,16 /* store implemented PMC */
+ st8 [r18]=r0,16 /* clear remaining bits */
+ ;;
+ st8 [r29]=r0,16 /* clear remaining bits */
+ st8 [r18]=r0,16 /* clear remaining bits */
+ ;;
+ st8 [r29]=r17,16 /* store implemented PMD */
+ st8 [r18]=r0,16 /* clear remaining bits */
+ mov r16=0xf0 /* cycles count capable PMC */
+ ;;
+ st8 [r29]=r0,16 /* clear remaining bits */
+ st8 [r18]=r0,16 /* clear remaining bits */
+ mov r17=0xf0 /* retired bundles capable PMC */
+ ;;
+ st8 [r29]=r16,16 /* store cycles capable */
+ st8 [r18]=r0,16 /* clear remaining bits */
+ ;;
+ st8 [r29]=r0,16 /* clear remaining bits */
+ st8 [r18]=r0,16 /* clear remaining bits */
+ ;;
+ st8 [r29]=r17,16 /* store retired bundle capable */
+ st8 [r18]=r0,16 /* clear remaining bits */
+ ;;
+ st8 [r29]=r0,16 /* clear remaining bits */
+ st8 [r18]=r0,16 /* clear remaining bits */
+ ;;
+1: br.cond.sptk.few rp
+stacked:
+ br.ret.sptk.few rp
+END(pal_emulator_static)
diff --git a/arch/ia64/hp/sim/boot/bootloader.c b/arch/ia64/hp/sim/boot/bootloader.c
new file mode 100644
index 00000000000..51a7b7b4dd0
--- /dev/null
+++ b/arch/ia64/hp/sim/boot/bootloader.c
@@ -0,0 +1,176 @@
+/*
+ * arch/ia64/hp/sim/boot/bootloader.c
+ *
+ * Loads an ELF kernel.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ * Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * 01/07/99 S.Eranian modified to pass command line arguments to kernel
+ */
+struct task_struct; /* forward declaration for elf.h */
+
+#include <linux/config.h>
+#include <linux/elf.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+
+#include <asm/elf.h>
+#include <asm/intrinsics.h>
+#include <asm/pal.h>
+#include <asm/pgtable.h>
+#include <asm/sal.h>
+#include <asm/system.h>
+
+#include "ssc.h"
+
+/* Read request passed by address to SSC_READ: destination address and byte count. */
+struct disk_req {
+ unsigned long addr;
+ unsigned len;
+};
+
+/*
+ * Completion record passed by address to SSC_WAIT_COMPLETION: the caller
+ * sets fd and afterwards reads count (checked against the requested length).
+ */
+struct disk_stat {
+ int fd;
+ unsigned count;
+};
+
+extern void jmp_to_kernel (unsigned long bp, unsigned long e_entry);
+extern struct ia64_boot_param *sys_fw_init (const char *args, int arglen);
+extern void debug_break (void);
+
+/*
+ * Write a NUL-terminated string to the simulator console, one character
+ * per SSC_PUTCHAR call.  Every '\n' is followed by an extra '\r'.
+ */
+static void
+cons_write (const char *buf)
+{
+	const char *p;
+
+	for (p = buf; *p != '\0'; p++) {
+		unsigned long c = *p;
+
+		ssc(c, 0, 0, 0, SSC_PUTCHAR);
+		if (c != '\n')
+			continue;
+		ssc('\r', 0, 0, 0, SSC_PUTCHAR);
+	}
+}
+
+#define MAX_ARGS 32
+
+/*
+ * Boot loader main routine, called from _start.
+ *
+ * Obtains "kernelname args..." from the simulator, opens the named ELF
+ * executable (default "vmlinux"), copies each PT_LOAD segment to its
+ * physical address and zero-fills the remainder up to p_memsz, builds the
+ * emulated firmware tables with sys_fw_init() and jumps to the kernel's
+ * entry point.
+ *
+ * If the kernel file cannot be opened the function reports it and spins
+ * forever; on any other load error it reports the problem on the simulator
+ * console and returns.
+ */
+void
+start_bootloader (void)
+{
+	static char mem[4096];
+	static char buffer[1024];
+	unsigned long off;
+	int fd, i;
+	struct disk_req req;
+	struct disk_stat stat;
+	struct elfhdr *elf;
+	struct elf_phdr *elf_phdr;	/* program header */
+	unsigned long e_entry, e_phoff, e_phnum;
+	register struct ia64_boot_param *bp;
+	char *kpath, *args;
+	long arglen = 0;
+
+	ssc(0, 0, 0, 0, SSC_CONSOLE_INIT);
+
+	/*
+	 * S.Eranian: extract the commandline argument from the simulator
+	 *
+	 * The expected format is as follows:
+	 *
+	 *	kernelname args...
+	 *
+	 * Both are optional but you can't have the second one without the first.
+	 */
+	arglen = ssc((long) buffer, 0, 0, 0, SSC_GET_ARGS);
+
+	kpath = "vmlinux";
+	args = buffer;
+	if (arglen > 0) {
+		/* split "kernelname args..." in place at the first blank */
+		kpath = buffer;
+		while (*args != ' ' && *args != '\0')
+			++args, --arglen;
+		if (*args == ' ')
+			*args++ = '\0', --arglen;
+	}
+
+	if (arglen <= 0) {
+		/* no kernel arguments: pass an empty string (just the NUL) */
+		args = "";
+		arglen = 1;
+	}
+
+	fd = ssc((long) kpath, 1, 0, 0, SSC_OPEN);
+
+	if (fd < 0) {
+		cons_write(kpath);
+		cons_write(": file not found, reboot now\n");
+		for(;;);
+	}
+	stat.fd = fd;
+	off = 0;
+
+	/* read the start of the file, which holds the ELF header */
+	req.len = sizeof(mem);
+	req.addr = (long) mem;
+	ssc(fd, 1, (long) &req, off, SSC_READ);
+	ssc((long) &stat, 0, 0, 0, SSC_WAIT_COMPLETION);
+
+	elf = (struct elfhdr *) mem;
+	/*
+	 * Reject the image unless all four magic bytes match "\177ELF": a
+	 * wrong first byte alone is already enough to fail the check.
+	 */
+	if (elf->e_ident[0] != 0x7f || strncmp(elf->e_ident + 1, "ELF", 3) != 0) {
+		cons_write("not an ELF file\n");
+		return;
+	}
+	if (elf->e_type != ET_EXEC) {
+		cons_write("not an ELF executable\n");
+		return;
+	}
+	if (!elf_check_arch(elf)) {
+		cons_write("kernel not for this processor\n");
+		return;
+	}
+
+	/* copy what we need out of the header: mem is reused below */
+	e_entry = elf->e_entry;
+	e_phnum = elf->e_phnum;
+	e_phoff = elf->e_phoff;
+
+	cons_write("loading ");
+	cons_write(kpath);
+	cons_write("...\n");
+
+	for (i = 0; i < e_phnum; ++i) {
+		/* fetch the next program header into mem */
+		req.len = sizeof(*elf_phdr);
+		req.addr = (long) mem;
+		ssc(fd, 1, (long) &req, e_phoff, SSC_READ);
+		ssc((long) &stat, 0, 0, 0, SSC_WAIT_COMPLETION);
+		if (stat.count != sizeof(*elf_phdr)) {
+			cons_write("failed to read phdr\n");
+			return;
+		}
+		e_phoff += sizeof(*elf_phdr);
+
+		elf_phdr = (struct elf_phdr *) mem;
+
+		if (elf_phdr->p_type != PT_LOAD)
+			continue;
+
+		/* load the segment's file contents, then zero the rest (bss) */
+		req.len = elf_phdr->p_filesz;
+		req.addr = __pa(elf_phdr->p_paddr);
+		ssc(fd, 1, (long) &req, elf_phdr->p_offset, SSC_READ);
+		ssc((long) &stat, 0, 0, 0, SSC_WAIT_COMPLETION);
+		memset((char *)__pa(elf_phdr->p_paddr) + elf_phdr->p_filesz, 0,
+		       elf_phdr->p_memsz - elf_phdr->p_filesz);
+	}
+	ssc(fd, 0, 0, 0, SSC_CLOSE);
+
+	cons_write("starting kernel...\n");
+
+	/* fake an I/O base address: */
+	ia64_setreg(_IA64_REG_AR_KR0, 0xffffc000000UL);
+
+	bp = sys_fw_init(args, arglen);
+
+	ssc(0, (long) kpath, 0, 0, SSC_LOAD_SYMBOLS);
+
+	debug_break();
+	jmp_to_kernel((unsigned long) bp, e_entry);
+
+	/* only reached if the jump to the kernel comes back */
+	cons_write("kernel returned!\n");
+	ssc(-1, 0, 0, 0, SSC_EXIT);
+}
diff --git a/arch/ia64/hp/sim/boot/bootloader.lds b/arch/ia64/hp/sim/boot/bootloader.lds
new file mode 100644
index 00000000000..69ae5853103
--- /dev/null
+++ b/arch/ia64/hp/sim/boot/bootloader.lds
@@ -0,0 +1,65 @@
+/*
+ * Linker script for the HP simulator boot loader: a little-endian ia64 ELF
+ * image entered at _start, with text, data and bss laid out contiguously
+ * from 0x100000.
+ */
+OUTPUT_FORMAT("elf64-ia64-little")
+OUTPUT_ARCH(ia64)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ . = 0x100000;
+
+ _text = .;
+ .text : { *(__ivt_section) *(.text) }
+ _etext = .;
+
+ /* Global data */
+ _data = .;
+ .rodata : { *(.rodata) *(.rodata.*) }
+ .data : { *(.data) *(.gnu.linkonce.d*) CONSTRUCTORS }
+ /* __gp is the value _start loads into the gp register */
+ __gp = ALIGN (8) + 0x200000;
+ .got : { *(.got.plt) *(.got) }
+ /* We want the small data sections together, so single-instruction offsets
+ can access them all, and initialized data all before uninitialized, so
+ we can shorten the on-disk segment size. */
+ .sdata : { *(.sdata) }
+ _edata = .;
+
+ _bss = .;
+ .sbss : { *(.sbss) *(.scommon) }
+ .bss : { *(.bss) *(COMMON) }
+ . = ALIGN(64 / 8);
+ _end = . ;
+
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* These must appear regardless of . */
+}
diff --git a/arch/ia64/hp/sim/boot/fw-emu.c b/arch/ia64/hp/sim/boot/fw-emu.c
new file mode 100644
index 00000000000..5c46928e3dc
--- /dev/null
+++ b/arch/ia64/hp/sim/boot/fw-emu.c
@@ -0,0 +1,398 @@
+/*
+ * PAL & SAL emulation.
+ *
+ * Copyright (C) 1998-2001 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <linux/config.h>
+
+#ifdef CONFIG_PCI
+# include <linux/pci.h>
+#endif
+
+#include <linux/efi.h>
+#include <asm/io.h>
+#include <asm/pal.h>
+#include <asm/sal.h>
+
+#include "ssc.h"
+
+#define MB (1024*1024UL)
+
+#define SIMPLE_MEMMAP 1
+
+#if SIMPLE_MEMMAP
+# define NUM_MEM_DESCS 4
+#else
+# define NUM_MEM_DESCS 16
+#endif
+
+/*
+ * Static arena from which sys_fw_init() carves the boot parameter block,
+ * the EFI and SAL tables, the EFI memory map and a 1024-byte command line
+ * buffer.
+ */
+static char fw_mem[( sizeof(struct ia64_boot_param)
+ + sizeof(efi_system_table_t)
+ + sizeof(efi_runtime_services_t)
+ + 1*sizeof(efi_config_table_t)
+ + sizeof(struct ia64_sal_systab)
+ + sizeof(struct ia64_sal_desc_entry_point)
+ + NUM_MEM_DESCS*(sizeof(efi_memory_desc_t))
+ + 1024)] __attribute__ ((aligned (8)));
+
+#define SECS_PER_HOUR (60 * 60)
+#define SECS_PER_DAY (SECS_PER_HOUR * 24)
+
+/* Compute the calendar representation of T, a count of seconds since
+ January 1, 1970, and store year, month, day, hour, minute and second
+ into *TP.  No timezone offset is applied.
+ Always returns 1. */
+int
+offtime (unsigned long t, efi_time_t *tp)
+{
+ /* Cumulative day count at the start of each month. */
+ const unsigned short int __mon_yday[2][13] =
+ {
+ /* Normal years. */
+ { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 },
+ /* Leap years. */
+ { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
+ };
+ long int days, rem, y;
+ const unsigned short int *ip;
+
+ days = t / SECS_PER_DAY;
+ rem = t % SECS_PER_DAY;
+ /* T is unsigned, so REM already lies in [0, SECS_PER_DAY) and neither
+ normalization loop below iterates. */
+ while (rem < 0) {
+ rem += SECS_PER_DAY;
+ --days;
+ }
+ while (rem >= SECS_PER_DAY) {
+ rem -= SECS_PER_DAY;
+ ++days;
+ }
+ tp->hour = rem / SECS_PER_HOUR;
+ rem %= SECS_PER_HOUR;
+ tp->minute = rem / 60;
+ tp->second = rem % 60;
+ /* January 1, 1970 was a Thursday. */
+ y = 1970;
+
+# define DIV(a, b) ((a) / (b) - ((a) % (b) < 0))
+# define LEAPS_THRU_END_OF(y) (DIV (y, 4) - DIV (y, 100) + DIV (y, 400))
+# define __isleap(year) \
+ ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0))
+
+ /* Loop until DAYS is a valid day-of-year index for year Y. */
+ while (days < 0 || days >= (__isleap (y) ? 366 : 365)) {
+ /* Guess a corrected year, assuming 365 days per year. */
+ long int yg = y + days / 365 - (days % 365 < 0);
+
+ /* Adjust DAYS and Y to match the guessed year. */
+ days -= ((yg - y) * 365 + LEAPS_THRU_END_OF (yg - 1)
+ - LEAPS_THRU_END_OF (y - 1));
+ y = yg;
+ }
+ tp->year = y;
+ ip = __mon_yday[__isleap(y)];
+ /* Y is reused as a month index: find the last month starting on or before DAYS. */
+ for (y = 11; days < (long int) ip[y]; --y)
+ continue;
+ days -= ip[y];
+ tp->month = y + 1;
+ tp->day = days + 1;
+ return 1;
+}
+
+extern void pal_emulator_static (void);
+
+/* Macro to emulate SAL call using legacy IN and OUT calls to CF8, CFC etc.. */
+
+#define BUILD_CMD(addr) ((0x80000000 | (addr)) & ~3)
+
+#define REG_OFFSET(addr) (0x00000000000000FF & (addr))
+#define DEVICE_FUNCTION(addr) (0x000000000000FF00 & (addr))
+#define BUS_NUMBER(addr) (0x0000000000FF0000 & (addr))
+
+/*
+ * Emulated EFI get_time service: asks the simulator for the time of day
+ * (SSC_GET_TOD), converts the seconds value with offtime() into *tm and
+ * zeroes *tc when it is supplied.  Always returns EFI_SUCCESS.
+ */
+static efi_status_t
+fw_efi_get_time (efi_time_t *tm, efi_time_cap_t *tc)
+{
+#if defined(CONFIG_IA64_HP_SIM) || defined(CONFIG_IA64_GENERIC)
+ struct {
+ int tv_sec; /* must be 32bits to work */
+ int tv_usec;
+ } tv32bits;
+
+ ssc((unsigned long) &tv32bits, 0, 0, 0, SSC_GET_TOD);
+
+ /* clear every field first: offtime() only fills in date and time */
+ memset(tm, 0, sizeof(*tm));
+ offtime(tv32bits.tv_sec, tm);
+
+ if (tc)
+ memset(tc, 0, sizeof(*tc));
+#else
+# error Not implemented yet...
+#endif
+ return EFI_SUCCESS;
+}
+
+/*
+ * Emulated EFI reset_system service: issues SSC_EXIT to the simulator with
+ * status as its argument.  reset_type, data_size and data are ignored.
+ */
+static void
+efi_reset_system (int reset_type, efi_status_t status, unsigned long data_size, efi_char16_t *data)
+{
+#if defined(CONFIG_IA64_HP_SIM) || defined(CONFIG_IA64_GENERIC)
+ ssc(status, 0, 0, 0, SSC_EXIT);
+#else
+# error Not implemented yet...
+#endif
+}
+
+/* Shared stub for every EFI runtime service that is not emulated. */
+static efi_status_t
+efi_unimplemented (void)
+{
+ return EFI_UNSUPPORTED;
+}
+
+/*
+ * Minimal SAL emulation, dispatched on the procedure index.
+ *
+ * SAL_FREQ_BASE returns a fixed frequency in r9 for the clock selected by
+ * in1.  With CONFIG_PCI, SAL_PCI_CONFIG_READ/WRITE access configuration
+ * space through I/O ports 0xCF8/0xCFC.  The other recognized indices are
+ * accepted as no-ops with status 0; unknown indices (and unknown
+ * SAL_FREQ_BASE selectors) return status -1.
+ *
+ * Returns {status, r9, r10, r11}; r10 and r11 are always 0.
+ */
+static struct sal_ret_values
+sal_emulator (long index, unsigned long in1, unsigned long in2,
+ unsigned long in3, unsigned long in4, unsigned long in5,
+ unsigned long in6, unsigned long in7)
+{
+ long r9 = 0;
+ long r10 = 0;
+ long r11 = 0;
+ long status;
+
+ /*
+ * Don't do a "switch" here since that gives us code that
+ * isn't self-relocatable.
+ *
+ * NOTE(review): the SAL_FREQ_BASE branch below does use a switch
+ * on in1 - confirm that this is intentional.
+ */
+ status = 0;
+ if (index == SAL_FREQ_BASE) {
+ switch (in1) {
+ case SAL_FREQ_BASE_PLATFORM:
+ r9 = 200000000;
+ break;
+
+ case SAL_FREQ_BASE_INTERVAL_TIMER:
+ /*
+ * Is this supposed to be the cr.itc frequency
+ * or something platform specific? The SAL
+ * doc ain't exactly clear on this...
+ */
+ r9 = 700000000;
+ break;
+
+ case SAL_FREQ_BASE_REALTIME_CLOCK:
+ r9 = 1;
+ break;
+
+ default:
+ status = -1;
+ break;
+ }
+ } else if (index == SAL_SET_VECTORS) {
+ ;
+ } else if (index == SAL_GET_STATE_INFO) {
+ ;
+ } else if (index == SAL_GET_STATE_INFO_SIZE) {
+ ;
+ } else if (index == SAL_CLEAR_STATE_INFO) {
+ ;
+ } else if (index == SAL_MC_RENDEZ) {
+ ;
+ } else if (index == SAL_MC_SET_PARAMS) {
+ ;
+ } else if (index == SAL_CACHE_FLUSH) {
+ ;
+ } else if (index == SAL_CACHE_INIT) {
+ ;
+#ifdef CONFIG_PCI
+ } else if (index == SAL_PCI_CONFIG_READ) {
+ /*
+ * in1 contains the PCI configuration address and in2
+ * the size of the read. The value that is read is
+ * returned via the general register r9.
+ */
+ outl(BUILD_CMD(in1), 0xCF8);
+ /* the low bits of the register offset select the byte/word within the dword at 0xCFC */
+ if (in2 == 1) /* Reading byte */
+ r9 = inb(0xCFC + ((REG_OFFSET(in1) & 3)));
+ else if (in2 == 2) /* Reading word */
+ r9 = inw(0xCFC + ((REG_OFFSET(in1) & 2)));
+ else /* Reading dword */
+ r9 = inl(0xCFC);
+ status = PCIBIOS_SUCCESSFUL;
+ } else if (index == SAL_PCI_CONFIG_WRITE) {
+ /*
+ * in1 contains the PCI configuration address, in2 the
+ * size of the write, and in3 the actual value to be
+ * written out.
+ */
+ outl(BUILD_CMD(in1), 0xCF8);
+ if (in2 == 1) /* Writing byte */
+ outb(in3, 0xCFC + ((REG_OFFSET(in1) & 3)));
+ else if (in2 == 2) /* Writing word */
+ outw(in3, 0xCFC + ((REG_OFFSET(in1) & 2)));
+ else /* Writing dword */
+ outl(in3, 0xCFC);
+ status = PCIBIOS_SUCCESSFUL;
+#endif /* CONFIG_PCI */
+ } else if (index == SAL_UPDATE_PAL) {
+ ;
+ } else {
+ status = -1;
+ }
+ return ((struct sal_ret_values) {status, r9, r10, r11});
+}
+
+
+/*
+ * This is here to work around a bug in egcs-1.1.1b that causes the
+ * compiler to crash (seems like a bug in the new alias analysis code.
+ */
+/* Identity conversion: returns addr reinterpreted as a pointer. */
+void *
+id (long addr)
+{
+ return (void *) addr;
+}
+
+/*
+ * Build the emulated firmware environment handed to the kernel.
+ *
+ * @args:   kernel command line, or NULL for none
+ * @arglen: number of bytes to copy from args (clamped to 1023)
+ *
+ * Carves fw_mem into, in order: the EFI system table, EFI runtime services
+ * table, one EFI config table entry, the SAL system table, one SAL entry
+ * point descriptor, the EFI memory map, the boot parameter block and the
+ * command line buffer.  All pointers stored in the tables are physical
+ * addresses (__pa).
+ *
+ * Returns the (virtual) address of the filled-in boot parameter block.
+ */
+struct ia64_boot_param *
+sys_fw_init (const char *args, int arglen)
+{
+	efi_system_table_t *efi_systab;
+	efi_runtime_services_t *efi_runtime;
+	efi_config_table_t *efi_tables;
+	struct ia64_sal_systab *sal_systab;
+	efi_memory_desc_t *efi_memmap, *md;
+	unsigned long *pal_desc, *sal_desc;
+	struct ia64_sal_desc_entry_point *sal_ed;
+	struct ia64_boot_param *bp;
+	unsigned char checksum = 0;
+	char *cp, *cmd_line;
+	int i = 0;
+	/* Append one descriptor covering [start, end) in 4KB pages to the memory map. */
+#	define MAKE_MD(typ, attr, start, end)		\
+	do {						\
+		md = efi_memmap + i++;			\
+		md->type = typ;				\
+		md->pad = 0;				\
+		md->phys_addr = start;			\
+		md->virt_addr = 0;			\
+		md->num_pages = (end - start) >> 12;	\
+		md->attribute = attr;			\
+	} while (0)
+
+	memset(fw_mem, 0, sizeof(fw_mem));
+
+	/* function descriptors: word 0 is the entry address, word 1 the gp */
+	pal_desc = (unsigned long *) &pal_emulator_static;
+	sal_desc = (unsigned long *) &sal_emulator;
+
+	cp = fw_mem;
+	efi_systab  = (void *) cp; cp += sizeof(*efi_systab);
+	efi_runtime = (void *) cp; cp += sizeof(*efi_runtime);
+	efi_tables  = (void *) cp; cp += sizeof(*efi_tables);
+	sal_systab  = (void *) cp; cp += sizeof(*sal_systab);
+	sal_ed      = (void *) cp; cp += sizeof(*sal_ed);
+	efi_memmap  = (void *) cp; cp += NUM_MEM_DESCS*sizeof(*efi_memmap);
+	bp	    = (void *) cp; cp += sizeof(*bp);
+	cmd_line    = (void *) cp;
+
+	if (args) {
+		/* leave room for the terminating NUL in the 1024-byte buffer */
+		if (arglen >= 1024)
+			arglen = 1023;
+		memcpy(cmd_line, args, arglen);
+	} else {
+		arglen = 0;
+	}
+	cmd_line[arglen] = '\0';
+
+	/* clear the whole table: sizeof the object, not of the pointer */
+	memset(efi_systab, 0, sizeof(*efi_systab));
+	efi_systab->hdr.signature = EFI_SYSTEM_TABLE_SIGNATURE;
+	efi_systab->hdr.revision  = EFI_SYSTEM_TABLE_REVISION;
+	efi_systab->hdr.headersize = sizeof(efi_systab->hdr);
+	efi_systab->fw_vendor = __pa("H\0e\0w\0l\0e\0t\0t\0-\0P\0a\0c\0k\0a\0r\0d\0\0");
+	efi_systab->fw_revision = 1;
+	efi_systab->runtime = (void *) __pa(efi_runtime);
+	efi_systab->nr_tables = 1;
+	efi_systab->tables = __pa(efi_tables);
+
+	efi_runtime->hdr.signature = EFI_RUNTIME_SERVICES_SIGNATURE;
+	efi_runtime->hdr.revision = EFI_RUNTIME_SERVICES_REVISION;
+	efi_runtime->hdr.headersize = sizeof(efi_runtime->hdr);
+	efi_runtime->get_time = __pa(&fw_efi_get_time);
+	efi_runtime->set_time = __pa(&efi_unimplemented);
+	efi_runtime->get_wakeup_time = __pa(&efi_unimplemented);
+	efi_runtime->set_wakeup_time = __pa(&efi_unimplemented);
+	efi_runtime->set_virtual_address_map = __pa(&efi_unimplemented);
+	efi_runtime->get_variable = __pa(&efi_unimplemented);
+	efi_runtime->get_next_variable = __pa(&efi_unimplemented);
+	efi_runtime->set_variable = __pa(&efi_unimplemented);
+	efi_runtime->get_next_high_mono_count = __pa(&efi_unimplemented);
+	efi_runtime->reset_system = __pa(&efi_reset_system);
+
+	efi_tables->guid = SAL_SYSTEM_TABLE_GUID;
+	efi_tables->table = __pa(sal_systab);
+
+	/* fill in the SAL system table: */
+	memcpy(sal_systab->signature, "SST_", 4);
+	sal_systab->size = sizeof(*sal_systab);
+	sal_systab->sal_rev_minor = 1;
+	sal_systab->sal_rev_major = 0;
+	sal_systab->entry_count = 1;
+
+#ifdef CONFIG_IA64_GENERIC
+	strcpy(sal_systab->oem_id, "Generic");
+	strcpy(sal_systab->product_id, "IA-64 system");
+#endif
+
+#ifdef CONFIG_IA64_HP_SIM
+	strcpy(sal_systab->oem_id, "Hewlett-Packard");
+	strcpy(sal_systab->product_id, "HP-simulator");
+#endif
+
+#ifdef CONFIG_IA64_SDV
+	strcpy(sal_systab->oem_id, "Intel");
+	strcpy(sal_systab->product_id, "SDV");
+#endif
+
+	/* fill in an entry point: */
+	sal_ed->type = SAL_DESC_ENTRY_POINT;
+	sal_ed->pal_proc = __pa(pal_desc[0]);
+	sal_ed->sal_proc = __pa(sal_desc[0]);
+	sal_ed->gp = __pa(sal_desc[1]);
+
+	/* checksum the SAL table plus its entry so that all bytes sum to zero */
+	for (cp = (char *) sal_systab; cp < (char *) efi_memmap; ++cp)
+		checksum += *cp;
+
+	sal_systab->checksum = -checksum;
+
+#if SIMPLE_MEMMAP
+	/* simulate free memory at physical address zero */
+	MAKE_MD(EFI_BOOT_SERVICES_DATA,		EFI_MEMORY_WB,    0*MB,    1*MB);
+	MAKE_MD(EFI_PAL_CODE,			EFI_MEMORY_WB,    1*MB,    2*MB);
+	MAKE_MD(EFI_CONVENTIONAL_MEMORY,	EFI_MEMORY_WB,    2*MB,  130*MB);
+	MAKE_MD(EFI_CONVENTIONAL_MEMORY,	EFI_MEMORY_WB, 4096*MB, 4128*MB);
+#else
+	MAKE_MD( 4,		   0x9, 0x0000000000000000, 0x0000000000001000);
+	MAKE_MD( 7,		   0x9, 0x0000000000001000, 0x000000000008a000);
+	MAKE_MD( 4,		   0x9, 0x000000000008a000, 0x00000000000a0000);
+	MAKE_MD( 5, 0x8000000000000009, 0x00000000000c0000, 0x0000000000100000);
+	MAKE_MD( 7,		   0x9, 0x0000000000100000, 0x0000000004400000);
+	MAKE_MD( 2,		   0x9, 0x0000000004400000, 0x0000000004be5000);
+	MAKE_MD( 7,		   0x9, 0x0000000004be5000, 0x000000007f77e000);
+	MAKE_MD( 6, 0x8000000000000009, 0x000000007f77e000, 0x000000007fb94000);
+	MAKE_MD( 6, 0x8000000000000009, 0x000000007fb94000, 0x000000007fb95000);
+	MAKE_MD( 6, 0x8000000000000009, 0x000000007fb95000, 0x000000007fc00000);
+	MAKE_MD(13, 0x8000000000000009, 0x000000007fc00000, 0x000000007fc3a000);
+	MAKE_MD( 7,		   0x9, 0x000000007fc3a000, 0x000000007fea0000);
+	MAKE_MD( 5, 0x8000000000000009, 0x000000007fea0000, 0x000000007fea8000);
+	MAKE_MD( 7,		   0x9, 0x000000007fea8000, 0x000000007feab000);
+	MAKE_MD( 5, 0x8000000000000009, 0x000000007feab000, 0x000000007ffff000);
+	MAKE_MD( 7,		   0x9, 0x00000000ff400000, 0x0000000104000000);
+#endif
+
+	/* the EFI system table sits at the very start of fw_mem */
+	bp->efi_systab = __pa(&fw_mem);
+	bp->efi_memmap = __pa(efi_memmap);
+	bp->efi_memmap_size = NUM_MEM_DESCS*sizeof(efi_memory_desc_t);
+	bp->efi_memdesc_size = sizeof(efi_memory_desc_t);
+	bp->efi_memdesc_version = 1;
+	bp->command_line = __pa(cmd_line);
+	bp->console_info.num_cols = 80;
+	bp->console_info.num_rows = 25;
+	bp->console_info.orig_x = 0;
+	bp->console_info.orig_y = 24;
+	bp->fpswa = 0;
+
+	return bp;
+}
diff --git a/arch/ia64/hp/sim/boot/ssc.h b/arch/ia64/hp/sim/boot/ssc.h
new file mode 100644
index 00000000000..3b94c03e43a
--- /dev/null
+++ b/arch/ia64/hp/sim/boot/ssc.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ * Stephane Eranian <eranian@hpl.hp.com>
+ */
+#ifndef ssc_h
+#define ssc_h
+
+/*
+ * Simulator system calls: numeric call identifiers.  Presumably one of
+ * these is passed as the `nr` argument of ssc() below -- confirm against
+ * the callers, which are not in this header.
+ */
+
+#define SSC_CONSOLE_INIT 20
+#define SSC_GETCHAR 21
+#define SSC_PUTCHAR 31
+#define SSC_OPEN 50
+#define SSC_CLOSE 51
+#define SSC_READ 52
+#define SSC_WRITE 53
+#define SSC_GET_COMPLETION 54
+#define SSC_WAIT_COMPLETION 55
+#define SSC_CONNECT_INTERRUPT 58
+#define SSC_GENERATE_INTERRUPT 59
+#define SSC_SET_PERIODIC_INTERRUPT 60
+#define SSC_GET_RTC 65
+#define SSC_EXIT 66
+#define SSC_LOAD_SYMBOLS 69
+#define SSC_GET_TOD 74
+
+#define SSC_GET_ARGS 75
+
+/*
+ * Simulator system call.
+ *
+ * Takes four long arguments plus an int `nr` and returns a long.  The
+ * meaning of arg0..arg3 and of the return value is not defined here;
+ * presumably it depends on `nr` -- see the individual call sites.
+ */
+extern long ssc (long arg0, long arg1, long arg2, long arg3, int nr);
+
+#endif /* ssc_h */
diff --git a/arch/ia64/hp/sim/hpsim.S b/arch/ia64/hp/sim/hpsim.S
new file mode 100644
index 00000000000..ff16e8a857d
--- /dev/null
+++ b/arch/ia64/hp/sim/hpsim.S
@@ -0,0 +1,10 @@
+#include <asm/asmmacro.h>
+
+/*
+ * Simulator system call.
+ */
+GLOBAL_ENTRY(ia64_ssc)
+ /*
+  * Copy r36 into r15.  NOTE(review): r36 should be the fifth stacked input
+  * register (in4), i.e. the `nr` argument of the C prototype
+  * ia64_ssc(arg0, arg1, arg2, arg3, nr) -- confirm against the ia64
+  * calling convention.
+  */
+ mov r15=r36
+ /* presumably the break immediate the simulator traps on to service the call */
+ break 0x80001
+ /* return to the caller through the return-pointer branch register */
+ br.ret.sptk.many rp
+END(ia64_ssc)
diff --git a/arch/ia64/hp/sim/hpsim_console.c b/arch/ia64/hp/sim/hpsim_console.c
new file mode 100644
index 00000000000..5deff21e587
--- /dev/null
+++ b/arch/ia64/hp/sim/hpsim_console.c
@@ -0,0 +1,65 @@
+/*
+ * Platform dependent support for HP simulator.
+ *
+ * Copyright (C) 1998, 1999, 2002 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com>
+ */
+#include <linux/config.h>
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/tty.h>
+#include <linux/kdev_t.h>
+#include <linux/console.h>
+
+#include <asm/delay.h>
+#include <asm/irq.h>
+#include <asm/pal.h>
+#include <asm/machvec.h>
+#include <asm/pgtable.h>
+#include <asm/sal.h>
+
+#include "hpsim_ssc.h"
+
+static int simcons_init (struct console *, char *);
+static void simcons_write (struct console *, const char *, unsigned);
+static struct tty_driver *simcons_console_device (struct console *, int *);
+
+/*
+ * Console descriptor for the simulator console.  Output goes through
+ * simcons_write(); the tty driver lookup goes through
+ * simcons_console_device().  Not static: hpsim_setup() refers to it via an
+ * extern declaration when CONFIG_HP_SIMSERIAL_CONSOLE is set.
+ */
+struct console hpsim_cons = {
+ .name = "simcons",
+ .write = simcons_write,
+ .device = simcons_console_device,
+ .setup = simcons_init,
+ /* presumably asks the console core to replay already-buffered messages -- confirm */
+ .flags = CON_PRINTBUFFER,
+ /* NOTE(review): -1 presumably means "no specific index requested" -- confirm */
+ .index = -1,
+};
+
+/*
+ * Console setup hook.  The simulator console needs no configuration, so
+ * both arguments are ignored and success (0) is always reported.
+ */
+static int simcons_init (struct console *cons, char *options)
+{
+	return 0;
+}
+
+/*
+ * Console write hook: emit `count` characters from `buf` one at a time via
+ * the SSC_PUTCHAR simulator call.  Every '\n' is followed by an extra '\r'.
+ */
+static void
+simcons_write (struct console *cons, const char *buf, unsigned count)
+{
+	const char *end = buf + count;
+
+	for (; buf != end; buf++) {
+		unsigned long c = *buf;
+
+		ia64_ssc(c, 0, 0, 0, SSC_PUTCHAR);
+		if (c != '\n')
+			continue;
+		/* newline was just written; follow it with a carriage return */
+		ia64_ssc('\r', 0, 0, 0, SSC_PUTCHAR);
+	}
+}
+
+/*
+ * Console device hook: report the console's index through `index` and
+ * return the simulated serial tty driver (defined outside this file).
+ */
+static struct tty_driver *
+simcons_console_device (struct console *c, int *index)
+{
+	extern struct tty_driver *hp_simserial_driver;
+	struct tty_driver *drv = hp_simserial_driver;
+
+	*index = c->index;
+	return drv;
+}
diff --git a/arch/ia64/hp/sim/hpsim_irq.c b/arch/ia64/hp/sim/hpsim_irq.c
new file mode 100644
index 00000000000..c0d25a2a3e9
--- /dev/null
+++ b/arch/ia64/hp/sim/hpsim_irq.c
@@ -0,0 +1,51 @@
+/*
+ * Platform dependent support for HP simulator.
+ *
+ * Copyright (C) 1998-2001 Hewlett-Packard Co
+ * Copyright (C) 1998-2001 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/irq.h>
+
+/* Startup hook for simulator interrupts: nothing to do, always returns 0. */
+static unsigned int hpsim_irq_startup (unsigned int irq)
+{
+	return 0;
+}
+
+/*
+ * Shared do-nothing hook; irq_type_hp_sim uses it for shutdown, enable,
+ * disable, ack and end.
+ */
+static void hpsim_irq_noop (unsigned int irq)
+{
+	/* intentionally empty */
+}
+
+/* set_affinity hook for simulator interrupts: both arguments are ignored. */
+static void hpsim_set_affinity_noop (unsigned int a, cpumask_t b)
+{
+	/* intentionally empty */
+}
+
+/*
+ * Interrupt type installed by hpsim_irq_init() on every descriptor that
+ * still has the no_irq_type handler.  Every hook is a no-op; startup
+ * additionally returns 0.
+ */
+static struct hw_interrupt_type irq_type_hp_sim = {
+ .typename = "hpsim",
+ .startup = hpsim_irq_startup,
+ .shutdown = hpsim_irq_noop,
+ .enable = hpsim_irq_noop,
+ .disable = hpsim_irq_noop,
+ .ack = hpsim_irq_noop,
+ .end = hpsim_irq_noop,
+ .set_affinity = hpsim_set_affinity_noop,
+};
+
+/*
+ * Walk all NR_IRQS descriptors and install irq_type_hp_sim on each one
+ * whose handler is still no_irq_type.  Other descriptors are left alone.
+ */
+void __init
+hpsim_irq_init (void)
+{
+	int irq;
+
+	for (irq = 0; irq < NR_IRQS; irq++) {
+		irq_desc_t *desc = irq_descp(irq);
+
+		if (desc->handler != &no_irq_type)
+			continue;	/* already claimed by another type */
+		desc->handler = &irq_type_hp_sim;
+	}
+}
diff --git a/arch/ia64/hp/sim/hpsim_machvec.c b/arch/ia64/hp/sim/hpsim_machvec.c
new file mode 100644
index 00000000000..c2141935918
--- /dev/null
+++ b/arch/ia64/hp/sim/hpsim_machvec.c
@@ -0,0 +1,3 @@
+#define MACHVEC_PLATFORM_NAME hpsim
+#define MACHVEC_PLATFORM_HEADER <asm/machvec_hpsim.h>
+#include <asm/machvec_init.h>
diff --git a/arch/ia64/hp/sim/hpsim_setup.c b/arch/ia64/hp/sim/hpsim_setup.c
new file mode 100644
index 00000000000..694fc86bfbd
--- /dev/null
+++ b/arch/ia64/hp/sim/hpsim_setup.c
@@ -0,0 +1,52 @@
+/*
+ * Platform dependent support for HP simulator.
+ *
+ * Copyright (C) 1998, 1999, 2002 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com>
+ */
+#include <linux/config.h>
+#include <linux/console.h>
+#include <linux/init.h>
+#include <linux/kdev_t.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/param.h>
+#include <linux/root_dev.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include <asm/delay.h>
+#include <asm/irq.h>
+#include <asm/pal.h>
+#include <asm/machvec.h>
+#include <asm/pgtable.h>
+#include <asm/sal.h>
+
+#include "hpsim_ssc.h"
+
+/*
+ * Issue the SSC_CONNECT_INTERRUPT simulator call with `intr` and `irq` as
+ * its first two arguments.  The call's result is discarded.
+ */
+void ia64_ssc_connect_irq (long intr, long irq)
+{
+	(void) ia64_ssc(intr, irq, 0, 0, SSC_CONNECT_INTERRUPT);
+}
+
+/*
+ * Issue the SSC_CTL_TRACE simulator call with `on` as its only argument.
+ * The call's result is discarded.
+ */
+void ia64_ctl_trace (long on)
+{
+	(void) ia64_ssc(on, 0, 0, 0, SSC_CTL_TRACE);
+}
+
+/*
+ * Platform setup for the HP simulator.  Selects the first SCSI disk
+ * partition as the default root device and, when
+ * CONFIG_HP_SIMSERIAL_CONSOLE is configured and the running platform is
+ * "hpsim", registers the simulator console.  `cmdline_p` is not used.
+ */
+void __init hpsim_setup (char **cmdline_p)
+{
+	/* default to first SCSI drive */
+	ROOT_DEV = Root_SDA1;
+
+#ifdef CONFIG_HP_SIMSERIAL_CONSOLE
+	{
+		extern struct console hpsim_cons;
+
+		if (ia64_platform_is("hpsim")) {
+			register_console(&hpsim_cons);
+		}
+	}
+#endif
+}
diff --git a/arch/ia64/hp/sim/hpsim_ssc.h b/arch/ia64/hp/sim/hpsim_ssc.h
new file mode 100644
index 00000000000..bfa3906274b
--- /dev/null
+++ b/arch/ia64/hp/sim/hpsim_ssc.h
@@ -0,0 +1,36 @@
+/*
+ * Platform dependent support for HP simulator.
+ *
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com>
+ */
+#ifndef _IA64_PLATFORM_HPSIM_SSC_H
+#define _IA64_PLATFORM_HPSIM_SSC_H
+
+/*
+ * Simulator system calls: numeric call identifiers, passed as the last
+ * (`nr`) argument of ia64_ssc().
+ */
+
+#define SSC_CONSOLE_INIT 20
+#define SSC_GETCHAR 21
+#define SSC_PUTCHAR 31
+#define SSC_CONNECT_INTERRUPT 58
+#define SSC_GENERATE_INTERRUPT 59
+#define SSC_SET_PERIODIC_INTERRUPT 60
+#define SSC_GET_RTC 65
+#define SSC_EXIT 66
+#define SSC_LOAD_SYMBOLS 69
+#define SSC_GET_TOD 74
+#define SSC_CTL_TRACE 76
+
+/* simulated network device calls */
+#define SSC_NETDEV_PROBE 100
+#define SSC_NETDEV_SEND 101
+#define SSC_NETDEV_RECV 102
+#define SSC_NETDEV_ATTACH 103
+#define SSC_NETDEV_DETACH 104
+
+/*
+ * Simulator system call.
+ *
+ * `nr` selects the call (one of the SSC_* values above); the meaning of
+ * arg0..arg3 and of the returned long depends on the call -- see the
+ * individual call sites.
+ */
+extern long ia64_ssc (long arg0, long arg1, long arg2, long arg3, int nr);
+
+#endif /* _IA64_PLATFORM_HPSIM_SSC_H */
diff --git a/arch/ia64/hp/sim/simeth.c b/arch/ia64/hp/sim/simeth.c
new file mode 100644
index 00000000000..ae84a1018a8
--- /dev/null
+++ b/arch/ia64/hp/sim/simeth.c
@@ -0,0 +1,530 @@
+/*
+ * Simulated Ethernet Driver
+ *
+ * Copyright (C) 1999-2001, 2003 Hewlett-Packard Co
+ * Stephane Eranian <eranian@hpl.hp.com>
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/in.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_arp.h>
+#include <linux/skbuff.h>
+#include <linux/notifier.h>
+#include <linux/bitops.h>
+#include <asm/system.h>
+#include <asm/irq.h>
+
+#define SIMETH_RECV_MAX 10
+
+/*
+ * Maximum possible received frame for Ethernet.
+ * We preallocate an sk_buff of that size to avoid costly
+ * memcpy for temporary buffer into sk_buff. We do basically
+ * what's done in other drivers, like eepro with a ring.
+ * The difference is, of course, that we don't have real DMA !!!
+ */
+#define SIMETH_FRAME_SIZE ETH_FRAME_LEN
+
+
+#define SSC_NETDEV_PROBE 100
+#define SSC_NETDEV_SEND 101
+#define SSC_NETDEV_RECV 102
+#define SSC_NETDEV_ATTACH 103
+#define SSC_NETDEV_DETACH 104
+
+#define NETWORK_INTR 8
+
+/*
+ * Per-device private state, allocated together with the net_device by
+ * alloc_etherdev() in simeth_probe1() and reached through dev->priv.
+ */
+struct simeth_local {
+ struct net_device_stats stats; /* counters returned by simeth_get_stats() */
+ int simfd; /* descriptor in the simulator */
+};
+
+static int simeth_probe1(void);
+static int simeth_open(struct net_device *dev);
+static int simeth_close(struct net_device *dev);
+static int simeth_tx(struct sk_buff *skb, struct net_device *dev);
+static int simeth_rx(struct net_device *dev);
+static struct net_device_stats *simeth_get_stats(struct net_device *dev);
+static irqreturn_t simeth_interrupt(int irq, void *dev_id, struct pt_regs * regs);
+static void set_multicast_list(struct net_device *dev);
+static int simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr);
+
+static char *simeth_version="0.3";
+
+/*
+ * This variable is used to establish a mapping between the Linux/ia64 kernel
+ * and the host linux kernel.
+ *
+ * As of today, we support only one card, even though most of the code
+ * is ready for many more. The mapping is then:
+ * linux/ia64 -> linux/x86
+ * eth0 -> eth1
+ *
+ * In the future, with some string operations, we could easily support up
+ * to 10 cards (0-9).
+ *
+ * The default mapping can be changed on the kernel command line by
+ * specifying simeth=ethX (or whatever string you want).
+ */
+static char *simeth_device="eth0"; /* default host interface to use */
+
+
+
+static volatile unsigned int card_count; /* how many cards "found" so far */
+static int simeth_debug; /* set to 1 to get debug information */
+
+/*
+ * Netdevice notifier used to catch interface up/down transitions; its
+ * callback is simeth_device_event().  Remaining members are left zeroed.
+ */
+static struct notifier_block simeth_dev_notifier = {
+	.notifier_call = simeth_device_event,
+};
+
+
+/*
+ * Handler for the "simeth=" kernel command line option.
+ *
+ * Format: simeth=interface_name (like eth0)
+ *
+ * The option string itself (not a copy) becomes the host interface name
+ * handed to the simulator at probe time.  Always returns 1.
+ */
+static int __init simeth_setup(char *str)
+{
+	simeth_device = str;
+
+	return 1;
+}
+
+__setup("simeth=", simeth_setup);
+
+/*
+ * Built-in (non-module) probe entry point, run as an initcall.
+ *
+ * Prints the driver version and probes for the simulated card.  When the
+ * probe succeeds, the netdevice notifier is registered so that interface
+ * up/down events reach simeth_device_event().  Returns simeth_probe1()'s
+ * result.
+ */
+
+int __init
+simeth_probe (void)
+{
+	int err;
+
+	printk(KERN_INFO "simeth: v%s\n", simeth_version);
+
+	err = simeth_probe1();
+	if (err != 0)
+		return err;
+
+	register_netdevice_notifier(&simeth_dev_notifier);
+	return err;
+}
+
+extern long ia64_ssc (long, long, long, long, int);
+extern void ia64_ssc_connect_irq (long intr, long irq);
+
+/*
+ * Ask the simulator for host interface `name`.  Both pointers are passed
+ * as physical addresses; `ether` presumably receives the MAC address.
+ * Returns the simulator's result truncated to int; simeth_probe1() treats
+ * -1 as "no device".
+ */
+static inline int
+netdev_probe(char *name, unsigned char *ether)
+{
+	long fd = ia64_ssc(__pa(name), __pa(ether), 0, 0, SSC_NETDEV_PROBE);
+
+	return fd;
+}
+
+
+/*
+ * Connect the simulator's network interrupt (NETWORK_INTR) to `irq`.
+ *
+ * XXX Fix me: a single fixed interrupt source means multiple cards are
+ * not supported, and the simulator call reports nothing back, so this
+ * always returns 0.
+ */
+static inline int netdev_connect(int irq)
+{
+	ia64_ssc_connect_irq(NETWORK_INTR, irq);
+
+	return 0;
+}
+
+/*
+ * Put the host interface behind `fd` in the right mode (start
+ * interrupting), passing `ipaddr` to the simulator.  `irq` is accepted
+ * but not used.  Returns the simulator's result truncated to int.
+ */
+static inline int
+netdev_attach(int fd, int irq, unsigned int ipaddr)
+{
+	long rc = ia64_ssc(fd, ipaddr, 0, 0, SSC_NETDEV_ATTACH);
+
+	return rc;
+}
+
+
+/*
+ * Inactivate the host interface behind `fd` (don't interrupt anymore).
+ * Returns the simulator's result truncated to int.
+ */
+static inline int
+netdev_detach(int fd)
+{
+	long rc = ia64_ssc(fd, 0, 0, 0, SSC_NETDEV_DETACH);
+
+	return rc;
+}
+
+/*
+ * Hand `len` bytes at `buf` (passed as a physical address) to the
+ * simulator for transmission on `fd`.  Returns the simulator's result
+ * truncated to int.
+ */
+static inline int
+netdev_send(int fd, unsigned char *buf, unsigned int len)
+{
+	long rc = ia64_ssc(fd, __pa(buf), len, 0, SSC_NETDEV_SEND);
+
+	return rc;
+}
+
+/*
+ * Ask the simulator to receive up to `len` bytes from `fd` into `buf`
+ * (passed as a physical address).  Returns the simulator's result
+ * truncated to int; simeth_rx() treats 0 as "nothing left to read".
+ */
+static inline int
+netdev_read(int fd, unsigned char *buf, unsigned int len)
+{
+	long rc = ia64_ssc(fd, __pa(buf), len, 0, SSC_NETDEV_RECV);
+
+	return rc;
+}
+
+/*
+ * Function shared with module code, so cannot be in init section
+ *
+ * So far this function "detects" only one card (test_&_set) but could
+ * be extended easily.
+ *
+ * Probes the simulator for the host interface named by simeth_device,
+ * allocates and registers an ethernet net_device for it, assigns an
+ * interrupt vector and connects it in the simulator.
+ *
+ * Return:
+ * - -ENODEV if no device is found (or a card was already probed)
+ * - -ENOMEM if there is no more memory
+ * - the register_netdev() error if registration fails
+ * - 0 otherwise
+ */
+static int
+simeth_probe1(void)
+{
+ unsigned char mac_addr[ETH_ALEN];
+ struct simeth_local *local;
+ struct net_device *dev;
+ int fd, i, err;
+
+ /*
+ * XXX Fix me
+ * let's support just one card for now
+ *
+ * NOTE(review): bit 0 of card_count stays set on every failure path
+ * below, so a failed probe is never retried -- confirm this is intended.
+ */
+ if (test_and_set_bit(0, &card_count))
+ return -ENODEV;
+
+ /*
+ * check with the simulator for the device; mac_addr is passed by
+ * (physical) address and is copied into dev->dev_addr below
+ */
+ fd = netdev_probe(simeth_device, mac_addr);
+ if (fd == -1)
+ return -ENODEV;
+
+ dev = alloc_etherdev(sizeof(struct simeth_local));
+ if (!dev)
+ return -ENOMEM;
+
+ memcpy(dev->dev_addr, mac_addr, sizeof(mac_addr));
+
+ local = dev->priv;
+ local->simfd = fd; /* keep track of underlying file descriptor */
+
+ dev->open = simeth_open;
+ dev->stop = simeth_close;
+ dev->hard_start_xmit = simeth_tx;
+ dev->get_stats = simeth_get_stats;
+ dev->set_multicast_list = set_multicast_list; /* not yet used */
+
+ err = register_netdev(dev);
+ if (err) {
+ free_netdev(dev);
+ return err;
+ }
+
+ /*
+ * NOTE(review): the IRQ is assigned only after the device has been
+ * registered; simeth_open() reads dev->irq -- confirm the device cannot
+ * be opened before this point.
+ */
+ dev->irq = assign_irq_vector(AUTO_ASSIGN);
+
+ /*
+ * attach the interrupt in the simulator; presumably this does NOT enable
+ * interrupts until a netdev_attach() is called (the original comment
+ * said "does enable", which reads like a typo -- confirm)
+ */
+ netdev_connect(dev->irq);
+
+ printk(KERN_INFO "%s: hosteth=%s simfd=%d, HwAddr",
+ dev->name, simeth_device, local->simfd);
+ for(i = 0; i < ETH_ALEN; i++) {
+ printk(" %2.2x", dev->dev_addr[i]);
+ }
+ printk(", IRQ %d\n", dev->irq);
+
+ return 0;
+}
+
+/*
+ * Open hook: binds the device to its interrupt vector and starts the
+ * transmit queue.  Returns 0 on success, or -EAGAIN (after a warning)
+ * when the IRQ cannot be obtained.
+ */
+static int
+simeth_open(struct net_device *dev)
+{
+	int err = request_irq(dev->irq, simeth_interrupt, 0, "simeth", dev);
+
+	if (!err) {
+		netif_start_queue(dev);
+		return 0;
+	}
+
+	printk(KERN_WARNING "simeth: unable to get IRQ %d.\n", dev->irq);
+	return -EAGAIN;
+}
+
+/*
+ * Copied from lapbether.c.  Returns non-zero when `dev` is an ethernet
+ * device (ARPHRD_ETHER) whose name does not start with "dummy".
+ */
+static __inline__ int dev_is_ethdev(struct net_device *dev)
+{
+	if (dev->type != ARPHRD_ETHER)
+		return 0;
+
+	return strncmp(dev->name, "dummy", 5) != 0;
+}
+
+
+/*
+ * Handler for IFF_UP or IFF_DOWN
+ *
+ * The reason for that is that we don't want to be interrupted when the
+ * interface is down. There is no way to unconnect in the simulator. Instead
+ * we use this function to shutdown packet processing in the frame filter
+ * in the simulator. Thus no interrupts are generated
+ *
+ * That's also the place where we pass the IP address of this device to the
+ * simulator so that we can start filtering packets for it
+ *
+ * There may be a better way of doing this, but I don't know which yet.
+ *
+ * `ptr` is the net_device the event is about.  Only NETDEV_UP and
+ * NETDEV_DOWN are acted upon; NOTIFY_DONE is returned in every case.
+ */
+static int
+simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct simeth_local *local;
+	struct in_device *in_dev;
+	struct in_ifaddr **ifap = NULL;
+	struct in_ifaddr *ifa = NULL;
+	int r;
+
+	if ( ! dev ) {
+		printk(KERN_WARNING "simeth_device_event dev=0\n");
+		return NOTIFY_DONE;
+	}
+
+	if ( event != NETDEV_UP && event != NETDEV_DOWN ) return NOTIFY_DONE;
+
+	/*
+	 * Check whether or not it's for an ethernet device
+	 *
+	 * XXX Fixme: This works only as long as we support one
+	 * type of ethernet device.
+	 */
+	if ( !dev_is_ethdev(dev) ) return NOTIFY_DONE;
+
+	/* find the interface address whose label matches the device name */
+	if ((in_dev=dev->ip_ptr) != NULL) {
+		for (ifap=&in_dev->ifa_list; (ifa=*ifap) != NULL; ifap=&ifa->ifa_next)
+			if (strcmp(dev->name, ifa->ifa_label) == 0) break;
+	}
+	if ( ifa == NULL ) {
+		/* message used to name simeth_open, which is not where we are */
+		printk(KERN_ERR "simeth_device_event: can't find device %s's ifa\n", dev->name);
+		return NOTIFY_DONE;
+	}
+
+	printk(KERN_INFO "simeth_device_event: %s ipaddr=0x%x\n",
+	       dev->name, htonl(ifa->ifa_local));
+
+	/*
+	 * XXX Fix me
+	 * if the device was up, and we're simply reconfiguring it, not sure
+	 * we get DOWN then UP.
+	 */
+
+	local = dev->priv;
+	/* now do it for real */
+	r = event == NETDEV_UP ?
+		netdev_attach(local->simfd, dev->irq, htonl(ifa->ifa_local)):
+		netdev_detach(local->simfd);
+
+	printk(KERN_INFO "simeth: netdev_attach/detach: event=%s ->%d\n",
+	       event == NETDEV_UP ? "attach":"detach", r);
+
+	return NOTIFY_DONE;
+}
+
+/*
+ * Stop hook: stops the transmit queue, then releases the interrupt that
+ * simeth_open() requested.  Always returns 0.
+ */
+static int simeth_close(struct net_device *dev)
+{
+	netif_stop_queue(dev);
+	free_irq(dev->irq, dev);
+
+	return 0;
+}
+
+/*
+ * Only used for debug
+ *
+ * Dumps an ethernet frame: `from` is a tag printed first, followed by the
+ * length, bytes 0-5 and 6-11 as two colon-separated addresses, bytes
+ * 12-13 in brackets, and then the remaining bytes in hex.  The caller
+ * must supply at least 14 readable bytes in `frame`.
+ */
+static void
+frame_print(unsigned char *from, unsigned char *frame, int len)
+{
+	int i;
+
+	printk("%s: (%d) %02x", from, len, frame[0] & 0xff);
+	for(i=1; i < 6; i++ ) {
+		printk(":%02x", frame[i] &0xff);
+	}
+	/*
+	 * zero-pad like every other byte; the former " %2x" printed a
+	 * space-padded digit for values below 0x10
+	 */
+	printk(" %02x", frame[6] &0xff);
+	for(i=7; i < 12; i++ ) {
+		printk(":%02x", frame[i] &0xff);
+	}
+	printk(" [%02x%02x]\n", frame[12], frame[13]);
+
+	for(i=14; i < len; i++ ) {
+		printk("%02x ", frame[i] &0xff);
+		if ( (i%10)==0) printk("\n");
+	}
+	printk("\n");
+}
+
+
+/*
+ * Transmit hook: the very last stop for a frame before it goes to the
+ * simulator.
+ *
+ * The frame is sent with its own length; no padding up to ETH_ZLEN is
+ * done here, on the assumption that the real driver in the host system
+ * (or the NIC itself) takes care of the minimum frame size.
+ *
+ * Always returns 0.  The skb is freed here.
+ */
+static int
+simeth_tx(struct sk_buff *skb, struct net_device *dev)
+{
+	struct simeth_local *lp = dev->priv;
+	unsigned int nbytes = skb->len;
+
+	lp->stats.tx_bytes += skb->len;
+	lp->stats.tx_packets++;
+
+	if (simeth_debug > 5)
+		frame_print("simeth_tx", skb->data, nbytes);
+
+	/*
+	 * The write is synchronous, so no transmit-complete interrupt is
+	 * simulated and nothing has to be armed for one.
+	 */
+	netdev_send(lp->simfd, skb->data, nbytes);
+
+	dev_kfree_skb(skb);
+	return 0;
+}
+
+/*
+ * Allocate a receive buffer able to hold one full-size frame.
+ *
+ * Two extra bytes are allocated and reserved in front so that the IP
+ * header following the 14-byte ethernet header lands on an aligned
+ * boundary (14+2=16).  The data area is pre-extended to
+ * SIMETH_FRAME_SIZE.  Returns NULL (after logging) if allocation fails.
+ */
+static inline struct sk_buff *
+make_new_skb(struct net_device *dev)
+{
+	struct sk_buff *skb = dev_alloc_skb(SIMETH_FRAME_SIZE + 2);
+
+	if (!skb) {
+		printk(KERN_NOTICE "%s: memory squeeze. dropping packet.\n", dev->name);
+		return NULL;
+	}
+
+	skb->dev = dev;
+	skb_reserve(skb, 2);
+	skb_put(skb, SIMETH_FRAME_SIZE);
+
+	return skb;
+}
+
+/*
+ * called from interrupt handler to process received frames
+ *
+ * Reads up to SIMETH_RECV_MAX frames from the simulator, one per
+ * iteration, and pushes each one up the stack with netif_rx().
+ *
+ * Returns 0 when there is nothing left to read (or on allocation or read
+ * failure), otherwise the length of the last frame, meaning the caller
+ * can try again.
+ */
+static int
+simeth_rx(struct net_device *dev)
+{
+	struct simeth_local *local;
+	struct sk_buff *skb;
+	int len;
+	int rcv_count = SIMETH_RECV_MAX;
+
+	local = dev->priv;
+	/*
+	 * the loop concept has been borrowed from other drivers
+	 * looks to me like it's a throttling thing to avoid pushing too many
+	 * packets at one time into the stack. Making sure we can process them
+	 * upstream and make forward progress overall
+	 */
+	do {
+		if ( (skb=make_new_skb(dev)) == NULL ) {
+			printk(KERN_NOTICE "%s: memory squeeze. dropping packet.\n", dev->name);
+			local->stats.rx_dropped++;
+			return 0;
+		}
+		/*
+		 * Read only one frame at a time
+		 */
+		len = netdev_read(local->simfd, skb->data, SIMETH_FRAME_SIZE);
+		if ( len <= 0 ) {
+			if ( simeth_debug > 0 ) printk(KERN_WARNING "%s: count=%d netdev_read=%d\n",
+						       dev->name, SIMETH_RECV_MAX-rcv_count, len);
+			/*
+			 * Nothing was received into this buffer: release it instead
+			 * of leaking one skb per drain, and report "nothing left"
+			 * so the interrupt handler stops looping (also on errors).
+			 */
+			dev_kfree_skb_irq(skb);
+			len = 0;
+			break;
+		}
+		/*
+		 * make_new_skb() sized the buffer for a maximum frame; shrink
+		 * it to what was actually received before handing it upstream
+		 */
+		skb_trim(skb, len);
+
+		skb->protocol = eth_type_trans(skb, dev);
+
+		if ( simeth_debug > 6 ) frame_print("simeth_rx", skb->data, len);
+
+		/*
+		 * push the packet up & trigger software interrupt
+		 */
+		netif_rx(skb);
+
+		local->stats.rx_packets++;
+		local->stats.rx_bytes += len;
+
+	} while ( --rcv_count );
+
+	return len; /* 0 = nothing left to read, otherwise, we can try again */
+}
+
+/*
+ * Interrupt handler.  `dev_id` is the net_device registered in
+ * simeth_open().  Interrupts are only raised on reception, so the handler
+ * simply keeps calling simeth_rx() until it reports nothing left.
+ * Returns IRQ_NONE when no device is supplied, IRQ_HANDLED otherwise.
+ */
+static irqreturn_t
+simeth_interrupt(int irq, void *dev_id, struct pt_regs * regs)
+{
+	struct net_device *dev = dev_id;
+
+	if (!dev) {
+		printk(KERN_WARNING "simeth: irq %d for unknown device\n", irq);
+		return IRQ_NONE;
+	}
+
+	for (;;) {
+		if (simeth_rx(dev) == 0)
+			break;
+	}
+
+	return IRQ_HANDLED;
+}
+
+/* get_stats hook: returns the counters kept in the device's private data. */
+static struct net_device_stats *
+simeth_get_stats(struct net_device *dev)
+{
+	struct simeth_local *lp = dev->priv;
+	struct net_device_stats *st = &lp->stats;
+
+	return st;
+}
+
+/*
+ * Fake multicast ability: nothing is configured, the call is merely
+ * logged with the device name.
+ */
+static void set_multicast_list(struct net_device *dev)
+{
+	printk(KERN_WARNING "%s: set_multicast_list called\n", dev->name);
+}
+
+__initcall(simeth_probe);
diff --git a/arch/ia64/hp/sim/simscsi.c b/arch/ia64/hp/sim/simscsi.c
new file mode 100644
index 00000000000..56405dbfd73
--- /dev/null
+++ b/arch/ia64/hp/sim/simscsi.c
@@ -0,0 +1,404 @@
+/*
+ * Simulated SCSI driver.
+ *
+ * Copyright (C) 1999, 2001-2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ * Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * 02/01/15 David Mosberger Updated for v2.5.1
+ * 99/12/18 David Mosberger Added support for READ10/WRITE10 needed by linux v2.3.33
+ */
+#include <linux/blkdev.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/timer.h>
+#include <asm/irq.h>
+
+#include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_host.h>
+
+#define DEBUG_SIMSCSI 0
+
+#define SIMSCSI_REQ_QUEUE_LEN 64
+#define DEFAULT_SIMSCSI_ROOT "/var/ski-disks/sd"
+
+/* Simulator system calls: */
+
+#define SSC_OPEN 50
+#define SSC_CLOSE 51
+#define SSC_READ 52
+#define SSC_WRITE 53
+#define SSC_GET_COMPLETION 54
+#define SSC_WAIT_COMPLETION 55
+
+#define SSC_WRITE_ACCESS 2
+#define SSC_READ_ACCESS 1
+
+#if DEBUG_SIMSCSI
+ int simscsi_debug;
+# define DBG simscsi_debug
+#else
+# define DBG 0
+#endif
+
+static struct Scsi_Host *host;
+
+static void simscsi_interrupt (unsigned long val);
+static DECLARE_TASKLET(simscsi_tasklet, simscsi_interrupt, 0);
+
+/*
+ * Transfer descriptor handed (by physical address) to the simulator's
+ * SSC_READ/SSC_WRITE calls.
+ */
+struct disk_req {
+ unsigned long addr; /* physical address of the data buffer (callers use __pa()) */
+ unsigned len; /* # of bytes to transfer */
+};
+
+/*
+ * Completion record handed (by physical address) to SSC_WAIT_COMPLETION.
+ */
+struct disk_stat {
+ int fd; /* simulator descriptor, filled in by the caller before the wait */
+ unsigned count; /* read back after the wait and compared against disk_req.len */
+};
+
+extern long ia64_ssc (long arg0, long arg1, long arg2, long arg3, int nr);
+
+/*
+ * Simulator descriptor per SCSI target id (0-15), set by the INQUIRY
+ * handling in simscsi_queuecommand(); a negative value means no disk is
+ * open for that target.
+ */
+static int desc[16] = {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
+};
+
+/*
+ * Ring of commands awaiting completion: simscsi_queuecommand() stores at
+ * index `wr`, the simscsi_interrupt() tasklet consumes at index `rd`; an
+ * entry with a null `sc` is empty.  num_reqs counts occupied entries.
+ */
+static struct queue_entry {
+ struct scsi_cmnd *sc;
+} queue[SIMSCSI_REQ_QUEUE_LEN];
+
+static int rd, wr;
+static atomic_t num_reqs = ATOMIC_INIT(0);
+
+/* base name for default disks */
+static char *simscsi_root = DEFAULT_SIMSCSI_ROOT;
+
+#define MAX_ROOT_LEN 128
+
+/*
+ * used to setup a new base for disk images
+ * to use /foo/bar/disk[a-z] as disk images
+ * you have to specify simscsi=/foo/bar/disk on the command line
+ *
+ * A prefix longer than MAX_ROOT_LEN is rejected and the current default
+ * is kept: the image name is later built with sprintf() into a buffer of
+ * MAX_ROOT_LEN+16 bytes, so accepting a longer prefix would overflow it.
+ * Always returns 1.
+ */
+static int __init
+simscsi_setup (char *s)
+{
+	/* XXX Fix me we may need to strcpy() ? */
+	if (strlen(s) > MAX_ROOT_LEN) {
+		printk(KERN_ERR "simscsi_setup: prefix too long---using default %s\n",
+		       simscsi_root);
+		/* really keep the default, as the message says */
+		return 1;
+	}
+	simscsi_root = s;
+	return 1;
+}
+
+__setup("simscsi=", simscsi_setup);
+
+/*
+ * Tasklet body: completes queued commands in order.  Starting at index
+ * `rd`, each occupied ring entry is cleared, num_reqs is decremented and
+ * the command's scsi_done callback is invoked; the walk stops at the
+ * first empty entry.  `val` is unused.
+ */
+static void
+simscsi_interrupt (unsigned long val)
+{
+	for (;;) {
+		struct scsi_cmnd *cmd = queue[rd].sc;
+
+		if (cmd == 0)
+			break;
+
+		atomic_dec(&num_reqs);
+		queue[rd].sc = 0;
+		if (DBG)
+			printk("simscsi_interrupt: done with %ld\n", cmd->serial_number);
+		cmd->scsi_done(cmd);
+		rd = (rd + 1) % SIMSCSI_REQ_QUEUE_LEN;
+	}
+}
+
+/*
+ * bios_param hook: report a fixed geometry of 64 heads and 32 sectors,
+ * with the cylinder count derived as capacity >> 11 (64*32 = 2048 sectors
+ * per cylinder).  `sdev` and `n` are unused.  Always returns 0.
+ */
+static int
+simscsi_biosparam (struct scsi_device *sdev, struct block_device *n,
+		   sector_t capacity, int ip[])
+{
+	int *heads = &ip[0], *sectors = &ip[1], *cylinders = &ip[2];
+
+	*heads = 64;
+	*sectors = 32;
+	*cylinders = capacity >> 11;
+
+	return 0;
+}
+
+/*
+ * Transfer `len` bytes between the command's single request buffer and
+ * the disk image of the command's target, starting at byte `offset`.
+ * `mode` is SSC_READ or SSC_WRITE.
+ *
+ * If the request buffer is smaller than `len`, nothing is done and
+ * sc->result is left untouched.  Otherwise sc->result becomes GOOD when
+ * the simulator reports exactly `len` bytes, DID_ERROR << 16 if not.
+ */
+static void
+simscsi_readwrite (struct scsi_cmnd *sc, int mode, unsigned long offset, unsigned long len)
+{
+	struct disk_stat st;
+	struct disk_req rq;
+
+	rq.addr = __pa(sc->request_buffer);
+	rq.len = len;
+
+	if (rq.len > sc->request_bufflen)
+		return;
+
+	st.fd = desc[sc->device->id];
+	if (DBG)
+		printk("simscsi_%s @ %lx (off %lx)\n",
+		       mode == SSC_READ ? "read":"write", rq.addr, offset);
+
+	/* start the transfer, then wait for it to complete */
+	ia64_ssc(st.fd, 1, __pa(&rq), offset, mode);
+	ia64_ssc(__pa(&st), 0, 0, 0, SSC_WAIT_COMPLETION);
+
+	sc->result = (st.count == rq.len) ? GOOD : (DID_ERROR << 16);
+}
+
+/*
+ * Scatter/gather variant of simscsi_readwrite(): each of the sc->use_sg
+ * scatterlist entries is transferred in turn, with the disk offset
+ * advancing by the entry's length.  `mode` is SSC_READ or SSC_WRITE.
+ *
+ * The first short transfer sets sc->result to DID_ERROR << 16 and stops;
+ * otherwise sc->result ends up GOOD.
+ */
+static void
+simscsi_sg_readwrite (struct scsi_cmnd *sc, int mode, unsigned long offset)
+{
+	struct scatterlist *sg = (struct scatterlist *)sc->buffer;
+	struct disk_stat st;
+	struct disk_req rq;
+	int remaining;
+
+	st.fd = desc[sc->device->id];
+
+	for (remaining = sc->use_sg; remaining != 0; remaining--, sg++) {
+		rq.addr = __pa(page_address(sg->page) + sg->offset);
+		rq.len = sg->length;
+		if (DBG)
+			printk("simscsi_sg_%s @ %lx (off %lx) use_sg=%d len=%d\n",
+			       mode == SSC_READ ? "read":"write", rq.addr, offset,
+			       remaining, sg->length);
+
+		ia64_ssc(st.fd, 1, __pa(&rq), offset, mode);
+		ia64_ssc(__pa(&st), 0, 0, 0, SSC_WAIT_COMPLETION);
+
+		/* should not happen in our case */
+		if (st.count != rq.len) {
+			sc->result = DID_ERROR << 16;
+			return;
+		}
+		offset += sg->length;
+	}
+
+	sc->result = GOOD;
+}
+
+/*
+ * function handling both READ_6/WRITE_6 commands.
+ * Added 02/26/99 S.Eranian
+ *
+ * The 21-bit logical block address comes from CDB bytes 1-3 and is turned
+ * into a byte offset assuming 512-byte blocks.  Scatter/gather requests
+ * take their length from the scatterlist; otherwise CDB byte 4 holds the
+ * block count, where 0 means 256 blocks for the 6-byte commands.
+ */
+static void
+simscsi_readwrite6 (struct scsi_cmnd *sc, int mode)
+{
+	unsigned long offset;
+	unsigned long nblocks;
+
+	offset = (((sc->cmnd[1] & 0x1f) << 16) | (sc->cmnd[2] << 8) | sc->cmnd[3])*512;
+	if (sc->use_sg > 0) {
+		simscsi_sg_readwrite(sc, mode, offset);
+		return;
+	}
+
+	nblocks = sc->cmnd[4];
+	if (nblocks == 0)
+		nblocks = 256;	/* READ(6)/WRITE(6): a zero length means 256 blocks */
+	simscsi_readwrite(sc, mode, offset, nblocks*512);
+}
+
+/*
+ * Probe the size of the disk image behind simulator descriptor `fd` and
+ * return the number of its last readable 512-byte sector.
+ */
+static size_t
+simscsi_get_disk_size (int fd)
+{
+ struct disk_stat stat;
+ size_t bit, sectors = 0;
+ struct disk_req req;
+ char buf[512];
+
+ /*
+ * This is a bit kludgey: the simulator doesn't provide a direct way of determining
+ * the disk size, so we do a binary search, assuming a maximum disk size of 4GB.
+ *
+ * The sector count is built bit by bit, most significant bit first: for
+ * each candidate bit, the sector just below the candidate count
+ * (sectors | bit) is read, and the bit is kept only if a full 512-byte
+ * read comes back.
+ */
+ for (bit = (4UL << 30)/512; bit != 0; bit >>= 1) {
+ req.addr = __pa(&buf);
+ req.len = sizeof(buf);
+ ia64_ssc(fd, 1, __pa(&req), ((sectors | bit) - 1)*512, SSC_READ);
+ stat.fd = fd;
+ ia64_ssc(__pa(&stat), 0, 0, 0, SSC_WAIT_COMPLETION);
+ if (stat.count == sizeof(buf))
+ sectors |= bit;
+ }
+ /*
+ * NOTE(review): if not even the first sector is readable, sectors is
+ * still 0 here and the unsigned subtraction wraps around.
+ */
+ return sectors - 1; /* return last valid sector number */
+}
+
+/*
+ * function handling both READ_10/WRITE_10 commands.
+ *
+ * The 32-bit logical block address comes from CDB bytes 2-5 and is turned
+ * into a byte offset assuming 512-byte blocks.  Scatter/gather requests
+ * take their length from the scatterlist; otherwise CDB bytes 7-8 hold
+ * the block count.
+ */
+static void
+simscsi_readwrite10 (struct scsi_cmnd *sc, int mode)
+{
+	unsigned long offset;
+
+	/*
+	 * Widen each byte before shifting: in plain int arithmetic the
+	 * << 24 can shift into the sign bit and the * 512 overflows for any
+	 * block at or beyond the 2GB mark.
+	 */
+	offset = (  ((unsigned long) sc->cmnd[2] << 24)
+		  | ((unsigned long) sc->cmnd[3] << 16)
+		  | ((unsigned long) sc->cmnd[4] <<  8)
+		  | ((unsigned long) sc->cmnd[5] <<  0)) * 512UL;
+	if (sc->use_sg > 0)
+		simscsi_sg_readwrite(sc, mode, offset);
+	else
+		simscsi_readwrite(sc, mode, offset, ((sc->cmnd[7] << 8) | sc->cmnd[8])*512);
+}
+
+/*
+ * queuecommand hook: emulate one SCSI command against a simulator disk
+ * image.
+ *
+ * The command is executed synchronously right here; only its completion
+ * is deferred: the command is placed in the `queue` ring and the tasklet
+ * (simscsi_interrupt) later calls `done`.  sc->result starts out as
+ * DID_BAD_TARGET << 16, so every path that breaks out without setting a
+ * result completes the command with that status.  Only targets 0-15 with
+ * LUN 0 are handled.  Always returns 0.
+ */
+static int
+simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
+{
+ unsigned int target_id = sc->device->id;
+ char fname[MAX_ROOT_LEN+16];
+ size_t disk_size;
+ char *buf;
+#if DEBUG_SIMSCSI
+ register long sp asm ("sp");
+
+ if (DBG)
+ printk("simscsi_queuecommand: target=%d,cmnd=%u,sc=%lu,sp=%lx,done=%p\n",
+ target_id, sc->cmnd[0], sc->serial_number, sp, done);
+#endif
+
+ sc->result = DID_BAD_TARGET << 16;
+ sc->scsi_done = done;
+ if (target_id <= 15 && sc->device->lun == 0) {
+ switch (sc->cmnd[0]) {
+ case INQUIRY:
+ /*
+ * NOTE(review): the code below fills buf[0..7] and then copies 28
+ * more bytes at buf + 8, i.e. 36 bytes, while only 35 are required
+ * here -- confirm the bound.
+ */
+ if (sc->request_bufflen < 35) {
+ break;
+ }
+ /* the image file name is the configured prefix plus a letter per target */
+ sprintf (fname, "%s%c", simscsi_root, 'a' + target_id);
+ /*
+ * NOTE(review): every INQUIRY opens the image again and overwrites
+ * desc[target_id]; no SSC_CLOSE is issued anywhere in this function.
+ */
+ desc[target_id] = ia64_ssc(__pa(fname), SSC_READ_ACCESS|SSC_WRITE_ACCESS,
+ 0, 0, SSC_OPEN);
+ if (desc[target_id] < 0) {
+ /* disk doesn't exist... */
+ break;
+ }
+ buf = sc->request_buffer;
+ buf[0] = 0; /* magnetic disk */
+ buf[1] = 0; /* not a removable medium */
+ buf[2] = 2; /* SCSI-2 compliant device */
+ buf[3] = 2; /* SCSI-2 response data format */
+ buf[4] = 31; /* additional length (bytes) */
+ buf[5] = 0; /* reserved */
+ buf[6] = 0; /* reserved */
+ buf[7] = 0; /* various flags */
+ memcpy(buf + 8, "HP SIMULATED DISK 0.00", 28);
+ sc->result = GOOD;
+ break;
+
+ case TEST_UNIT_READY:
+ sc->result = GOOD;
+ break;
+
+ /* data transfers require a disk opened by an earlier INQUIRY */
+ case READ_6:
+ if (desc[target_id] < 0 )
+ break;
+ simscsi_readwrite6(sc, SSC_READ);
+ break;
+
+ case READ_10:
+ if (desc[target_id] < 0 )
+ break;
+ simscsi_readwrite10(sc, SSC_READ);
+ break;
+
+ case WRITE_6:
+ if (desc[target_id] < 0)
+ break;
+ simscsi_readwrite6(sc, SSC_WRITE);
+ break;
+
+ case WRITE_10:
+ if (desc[target_id] < 0)
+ break;
+ simscsi_readwrite10(sc, SSC_WRITE);
+ break;
+
+
+ case READ_CAPACITY:
+ if (desc[target_id] < 0 || sc->request_bufflen < 8) {
+ break;
+ }
+ buf = sc->request_buffer;
+
+ disk_size = simscsi_get_disk_size(desc[target_id]);
+
+ /* report the probed last sector number, most significant byte first: */
+ buf[0] = (disk_size >> 24) & 0xff;
+ buf[1] = (disk_size >> 16) & 0xff;
+ buf[2] = (disk_size >> 8) & 0xff;
+ buf[3] = (disk_size >> 0) & 0xff;
+ /* set block size of 512 bytes: */
+ buf[4] = 0;
+ buf[5] = 0;
+ buf[6] = 2;
+ buf[7] = 0;
+ sc->result = GOOD;
+ break;
+
+ case MODE_SENSE:
+ case MODE_SENSE_10:
+ /* sd.c uses this to determine whether disk does write-caching. */
+ /* NOTE(review): clears 128 bytes without checking request_bufflen */
+ memset(sc->request_buffer, 0, 128);
+ sc->result = GOOD;
+ break;
+
+ case START_STOP:
+ printk(KERN_ERR "START_STOP\n");
+ break;
+
+ default:
+ panic("simscsi: unknown SCSI command %u\n", sc->cmnd[0]);
+ }
+ }
+ /*
+ * NOTE(review): the result was initialised to DID_BAD_TARGET << 16 but is
+ * compared against the unshifted DID_BAD_TARGET here, so this branch
+ * looks unreachable unless DID_BAD_TARGET is 0 -- confirm intent.
+ */
+ if (sc->result == DID_BAD_TARGET) {
+ sc->result |= DRIVER_SENSE << 24;
+ sc->sense_buffer[0] = 0x70;
+ sc->sense_buffer[2] = 0x00;
+ }
+ if (atomic_read(&num_reqs) >= SIMSCSI_REQ_QUEUE_LEN) {
+ panic("Attempt to queue command while command is pending!!");
+ }
+ /* hand the finished command to the tasklet, which calls scsi_done */
+ atomic_inc(&num_reqs);
+ queue[wr].sc = sc;
+ wr = (wr + 1) % SIMSCSI_REQ_QUEUE_LEN;
+
+ tasklet_schedule(&simscsi_tasklet);
+ return 0;
+}
+
+/*
+ * Host reset error handler: not implemented.  Logs that fact and
+ * returns 0; `sc` is unused.
+ */
+static int simscsi_host_reset (struct scsi_cmnd *sc)
+{
+	printk(KERN_ERR "simscsi_host_reset: not implemented\n");
+
+	return 0;
+}
+
+/*
+ * Host template handed to scsi_host_alloc() in simscsi_init().  The
+ * queue depths match the size of the completion ring
+ * (SIMSCSI_REQ_QUEUE_LEN).
+ */
+static struct scsi_host_template driver_template = {
+ .name = "simulated SCSI host adapter",
+ .proc_name = "simscsi",
+ .queuecommand = simscsi_queuecommand,
+ .eh_host_reset_handler = simscsi_host_reset,
+ .bios_param = simscsi_biosparam,
+ .can_queue = SIMSCSI_REQ_QUEUE_LEN,
+ .this_id = -1,
+ .sg_tablesize = SG_ALL,
+ .max_sectors = 1024,
+ .cmd_per_lun = SIMSCSI_REQ_QUEUE_LEN,
+ .use_clustering = DISABLE_CLUSTERING,
+};
+
+/*
+ * Module init: allocate the simulated SCSI host, add it to the SCSI
+ * mid-layer and scan it for devices.
+ *
+ * Returns 0 on success, -ENOMEM if the host cannot be allocated, or the
+ * scsi_add_host() error.  On the latter the host reference obtained from
+ * scsi_host_alloc() is dropped again so that it is not leaked.
+ */
+static int __init
+simscsi_init(void)
+{
+	int error;
+
+	host = scsi_host_alloc(&driver_template, 0);
+	if (!host)
+		return -ENOMEM;
+
+	error = scsi_add_host(host, NULL);
+	if (error) {
+		/* undo scsi_host_alloc(); nothing else will ever release it */
+		scsi_host_put(host);
+		host = NULL;
+		return error;
+	}
+
+	scsi_scan_host(host);
+	return 0;
+}
+
+/*
+ * Module exit: remove the host from the SCSI mid-layer, then drop the
+ * reference taken by scsi_host_alloc() in simscsi_init().
+ */
+static void __exit simscsi_exit(void)
+{
+	struct Scsi_Host *shost = host;
+
+	scsi_remove_host(shost);
+	scsi_host_put(shost);
+}
+
+module_init(simscsi_init);
+module_exit(simscsi_exit);
diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c
new file mode 100644
index 00000000000..786e70718ce
--- /dev/null
+++ b/arch/ia64/hp/sim/simserial.c
@@ -0,0 +1,1032 @@
+/*
+ * Simulated Serial Driver (fake serial)
+ *
+ * This driver is mostly used for bringup purposes and will go away.
+ * It has a strong dependency on the system console. All outputs
+ * are rerouted to the same facility as the one used by printk which, in our
+ * case means sys_sim.c console (goes via the simulator). The code hereafter
+ * is completely leveraged from the serial.c driver.
+ *
+ * Copyright (C) 1999-2000, 2002-2003 Hewlett-Packard Co
+ * Stephane Eranian <eranian@hpl.hp.com>
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 02/04/00 D. Mosberger Merged in serial.c bug fixes in rs_close().
+ * 02/25/00 D. Mosberger Synced up with 2.3.99pre-5 version of serial.c.
+ * 07/30/02 D. Mosberger Replace sti()/cli() with explicit spinlocks & local irq masking
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+#include <linux/major.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/console.h>
+#include <linux/module.h>
+#include <linux/serial.h>
+#include <linux/serialP.h>
+
+#include <asm/irq.h>
+#include <asm/hw_irq.h>
+#include <asm/uaccess.h>
+
+#ifdef CONFIG_KDB
+# include <linux/kdb.h>
+#endif
+
+#undef SIMSERIAL_DEBUG /* define this to get some debug information */
+
+#define KEYBOARD_INTR 3 /* must match with simulator! */
+
+#define NR_PORTS 1 /* only one port for now */
+#define SERIAL_INLINE 1
+
+/*
+ * _INLINE_ decorates helpers such as transmit_chars(). It expands to
+ * "inline" when SERIAL_INLINE is defined and to nothing otherwise, so the
+ * file still builds if SERIAL_INLINE is removed.
+ */
+#ifdef SERIAL_INLINE
+#define _INLINE_ inline
+#else
+#define _INLINE_
+#endif
+
+#define IRQ_T(info) ((info->flags & ASYNC_SHARE_IRQ) ? SA_SHIRQ : SA_INTERRUPT)
+
+#define SSC_GETCHAR 21
+
+extern long ia64_ssc (long, long, long, long, int);
+extern void ia64_ssc_connect_irq (long intr, long irq);
+
+static char *serial_name = "SimSerial driver";
+static char *serial_version = "0.6";
+
+/*
+ * This has been extracted from asm/serial.h. We need one eventually but
+ * I don't know exactly what we're going to put in it so just fake one
+ * for now.
+ */
+#define BASE_BAUD ( 1843200 / 16 )
+
+#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST)
+
+/*
+ * Most of the values here are meaningless to this particular driver.
+ * However some values must be preserved for the code (leveraged from serial.c
+ * to work correctly).
+ * port must not be 0
+ * type must not be UNKNOWN
+ * So I picked arbitrary (guess from where?) values instead
+ */
+/*
+ * One entry per simulated port. The IRQ field is left at 0 here;
+ * simrs_init() assigns a vector to any entry whose irq is still 0.
+ * The type is PORT_16550 so that the entry is not skipped by the
+ * PORT_UNKNOWN test in simrs_init().
+ * NOTE(review): the initializer is positional and so depends on the field
+ * order of struct serial_state, which is declared outside this file.
+ */
+static struct serial_state rs_table[NR_PORTS]={
+ /* UART CLK PORT IRQ FLAGS */
+ { 0, BASE_BAUD, 0x3F8, 0, STD_COM_FLAGS,0,PORT_16550 } /* ttyS0 */
+};
+
+/*
+ * Just for the fun of it !
+ */
+/*
+ * Table of UART descriptions, indexed by serial_state.type. In this file
+ * only the name field is read (by line_info() and simrs_init()). The last
+ * entry is a zero terminator.
+ */
+static struct serial_uart_config uart_config[] = {
+ { "unknown", 1, 0 },
+ { "8250", 1, 0 },
+ { "16450", 1, 0 },
+ { "16550", 1, 0 },
+ { "16550A", 16, UART_CLEAR_FIFO | UART_USE_FIFO },
+ { "cirrus", 1, 0 },
+ { "ST16650", 1, UART_CLEAR_FIFO | UART_STARTECH },
+ { "ST16650V2", 32, UART_CLEAR_FIFO | UART_USE_FIFO |
+ UART_STARTECH },
+ { "TI16750", 64, UART_CLEAR_FIFO | UART_USE_FIFO},
+ { 0, 0}
+};
+
+struct tty_driver *hp_simserial_driver;
+
+static struct async_struct *IRQ_ports[NR_IRQS];
+
+static struct console *console;
+
+static unsigned char *tmp_buf;
+static DECLARE_MUTEX(tmp_buf_sem);
+
+extern struct console *console_drivers; /* from kernel/printk.c */
+
+/*
+ * ------------------------------------------------------------
+ * rs_stop() and rs_start()
+ *
+ * These routines are called before setting or resetting tty->stopped.
+ * They enable or disable transmitter interrupts, as necessary.
+ * ------------------------------------------------------------
+ */
+/*
+ * tty "stop" callback. In this driver it does nothing except print the
+ * tty's stop flags when SIMSERIAL_DEBUG is defined.
+ */
+static void rs_stop(struct tty_struct *tty)
+{
+#ifdef SIMSERIAL_DEBUG
+ printk("rs_stop: tty->stopped=%d tty->hw_stopped=%d tty->flow_stopped=%d\n",
+ tty->stopped, tty->hw_stopped, tty->flow_stopped);
+#endif
+
+}
+
+/*
+ * tty "start" callback. Like rs_stop(), it does nothing except print the
+ * tty's stop flags when SIMSERIAL_DEBUG is defined.
+ */
+static void rs_start(struct tty_struct *tty)
+{
+	/*
+	 * Use #ifdef (as the rest of the file does) so that a bare
+	 * "#define SIMSERIAL_DEBUG" enables the trace instead of breaking
+	 * the preprocessor expression.
+	 */
+#ifdef SIMSERIAL_DEBUG
+	printk("rs_start: tty->stopped=%d tty->hw_stopped=%d tty->flow_stopped=%d\n",
+		tty->stopped, tty->hw_stopped, tty->flow_stopped);
+#endif
+}
+
+/*
+ * Drain pending input from the simulator (SSC_GETCHAR) into the tty flip
+ * buffer and push it to the line discipline.
+ *
+ * A small state machine (seen_esc, kept across calls) recognises the
+ * three-byte sequences ESC 'O' <key>: ESC O P calls show_state(), and
+ * with CONFIG_KDB, ESC O S enters kdb. The bytes of such a sequence are
+ * consumed and never reach the tty. An ESC that is not followed by 'O'
+ * is dropped; the following character is delivered normally.
+ *
+ * tty:  destination tty.
+ * regs: interrupt register frame, only passed on to kdb.
+ */
+static void receive_chars(struct tty_struct *tty, struct pt_regs *regs)
+{
+ unsigned char ch;
+ static unsigned char seen_esc = 0;
+
+ /* The loop ends when the simulator call yields a zero byte. */
+ while ( (ch = ia64_ssc(0, 0, 0, 0, SSC_GETCHAR)) ) {
+ if ( ch == 27 && seen_esc == 0 ) {
+ seen_esc = 1;
+ continue;
+ } else {
+ if ( seen_esc==1 && ch == 'O' ) {
+ seen_esc = 2;
+ continue;
+ } else if ( seen_esc == 2 ) {
+ if ( ch == 'P' ) show_state(); /* F1 key */
+#ifdef CONFIG_KDB
+ if ( ch == 'S' )
+ kdb(KDB_REASON_KEYBOARD, 0, (kdb_eframe_t) regs);
+#endif
+
+ seen_esc = 0;
+ continue;
+ }
+ }
+ seen_esc = 0;
+ /* Flip buffer full: stop reading; the character just fetched is dropped. */
+ if (tty->flip.count >= TTY_FLIPBUF_SIZE) break;
+
+ *tty->flip.char_buf_ptr = ch;
+
+ *tty->flip.flag_buf_ptr = 0;
+
+ tty->flip.flag_buf_ptr++;
+ tty->flip.char_buf_ptr++;
+ tty->flip.count++;
+ }
+ tty_flip_buffer_push(tty);
+}
+
+/*
+ * This is the serial driver's interrupt routine for a single port.
+ *
+ * The port is looked up in IRQ_ports[] by irq number; dev_id is not used.
+ * Returns IRQ_NONE when no port (or no tty) is attached to the irq,
+ * IRQ_HANDLED after the pending input has been read.
+ */
+static irqreturn_t rs_interrupt_single(int irq, void *dev_id, struct pt_regs * regs)
+{
+ struct async_struct * info;
+
+ /*
+ * I don't know exactly why they don't use the dev_id opaque data
+ * pointer instead of this extra lookup table
+ */
+ info = IRQ_ports[irq];
+ if (!info || !info->tty) {
+ printk(KERN_INFO "simrs_interrupt_single: info|tty=0 info=%p problem\n", info);
+ return IRQ_NONE;
+ }
+ /*
+ * pretty simple in our case, because we only get interrupts
+ * on inbound traffic
+ */
+ receive_chars(info->tty, regs);
+ return IRQ_HANDLED;
+}
+
+/*
+ * -------------------------------------------------------------------
+ * Here ends the serial interrupt routines.
+ * -------------------------------------------------------------------
+ */
+
+#if 0
+/*
+ * not really used in our situation so keep them commented out for now
+ *
+ * (Compiled out: bottom-half handler that would run the tq_serial task
+ * queue and log that it was called.)
+ */
+static DECLARE_TASK_QUEUE(tq_serial); /* used to be at the top of the file */
+static void do_serial_bh(void)
+{
+ run_task_queue(&tq_serial);
+ printk(KERN_ERR "do_serial_bh: called\n");
+}
+#endif
+
+/*
+ * Work handler attached to each port's work item by get_async_struct().
+ * It performs no work; it only logs that it was called. The argument is
+ * ignored.
+ */
+static void do_softint(void *private_)
+{
+ printk(KERN_ERR "simserial: do_softint called\n");
+}
+
+/*
+ * Queue a single character in the port's circular transmit buffer.
+ * Nothing is sent here; the data goes out when transmit_chars() runs
+ * (e.g. from rs_flush_chars()). The character is silently dropped if the
+ * tty, the port or its buffer is missing, or if the buffer is full.
+ */
+static void rs_put_char(struct tty_struct *tty, unsigned char ch)
+{
+	struct async_struct *info;
+	unsigned long flags;
+
+	/* Validate tty before reading driver_data from it. */
+	if (!tty)
+		return;
+	info = (struct async_struct *)tty->driver_data;
+	if (!info || !info->xmit.buf)
+		return;
+
+	local_irq_save(flags);
+	if (CIRC_SPACE(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE) == 0) {
+		local_irq_restore(flags);
+		return;
+	}
+	info->xmit.buf[info->xmit.head] = ch;
+	/* SERIAL_XMIT_SIZE is used as a power-of-two ring size: wrap by masking. */
+	info->xmit.head = (info->xmit.head + 1) & (SERIAL_XMIT_SIZE-1);
+	local_irq_restore(flags);
+}
+
+/*
+ * Send pending output through the console's write method, with local
+ * interrupts disabled for the duration.
+ *
+ * A pending high-priority character (info->x_char) is sent on its own and
+ * the function returns. Otherwise the whole contents of the circular
+ * transmit buffer are written in at most two chunks, unless the buffer is
+ * empty or the tty is stopped.
+ *
+ * intr_done is not used; every caller in this file passes NULL.
+ *
+ * NOTE(review): "console" is chosen in rs_open() by walking
+ * console_drivers and is left NULL if no enabled console with a write
+ * method is found; it is dereferenced here without a check.
+ */
+static _INLINE_ void transmit_chars(struct async_struct *info, int *intr_done)
+{
+ int count;
+ unsigned long flags;
+
+
+ local_irq_save(flags);
+
+ if (info->x_char) {
+ char c = info->x_char;
+
+ console->write(console, &c, 1);
+
+ info->state->icount.tx++;
+ info->x_char = 0;
+
+ goto out;
+ }
+
+ if (info->xmit.head == info->xmit.tail || info->tty->stopped || info->tty->hw_stopped) {
+#ifdef SIMSERIAL_DEBUG
+ printk("transmit_chars: head=%d, tail=%d, stopped=%d\n",
+ info->xmit.head, info->xmit.tail, info->tty->stopped);
+#endif
+ goto out;
+ }
+ /*
+ * We removed the loop and try to do it in two chunks. We need
+ * 2 operations maximum because it's a ring buffer.
+ *
+ * First from the current tail up to the end of the buffer (or up to
+ * head, whichever comes first).
+ * Then from the beginning of the buffer, if data wrapped around.
+ */
+
+ count = min(CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE),
+ SERIAL_XMIT_SIZE - info->xmit.tail);
+ console->write(console, info->xmit.buf+info->xmit.tail, count);
+
+ info->xmit.tail = (info->xmit.tail+count) & (SERIAL_XMIT_SIZE-1);
+
+ /*
+ * We have more at the beginning of the buffer. A non-zero count here
+ * means the first chunk ran to the end of the buffer, so tail has
+ * wrapped to 0 and the unmasked addition below cannot overflow the ring.
+ */
+ count = CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE);
+ if (count) {
+ console->write(console, info->xmit.buf, count);
+ info->xmit.tail += count;
+ }
+out:
+ local_irq_restore(flags);
+}
+
+/*
+ * tty "flush_chars" callback: push out whatever rs_put_char() queued.
+ * Does nothing if the buffer is empty or missing, or the tty is stopped.
+ */
+static void rs_flush_chars(struct tty_struct *tty)
+{
+ struct async_struct *info = (struct async_struct *)tty->driver_data;
+
+ if (info->xmit.head == info->xmit.tail || tty->stopped || tty->hw_stopped ||
+ !info->xmit.buf)
+ return;
+
+ transmit_chars(info, NULL);
+}
+
+
+/*
+ * tty "write" callback: copy up to count bytes from buf into the port's
+ * circular transmit buffer and transmit them immediately unless the tty
+ * is stopped.
+ *
+ * Returns the number of bytes accepted, which is less than count when
+ * the buffer fills up, and 0 when the tty, the port, its transmit buffer
+ * or tmp_buf is missing.
+ */
+static int rs_write(struct tty_struct * tty,
+		    const unsigned char *buf, int count)
+{
+	int c, ret = 0;
+	struct async_struct *info;
+	unsigned long flags;
+
+	/* Validate tty before reading driver_data from it. */
+	if (!tty)
+		return 0;
+	info = (struct async_struct *)tty->driver_data;
+	if (!info || !info->xmit.buf || !tmp_buf)
+		return 0;
+
+	local_irq_save(flags);
+	while (1) {
+		/* Copy at most up to the physical end of the ring per pass. */
+		c = CIRC_SPACE_TO_END(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE);
+		if (count < c)
+			c = count;
+		if (c <= 0) {
+			break;
+		}
+		memcpy(info->xmit.buf + info->xmit.head, buf, c);
+		info->xmit.head = ((info->xmit.head + c) &
+				   (SERIAL_XMIT_SIZE-1));
+		buf += c;
+		count -= c;
+		ret += c;
+	}
+	local_irq_restore(flags);
+	/*
+	 * Hey, we transmit directly from here in our case
+	 */
+	if (CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE)
+	    && !tty->stopped && !tty->hw_stopped) {
+		transmit_chars(info, NULL);
+	}
+	return ret;
+}
+
+/*
+ * tty "write_room" callback: number of bytes that can still be queued in
+ * the port's circular transmit buffer.
+ */
+static int rs_write_room(struct tty_struct *tty)
+{
+	struct async_struct *port = (struct async_struct *)tty->driver_data;
+	int room;
+
+	room = CIRC_SPACE(port->xmit.head, port->xmit.tail, SERIAL_XMIT_SIZE);
+	return room;
+}
+
+/*
+ * tty "chars_in_buffer" callback: number of bytes currently queued in
+ * the port's circular transmit buffer.
+ */
+static int rs_chars_in_buffer(struct tty_struct *tty)
+{
+	struct async_struct *port = (struct async_struct *)tty->driver_data;
+	int pending;
+
+	pending = CIRC_CNT(port->xmit.head, port->xmit.tail, SERIAL_XMIT_SIZE);
+	return pending;
+}
+
+/*
+ * tty "flush_buffer" callback: discard everything queued for transmission
+ * by resetting the ring indices, then wake up writers waiting on the tty
+ * and, if requested via TTY_DO_WRITE_WAKEUP, notify the line discipline.
+ */
+static void rs_flush_buffer(struct tty_struct *tty)
+{
+ struct async_struct *info = (struct async_struct *)tty->driver_data;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ info->xmit.head = info->xmit.tail = 0;
+ local_irq_restore(flags);
+
+ wake_up_interruptible(&tty->write_wait);
+
+ if ((tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) &&
+ tty->ldisc.write_wakeup)
+ (tty->ldisc.write_wakeup)(tty);
+}
+
+/*
+ * This function is used to send a high-priority XON/XOFF character to
+ * the device
+ */
+/*
+ * Record ch as the pending high-priority character and, if it is
+ * non-zero, have transmit_chars() send it ahead of any buffered data.
+ * Passing 0 just clears the pending character.
+ */
+static void rs_send_xchar(struct tty_struct *tty, char ch)
+{
+ struct async_struct *info = (struct async_struct *)tty->driver_data;
+
+ info->x_char = ch;
+ if (ch) {
+ /*
+ * I guess we could call console->write() directly, but
+ * let's go through transmit_chars() for now.
+ */
+ transmit_chars(info, NULL);
+ }
+}
+
+/*
+ * ------------------------------------------------------------
+ * rs_throttle()
+ *
+ * This routine is called by the upper-layer tty layer to signal that
+ * incoming characters should be throttled.
+ * ------------------------------------------------------------
+ */
+/*
+ * If IXOFF is set on the tty, send its STOP character as a high-priority
+ * character. Always logs that it was called.
+ */
+static void rs_throttle(struct tty_struct * tty)
+{
+ if (I_IXOFF(tty)) rs_send_xchar(tty, STOP_CHAR(tty));
+
+ printk(KERN_INFO "simrs_throttle called\n");
+}
+
+/*
+ * Counterpart of rs_throttle(). If IXOFF is set on the tty: when a
+ * high-priority character is still pending it is simply cancelled,
+ * otherwise the START character is sent. Always logs that it was called.
+ */
+static void rs_unthrottle(struct tty_struct * tty)
+{
+ struct async_struct *info = (struct async_struct *)tty->driver_data;
+
+ if (I_IXOFF(tty)) {
+ if (info->x_char)
+ info->x_char = 0;
+ else
+ rs_send_xchar(tty, START_CHAR(tty));
+ }
+ printk(KERN_INFO "simrs_unthrottle called\n");
+}
+
+/*
+ * rs_break() --- routine which turns the break handling on or off
+ */
+/* Intentionally empty in this driver: both arguments are ignored. */
+static void rs_break(struct tty_struct *tty, int break_state)
+{
+}
+
+/*
+ * tty "ioctl" callback. None of the serial ioctls is really implemented:
+ * each recognised command logs a message and returns a fixed status
+ * (0 or -EINVAL, as listed below); "arg" is never read or written.
+ *
+ * Returns -EIO if the tty is flagged TTY_IO_ERROR and cmd is not one of
+ * the commands exempted by the first test, and -ENOIOCTLCMD for commands
+ * that are not recognised.
+ */
+static int rs_ioctl(struct tty_struct *tty, struct file * file,
+ unsigned int cmd, unsigned long arg)
+{
+ if ((cmd != TIOCGSERIAL) && (cmd != TIOCSSERIAL) &&
+ (cmd != TIOCSERCONFIG) && (cmd != TIOCSERGSTRUCT) &&
+ (cmd != TIOCMIWAIT) && (cmd != TIOCGICOUNT)) {
+ if (tty->flags & (1 << TTY_IO_ERROR))
+ return -EIO;
+ }
+
+ switch (cmd) {
+ case TIOCMGET:
+ printk(KERN_INFO "rs_ioctl: TIOCMGET called\n");
+ return -EINVAL;
+ case TIOCMBIS:
+ case TIOCMBIC:
+ case TIOCMSET:
+ printk(KERN_INFO "rs_ioctl: TIOCMBIS/BIC/SET called\n");
+ return -EINVAL;
+ case TIOCGSERIAL:
+ printk(KERN_INFO "simrs_ioctl TIOCGSERIAL called\n");
+ return 0;
+ case TIOCSSERIAL:
+ printk(KERN_INFO "simrs_ioctl TIOCSSERIAL called\n");
+ return 0;
+ case TIOCSERCONFIG:
+ printk(KERN_INFO "rs_ioctl: TIOCSERCONFIG called\n");
+ return -EINVAL;
+
+ case TIOCSERGETLSR: /* Get line status register */
+ printk(KERN_INFO "rs_ioctl: TIOCSERGETLSR called\n");
+ return -EINVAL;
+
+ case TIOCSERGSTRUCT:
+ printk(KERN_INFO "rs_ioctl: TIOCSERGSTRUCT called\n");
+#if 0
+ if (copy_to_user((struct async_struct *) arg,
+ info, sizeof(struct async_struct)))
+ return -EFAULT;
+#endif
+ return 0;
+
+ /*
+ * Wait for any of the 4 modem inputs (DCD,RI,DSR,CTS) to change
+ * - mask passed in arg for lines of interest
+ * (use |'ed TIOCM_RNG/DSR/CD/CTS for masking)
+ * Caller should use TIOCGICOUNT to see which one it was
+ *
+ * (Not implemented here: returns 0 immediately without waiting.)
+ */
+ case TIOCMIWAIT:
+ printk(KERN_INFO "rs_ioctl: TIOCMIWAIT: called\n");
+ return 0;
+ /*
+ * Get counter of input serial line interrupts (DCD,RI,DSR,CTS)
+ * Return: write counters to the user passed counter struct
+ * NB: both 1->0 and 0->1 transitions are counted except for
+ * RI where only 0->1 is counted.
+ *
+ * (Not implemented here: returns 0 without writing anything.)
+ */
+ case TIOCGICOUNT:
+ printk(KERN_INFO "rs_ioctl: TIOCGICOUNT called\n");
+ return 0;
+
+ case TIOCSERGWILD:
+ case TIOCSERSWILD:
+ /* "setserial -W" is called in Debian boot */
+ printk (KERN_INFO "TIOCSER?WILD ioctl obsolete, ignored.\n");
+ return 0;
+
+ default:
+ return -ENOIOCTLCMD;
+ }
+ /* Not reached: every case above returns. */
+ return 0;
+}
+
+/* Input-mode bits whose change matters to rs_set_termios(). */
+#define RELEVANT_IFLAG(iflag) ((iflag) & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK))
+
+/*
+ * tty "set_termios" callback. Returns at once when neither c_cflag nor
+ * the relevant input flags changed. The only change acted upon is
+ * CRTSCTS being turned off: hw_stopped is cleared and rs_start() called.
+ */
+static void rs_set_termios(struct tty_struct *tty, struct termios *old_termios)
+{
+ unsigned int cflag = tty->termios->c_cflag;
+
+ if ( (cflag == old_termios->c_cflag)
+ && ( RELEVANT_IFLAG(tty->termios->c_iflag)
+ == RELEVANT_IFLAG(old_termios->c_iflag)))
+ return;
+
+
+ /* Handle turning off CRTSCTS */
+ if ((old_termios->c_cflag & CRTSCTS) &&
+ !(tty->termios->c_cflag & CRTSCTS)) {
+ tty->hw_stopped = 0;
+ rs_start(tty);
+ }
+}
+/*
+ * This routine will shutdown a serial port; interrupts are disabled, and
+ * DTR is dropped if the hangup on close termio flag is on.
+ */
+/*
+ * Undo startup(): unlink the port from its IRQ chain, release the IRQ,
+ * free the transmit page, flag the tty with TTY_IO_ERROR and clear
+ * ASYNC_INITIALIZED. Does nothing if the port is not initialized.
+ * Everything after the initial check runs with local interrupts disabled.
+ */
+static void shutdown(struct async_struct * info)
+{
+ unsigned long flags;
+ struct serial_state *state;
+ int retval;
+
+ if (!(info->flags & ASYNC_INITIALIZED)) return;
+
+ state = info->state;
+
+#ifdef SIMSERIAL_DEBUG
+ printk("Shutting down serial port %d (irq %d)....", info->line,
+ state->irq);
+#endif
+
+ local_irq_save(flags);
+ {
+ /*
+ * First unlink the serial port from the IRQ chain...
+ */
+ if (info->next_port)
+ info->next_port->prev_port = info->prev_port;
+ if (info->prev_port)
+ info->prev_port->next_port = info->next_port;
+ else
+ IRQ_ports[state->irq] = info->next_port;
+
+ /*
+ * Free the IRQ, if necessary
+ *
+ * If exactly one port is left on the chain the IRQ is released and
+ * immediately re-requested for rs_interrupt_single(); if none is
+ * left it is simply released.
+ */
+ if (state->irq && (!IRQ_ports[state->irq] ||
+ !IRQ_ports[state->irq]->next_port)) {
+ if (IRQ_ports[state->irq]) {
+ free_irq(state->irq, NULL);
+ retval = request_irq(state->irq, rs_interrupt_single,
+ IRQ_T(info), "serial", NULL);
+
+ if (retval)
+ printk(KERN_ERR "serial shutdown: request_irq: error %d"
+ " Couldn't reacquire IRQ.\n", retval);
+ } else
+ free_irq(state->irq, NULL);
+ }
+
+ if (info->xmit.buf) {
+ free_page((unsigned long) info->xmit.buf);
+ info->xmit.buf = 0;
+ }
+
+ if (info->tty) set_bit(TTY_IO_ERROR, &info->tty->flags);
+
+ info->flags &= ~ASYNC_INITIALIZED;
+ }
+ local_irq_restore(flags);
+}
+
+/*
+ * ------------------------------------------------------------
+ * rs_close()
+ *
+ * This routine is called when the serial port gets closed. First, we
+ * wait for the last remaining data to be sent. Then, we unlink its
+ * async structure from the interrupt chain if necessary, and we free
+ * that IRQ if nothing is left in the chain.
+ * ------------------------------------------------------------
+ */
+/*
+ * tty "close" callback. Drops one reference on the port (state->count);
+ * when the last reference goes away the port is shut down, its buffers
+ * are flushed, the tty is detached from the port and sleepers on
+ * open_wait/close_wait are woken.
+ */
+static void rs_close(struct tty_struct *tty, struct file * filp)
+{
+ struct async_struct * info = (struct async_struct *)tty->driver_data;
+ struct serial_state *state;
+ unsigned long flags;
+
+ if (!info ) return;
+
+ state = info->state;
+
+ local_irq_save(flags);
+ if (tty_hung_up_p(filp)) {
+#ifdef SIMSERIAL_DEBUG
+ printk("rs_close: hung_up\n");
+#endif
+ local_irq_restore(flags);
+ return;
+ }
+#ifdef SIMSERIAL_DEBUG
+ printk("rs_close ttys%d, count = %d\n", info->line, state->count);
+#endif
+ if ((tty->count == 1) && (state->count != 1)) {
+ /*
+ * Uh, oh. tty->count is 1, which means that the tty
+ * structure will be freed. state->count should always
+ * be one in these conditions. If it's greater than
+ * one, we've got real problems, since it means the
+ * serial port won't be shutdown.
+ */
+ printk(KERN_ERR "rs_close: bad serial port count; tty->count is 1, "
+ "state->count is %d\n", state->count);
+ state->count = 1;
+ }
+ if (--state->count < 0) {
+ printk(KERN_ERR "rs_close: bad serial port count for ttys%d: %d\n",
+ info->line, state->count);
+ state->count = 0;
+ }
+ /* Other openers remain: nothing more to do. */
+ if (state->count) {
+ local_irq_restore(flags);
+ return;
+ }
+ info->flags |= ASYNC_CLOSING;
+ local_irq_restore(flags);
+
+ /*
+ * Last close: shut the port down right away (this driver does not
+ * wait for the transmit buffer to drain), then flush the driver and
+ * line-discipline buffers.
+ */
+ shutdown(info);
+ if (tty->driver->flush_buffer) tty->driver->flush_buffer(tty);
+ if (tty->ldisc.flush_buffer) tty->ldisc.flush_buffer(tty);
+ info->event = 0;
+ info->tty = 0;
+ if (info->blocked_open) {
+ /* Honour the configured close delay before waking blocked openers. */
+ if (info->close_delay) {
+ current->state = TASK_INTERRUPTIBLE;
+ schedule_timeout(info->close_delay);
+ }
+ wake_up_interruptible(&info->open_wait);
+ }
+ info->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
+ wake_up_interruptible(&info->close_wait);
+}
+
+/*
+ * rs_wait_until_sent() --- wait until the transmitter is empty
+ */
+/* Intentionally empty in this driver: both arguments are ignored. */
+static void rs_wait_until_sent(struct tty_struct *tty, int timeout)
+{
+}
+
+
+/*
+ * rs_hangup() --- called by tty_hangup() when a hangup is signaled.
+ */
+/*
+ * tty "hangup" callback: drop queued output and, unless a close is
+ * already in progress, shut the port down, reset its open count, detach
+ * the tty and wake anyone sleeping on open_wait.
+ */
+static void rs_hangup(struct tty_struct *tty)
+{
+	struct async_struct *port = (struct async_struct *)tty->driver_data;
+	struct serial_state *sstate = port->state;
+
+#ifdef SIMSERIAL_DEBUG
+	printk("rs_hangup: called\n");
+#endif
+
+	rs_flush_buffer(tty);
+
+	/* A concurrent rs_close() performs the teardown itself. */
+	if (!(port->flags & ASYNC_CLOSING)) {
+		shutdown(port);
+
+		port->event = 0;
+		sstate->count = 0;
+		port->flags &= ~ASYNC_NORMAL_ACTIVE;
+		port->tty = 0;
+		wake_up_interruptible(&port->open_wait);
+	}
+}
+
+
+/*
+ * Return (in *ret_info) the async_struct for port "line", allocating and
+ * initializing it on first use. The port's open count is incremented up
+ * front and rolled back only if the allocation fails.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+static int get_async_struct(int line, struct async_struct **ret_info)
+{
+ struct async_struct *info;
+ struct serial_state *sstate;
+
+ sstate = rs_table + line;
+ sstate->count++;
+ if (sstate->info) {
+ *ret_info = sstate->info;
+ return 0;
+ }
+ info = kmalloc(sizeof(struct async_struct), GFP_KERNEL);
+ if (!info) {
+ sstate->count--;
+ return -ENOMEM;
+ }
+ memset(info, 0, sizeof(struct async_struct));
+ init_waitqueue_head(&info->open_wait);
+ init_waitqueue_head(&info->close_wait);
+ init_waitqueue_head(&info->delta_msr_wait);
+ info->magic = SERIAL_MAGIC;
+ info->port = sstate->port;
+ info->flags = sstate->flags;
+ info->xmit_fifo_size = sstate->xmit_fifo_size;
+ info->line = line;
+ INIT_WORK(&info->work, do_softint, info);
+ info->state = sstate;
+ /*
+ * Re-check after the allocation: presumably another opener may have
+ * installed a structure while kmalloc() slept, in which case ours is
+ * discarded and the existing one is returned.
+ */
+ if (sstate->info) {
+ kfree(info);
+ *ret_info = sstate->info;
+ return 0;
+ }
+ *ret_info = sstate->info = info;
+ return 0;
+}
+
+/*
+ * Bring a port up: provide its transmit page, request its IRQ, link it
+ * at the head of IRQ_ports[irq] and mark it ASYNC_INITIALIZED.
+ *
+ * Returns 0 on success, -ENOMEM if no page is available, -EBUSY if the
+ * IRQ already has a port attached, or the request_irq() error.
+ * Note that 0 is also returned, without ASYNC_INITIALIZED being set by
+ * this call, when the port is already initialized, when the port has no
+ * port/type configured, and when request_irq() fails for a caller with
+ * CAP_SYS_ADMIN (in the last two cases the tty is flagged TTY_IO_ERROR).
+ */
+static int
+startup(struct async_struct *info)
+{
+ unsigned long flags;
+ int retval=0;
+ irqreturn_t (*handler)(int, void *, struct pt_regs *);
+ struct serial_state *state= info->state;
+ unsigned long page;
+
+ /* Allocate before disabling interrupts; GFP_KERNEL may sleep. */
+ page = get_zeroed_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+
+ local_irq_save(flags);
+
+ if (info->flags & ASYNC_INITIALIZED) {
+ free_page(page);
+ goto errout;
+ }
+
+ if (!state->port || !state->type) {
+ if (info->tty) set_bit(TTY_IO_ERROR, &info->tty->flags);
+ free_page(page);
+ goto errout;
+ }
+ /* Keep an existing transmit buffer; otherwise adopt the new page. */
+ if (info->xmit.buf)
+ free_page(page);
+ else
+ info->xmit.buf = (unsigned char *) page;
+
+#ifdef SIMSERIAL_DEBUG
+ printk("startup: ttys%d (irq %d)...", info->line, state->irq);
+#endif
+
+ /*
+ * Allocate the IRQ if necessary
+ */
+ if (state->irq && (!IRQ_ports[state->irq] ||
+ !IRQ_ports[state->irq]->next_port)) {
+ if (IRQ_ports[state->irq]) {
+ retval = -EBUSY;
+ goto errout;
+ } else
+ handler = rs_interrupt_single;
+
+ retval = request_irq(state->irq, handler, IRQ_T(info), "simserial", NULL);
+ if (retval) {
+ if (capable(CAP_SYS_ADMIN)) {
+ if (info->tty)
+ set_bit(TTY_IO_ERROR,
+ &info->tty->flags);
+ retval = 0;
+ }
+ goto errout;
+ }
+ }
+
+ /*
+ * Insert serial port into IRQ chain.
+ */
+ info->prev_port = 0;
+ info->next_port = IRQ_ports[state->irq];
+ if (info->next_port)
+ info->next_port->prev_port = info;
+ IRQ_ports[state->irq] = info;
+
+ if (info->tty) clear_bit(TTY_IO_ERROR, &info->tty->flags);
+
+ info->xmit.head = info->xmit.tail = 0;
+
+#if 0
+ /*
+ * Set up serial timers...
+ */
+ timer_table[RS_TIMER].expires = jiffies + 2*HZ/100;
+ timer_active |= 1 << RS_TIMER;
+#endif
+
+ /*
+ * Set up the tty->alt_speed kludge
+ */
+ if (info->tty) {
+ if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_HI)
+ info->tty->alt_speed = 57600;
+ if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_VHI)
+ info->tty->alt_speed = 115200;
+ if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_SHI)
+ info->tty->alt_speed = 230400;
+ if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_WARP)
+ info->tty->alt_speed = 460800;
+ }
+
+ info->flags |= ASYNC_INITIALIZED;
+ local_irq_restore(flags);
+ return 0;
+
+errout:
+ local_irq_restore(flags);
+ return retval;
+}
+
+
+/*
+ * This routine is called whenever a serial port is opened. It
+ * enables interrupts for a serial port, linking in its async structure into
+ * the IRQ chain. It also performs the serial-specific
+ * initialization for the tty structure.
+ */
+/*
+ * tty "open" callback: attach the tty to the port's async_struct, make
+ * sure the shared tmp_buf page exists, start the port and select the
+ * console whose write method transmit_chars() will use.
+ *
+ * Returns 0 on success, -ENODEV for an out-of-range line, -ENOMEM on
+ * allocation failure, -EAGAIN (or -ERESTARTSYS when SERIAL_DO_RESTART is
+ * defined and ASYNC_HUP_NOTIFY is not set) if the port is being closed
+ * or the file was hung up, or the error returned by startup().
+ */
+static int rs_open(struct tty_struct *tty, struct file * filp)
+{
+ struct async_struct *info;
+ int retval, line;
+ unsigned long page;
+
+ line = tty->index;
+ if ((line < 0) || (line >= NR_PORTS))
+ return -ENODEV;
+ retval = get_async_struct(line, &info);
+ if (retval)
+ return retval;
+ tty->driver_data = info;
+ info->tty = tty;
+
+#ifdef SIMSERIAL_DEBUG
+ printk("rs_open %s, count = %d\n", tty->name, info->state->count);
+#endif
+ info->tty->low_latency = (info->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
+
+ /*
+ * Allocate the shared tmp_buf page on first open; tmp_buf is tested
+ * again after the allocation and the new page is released if the
+ * buffer has appeared in the meantime.
+ */
+ if (!tmp_buf) {
+ page = get_zeroed_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+ if (tmp_buf)
+ free_page(page);
+ else
+ tmp_buf = (unsigned char *) page;
+ }
+
+ /*
+ * If the port is the middle of closing, bail out now
+ */
+ if (tty_hung_up_p(filp) ||
+ (info->flags & ASYNC_CLOSING)) {
+ if (info->flags & ASYNC_CLOSING)
+ interruptible_sleep_on(&info->close_wait);
+#ifdef SERIAL_DO_RESTART
+ return ((info->flags & ASYNC_HUP_NOTIFY) ?
+ -EAGAIN : -ERESTARTSYS);
+#else
+ return -EAGAIN;
+#endif
+ }
+
+ /*
+ * Start up serial port
+ */
+ retval = startup(info);
+ if (retval) {
+ return retval;
+ }
+
+ /*
+ * figure out which console to use (should be one already)
+ *
+ * The first enabled console that has a write method is chosen.
+ * NOTE(review): "console" ends up NULL if none qualifies; this is
+ * not reported to the caller.
+ */
+ console = console_drivers;
+ while (console) {
+ if ((console->flags & CON_ENABLED) && console->write) break;
+ console = console->next;
+ }
+
+#ifdef SIMSERIAL_DEBUG
+ printk("rs_open ttys%d successful\n", info->line);
+#endif
+ return 0;
+}
+
+/*
+ * /proc fs routines....
+ */
+
+/*
+ * Format one /proc line describing a port (line number, UART name, port
+ * address, irq) into buf. Returns the number of characters written, as
+ * reported by sprintf(). No bound is applied to the output length.
+ */
+static inline int line_info(char *buf, struct serial_state *state)
+{
+ return sprintf(buf, "%d: uart:%s port:%lX irq:%d\n",
+ state->line, uart_config[state->type].name,
+ state->port, state->irq);
+}
+
+/*
+ * read_proc handler: writes a version header followed by one line_info()
+ * line per port into "page", then returns the window of that text
+ * selected by off/count.
+ *
+ * *eof is set once every port has been formatted; *start is pointed at
+ * the first byte to hand back. Returns the number of bytes available at
+ * *start (at most count), or 0 when off is at or past the end of the text.
+ */
+static int rs_read_proc(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
+{
+ int i, len = 0, l;
+ off_t begin = 0;
+
+ len += sprintf(page, "simserinfo:1.0 driver:%s\n", serial_version);
+ for (i = 0; i < NR_PORTS && len < 4000; i++) {
+ l = line_info(page + len, &rs_table[i]);
+ len += l;
+ /* Enough text to satisfy the request: stop without setting *eof. */
+ if (len+begin > off+count)
+ goto done;
+ /* Everything so far lies before the requested offset: discard it. */
+ if (len+begin < off) {
+ begin += len;
+ len = 0;
+ }
+ }
+ *eof = 1;
+done:
+ if (off >= len+begin)
+ return 0;
+ *start = page + (begin-off);
+ return ((count < begin+len-off) ? count : begin+len-off);
+}
+
+/*
+ * ---------------------------------------------------------------------
+ * rs_init() and friends
+ *
+ * rs_init() is called at boot-time to initialize the serial driver.
+ * ---------------------------------------------------------------------
+ */
+
+/*
+ * This routine prints out the appropriate serial driver version
+ * number, and identifies which options were configured into this
+ * driver.
+ */
+/*
+ * Print the driver banner as one complete log line. The banner used to be
+ * split across two printk() calls, the second of which carried its own
+ * KERN_INFO prefix while continuing the same output line.
+ */
+static inline void show_serial_version(void)
+{
+	printk(KERN_INFO "%s version %s with no serial options enabled\n",
+	       serial_name, serial_version);
+}
+
+/*
+ * tty operations for the simulated serial port; installed on
+ * hp_simserial_driver by simrs_init(). Several entries (rs_stop,
+ * rs_start, rs_break, rs_wait_until_sent) are empty or debug-only
+ * functions in this driver.
+ */
+static struct tty_operations hp_ops = {
+ .open = rs_open,
+ .close = rs_close,
+ .write = rs_write,
+ .put_char = rs_put_char,
+ .flush_chars = rs_flush_chars,
+ .write_room = rs_write_room,
+ .chars_in_buffer = rs_chars_in_buffer,
+ .flush_buffer = rs_flush_buffer,
+ .ioctl = rs_ioctl,
+ .throttle = rs_throttle,
+ .unthrottle = rs_unthrottle,
+ .send_xchar = rs_send_xchar,
+ .set_termios = rs_set_termios,
+ .stop = rs_stop,
+ .start = rs_start,
+ .hangup = rs_hangup,
+ .break_ctl = rs_break,
+ .wait_until_sent = rs_wait_until_sent,
+ .read_proc = rs_read_proc,
+};
+
+/*
+ * The serial driver boot-time initialization code!
+ */
+/*
+ * Boot-time initialization: runs only on the "hpsim" platform. Allocates
+ * and fills in the tty driver, assigns an interrupt vector to every
+ * configured port that has none and connects it to the simulator's
+ * keyboard interrupt, then registers the driver.
+ *
+ * Returns 0 on success, -ENODEV on other platforms, -ENOMEM if the tty
+ * driver cannot be allocated. Panics if registration fails.
+ */
+static int __init
+simrs_init (void)
+{
+	int i;
+	struct serial_state *state;
+
+	if (!ia64_platform_is("hpsim"))
+		return -ENODEV;
+
+	/* One tty line per rs_table entry; rs_open() bounds lines by NR_PORTS too. */
+	hp_simserial_driver = alloc_tty_driver(NR_PORTS);
+	if (!hp_simserial_driver)
+		return -ENOMEM;
+
+	show_serial_version();
+
+	/* Initialize the tty_driver structure */
+
+	hp_simserial_driver->owner = THIS_MODULE;
+	hp_simserial_driver->driver_name = "simserial";
+	hp_simserial_driver->name = "ttyS";
+	hp_simserial_driver->major = TTY_MAJOR;
+	hp_simserial_driver->minor_start = 64;
+	hp_simserial_driver->type = TTY_DRIVER_TYPE_SERIAL;
+	hp_simserial_driver->subtype = SERIAL_TYPE_NORMAL;
+	hp_simserial_driver->init_termios = tty_std_termios;
+	hp_simserial_driver->init_termios.c_cflag =
+		B9600 | CS8 | CREAD | HUPCL | CLOCAL;
+	hp_simserial_driver->flags = TTY_DRIVER_REAL_RAW;
+	tty_set_operations(hp_simserial_driver, &hp_ops);
+
+	/*
+	 * Let's have a little bit of fun !
+	 */
+	for (i = 0, state = rs_table; i < NR_PORTS; i++,state++) {
+
+		if (state->type == PORT_UNKNOWN) continue;
+
+		/* irq == 0 in rs_table means "not assigned yet". */
+		if (!state->irq) {
+			state->irq = assign_irq_vector(AUTO_ASSIGN);
+			ia64_ssc_connect_irq(KEYBOARD_INTR, state->irq);
+		}
+
+		printk(KERN_INFO "ttyS%d at 0x%04lx (irq = %d) is a %s\n",
+		       state->line,
+		       state->port, state->irq,
+		       uart_config[state->type].name);
+	}
+
+	if (tty_register_driver(hp_simserial_driver))
+		panic("Couldn't register simserial driver\n");
+
+	return 0;
+}
+
+#ifndef MODULE
+__initcall(simrs_init);
+#endif
diff --git a/arch/ia64/hp/zx1/Makefile b/arch/ia64/hp/zx1/Makefile
new file mode 100644
index 00000000000..61e878729d1
--- /dev/null
+++ b/arch/ia64/hp/zx1/Makefile
@@ -0,0 +1,8 @@
+#
+# ia64/hp/zx1/Makefile
+#
+# Copyright (C) 2002 Hewlett Packard
+# Copyright (C) Alex Williamson (alex_williamson@hp.com)
+#
+
+obj-$(CONFIG_IA64_GENERIC) += hpzx1_machvec.o hpzx1_swiotlb_machvec.o
diff --git a/arch/ia64/hp/zx1/hpzx1_machvec.c b/arch/ia64/hp/zx1/hpzx1_machvec.c
new file mode 100644
index 00000000000..32518b0f923
--- /dev/null
+++ b/arch/ia64/hp/zx1/hpzx1_machvec.c
@@ -0,0 +1,3 @@
+#define MACHVEC_PLATFORM_NAME hpzx1
+#define MACHVEC_PLATFORM_HEADER <asm/machvec_hpzx1.h>
+#include <asm/machvec_init.h>
diff --git a/arch/ia64/hp/zx1/hpzx1_swiotlb_machvec.c b/arch/ia64/hp/zx1/hpzx1_swiotlb_machvec.c
new file mode 100644
index 00000000000..4392a96b3c5
--- /dev/null
+++ b/arch/ia64/hp/zx1/hpzx1_swiotlb_machvec.c
@@ -0,0 +1,3 @@
+#define MACHVEC_PLATFORM_NAME hpzx1_swiotlb
+#define MACHVEC_PLATFORM_HEADER <asm/machvec_hpzx1_swiotlb.h>
+#include <asm/machvec_init.h>
diff --git a/arch/ia64/ia32/Makefile b/arch/ia64/ia32/Makefile
new file mode 100644
index 00000000000..2ed90da8116
--- /dev/null
+++ b/arch/ia64/ia32/Makefile
@@ -0,0 +1,12 @@
+#
+# Makefile for the ia32 kernel emulation subsystem.
+#
+
+obj-y := ia32_entry.o sys_ia32.o ia32_ioctl.o ia32_signal.o \
+ ia32_support.o ia32_traps.o binfmt_elf32.o ia32_ldt.o
+
+CFLAGS_ia32_ioctl.o += -Ifs/
+
+# Don't let GCC use f16-f31 so that save_ia32_fpstate_live() and
+# restore_ia32_fpstate_live() can be sure the live registers contain user-level state.
+CFLAGS_ia32_signal.o += -mfixed-range=f16-f31
diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c
new file mode 100644
index 00000000000..31de70b7c67
--- /dev/null
+++ b/arch/ia64/ia32/binfmt_elf32.c
@@ -0,0 +1,294 @@
+/*
+ * IA-32 ELF support.
+ *
+ * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com>
+ * Copyright (C) 2001 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 06/16/00 A. Mallick initialize csd/ssd/tssd/cflg for ia32_load_state
+ * 04/13/01 D. Mosberger dropped saving tssd in ar.k1---it's not needed
+ * 09/14/01 D. Mosberger fixed memory management for gdt/tss page
+ */
+#include <linux/config.h>
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/security.h>
+
+#include <asm/param.h>
+#include <asm/signal.h>
+
+#include "ia32priv.h"
+#include "elfcore32.h"
+
+/* Override some function names */
+#undef start_thread
+#define start_thread ia32_start_thread
+#define elf_format elf32_format
+#define init_elf_binfmt init_elf32_binfmt
+#define exit_elf_binfmt exit_elf32_binfmt
+
+#undef CLOCKS_PER_SEC
+#define CLOCKS_PER_SEC IA32_CLOCKS_PER_SEC
+
+extern void ia64_elf32_init (struct pt_regs *regs);
+
+static void elf32_set_personality (void);
+
+#define setup_arg_pages(bprm,tos,exec) ia32_setup_arg_pages(bprm,exec)
+#define elf_map elf32_map
+
+#undef SET_PERSONALITY
+#define SET_PERSONALITY(ex, ibcs2) elf32_set_personality()
+
+#define elf_read_implies_exec(ex, have_pt_gnu_stack) (!(have_pt_gnu_stack))
+
+/* Ugly but avoids duplication */
+#include "../../../fs/binfmt_elf.c"
+
+extern struct page *ia32_shared_page[];
+extern unsigned long *ia32_gdt;
+extern struct page *ia32_gate_page;
+
+/*
+ * ->nopage handler referenced by ia32_shared_page_vm_ops. Returns the
+ * ia32_shared_page[] entry of the CPU this code is running on, after taking a
+ * reference on it with get_page(). If TYPE is non-NULL the fault is reported as
+ * VM_FAULT_MINOR. VMA and ADDRESS are not used.
+ */
+struct page *
+ia32_install_shared_page (struct vm_area_struct *vma, unsigned long address, int *type)
+{
+ struct page *pg = ia32_shared_page[smp_processor_id()];
+ get_page(pg);
+ if (type)
+ *type = VM_FAULT_MINOR;
+ return pg;
+}
+
+/*
+ * ->nopage handler referenced by ia32_gate_page_vm_ops. Returns the single global
+ * ia32_gate_page after taking a reference on it with get_page(). If TYPE is non-NULL
+ * the fault is reported as VM_FAULT_MINOR. VMA and ADDRESS are not used.
+ */
+struct page *
+ia32_install_gate_page (struct vm_area_struct *vma, unsigned long address, int *type)
+{
+ struct page *pg = ia32_gate_page;
+ get_page(pg);
+ if (type)
+ *type = VM_FAULT_MINOR;
+ return pg;
+}
+
+
+static struct vm_operations_struct ia32_shared_page_vm_ops = {
+ .nopage = ia32_install_shared_page
+};
+
+static struct vm_operations_struct ia32_gate_page_vm_ops = {
+ .nopage = ia32_install_gate_page
+};
+
+/*
+ * Allocate a vm_area_struct describing [START, START + SIZE) with page protection
+ * PROT, flags FLAGS and vm operations OPS (may be NULL), and insert it into the
+ * current task's address space with mmap_sem held for writing.
+ *
+ * Failure handling is exactly what ia64_elf32_init() did when this sequence was
+ * open-coded: a failed allocation is silently skipped, and a failed
+ * insert_vm_struct() frees the VMA, drops mmap_sem and hits BUG().
+ */
+static void
+elf32_install_vma (unsigned long start, unsigned long size, pgprot_t prot,
+		   unsigned long flags, struct vm_operations_struct *ops)
+{
+	struct vm_area_struct *vma;
+
+	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	if (!vma)
+		return;
+
+	memset(vma, 0, sizeof(*vma));
+	vma->vm_mm = current->mm;
+	vma->vm_start = start;
+	vma->vm_end = vma->vm_start + size;
+	vma->vm_page_prot = prot;
+	vma->vm_flags = flags;
+	vma->vm_ops = ops;
+
+	down_write(&current->mm->mmap_sem);
+	if (insert_vm_struct(current->mm, vma)) {
+		kmem_cache_free(vm_area_cachep, vma);
+		up_write(&current->mm->mmap_sem);
+		BUG();
+	}
+	up_write(&current->mm->mmap_sem);
+}
+
+/*
+ * Per-exec initialization for an IA-32 task: installs the GDT, gate-page and LDT
+ * mappings in the current address space, resets the IA-32 visible registers in REGS
+ * and the IA-32 thread state of the current task, and finally loads the segment
+ * descriptors and IA-32 state.
+ */
+void
+ia64_elf32_init (struct pt_regs *regs)
+{
+	/*
+	 * Map GDT below 4GB, where the processor can find it.  We need to map
+	 * it with privilege level 3 because the IVE uses non-privileged accesses to these
+	 * tables.  IA-32 segmentation is used to protect against IA-32 accesses to them.
+	 */
+	elf32_install_vma(IA32_GDT_OFFSET, PAGE_SIZE, PAGE_SHARED,
+			  VM_READ|VM_MAYREAD|VM_RESERVED, &ia32_shared_page_vm_ops);
+
+	/*
+	 * When the user stack is not executable, pushing the sigreturn code onto the
+	 * stack would raise a segmentation fault on the way back into the kernel.  The
+	 * sigreturn code therefore lives in a dedicated gate page, which is what
+	 * pretcode points to when setup_frame_ia32 builds a signal frame.
+	 */
+	elf32_install_vma(IA32_GATE_OFFSET, PAGE_SIZE, PAGE_COPY_EXEC,
+			  VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC | VM_RESERVED,
+			  &ia32_gate_page_vm_ops);
+
+	/*
+	 * Install LDT as anonymous memory.  This gives us all-zero segment descriptors
+	 * until a task modifies them via modify_ldt().
+	 */
+	elf32_install_vma(IA32_LDT_OFFSET, PAGE_ALIGN(IA32_LDT_ENTRIES*IA32_LDT_ENTRY_SIZE),
+			  PAGE_SHARED, VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE, NULL);
+
+	ia64_psr(regs)->ac = 0;		/* turn off alignment checking */
+	regs->loadrs = 0;
+	/*
+	 * According to the ABI %edx points to an `atexit' handler.  Since we don't have
+	 * one we'll set it to 0 and initialize all the other registers just to make
+	 * things more deterministic, ala the i386 implementation.
+	 */
+	regs->r8 = 0;	/* %eax */
+	regs->r11 = 0;	/* %ebx */
+	regs->r9 = 0;	/* %ecx */
+	regs->r10 = 0;	/* %edx */
+	regs->r13 = 0;	/* %ebp */
+	regs->r14 = 0;	/* %esi */
+	regs->r15 = 0;	/* %edi */
+
+	current->thread.eflag = IA32_EFLAG;
+	current->thread.fsr = IA32_FSR_DEFAULT;
+	current->thread.fcr = IA32_FCR_DEFAULT;
+	current->thread.fir = 0;
+	current->thread.fdr = 0;
+
+	/*
+	 * Setup GDTD.  Note: GDTD is the descrambled version of the pseudo-descriptor
+	 * format defined by Figure 3-11 "Pseudo-Descriptor Format" in the IA-32
+	 * architecture manual. Also note that the only fields that are not ignored are
+	 * `base', `limit', 'G', `P' (must be 1) and `S' (must be 0).
+	 */
+	regs->r31 = IA32_SEG_UNSCRAMBLE(IA32_SEG_DESCRIPTOR(IA32_GDT_OFFSET, IA32_PAGE_SIZE - 1,
+							     0, 0, 0, 1, 0, 0, 0));
+	/* Setup the segment selectors */
+	regs->r16 = (__USER_DS << 16) | __USER_DS; /* ES == DS, GS, FS are zero */
+	regs->r17 = (__USER_DS << 16) | __USER_CS; /* SS, CS; ia32_load_state() sets TSS and LDT */
+
+	ia32_load_segment_descriptors(current);
+	ia32_load_state(current);
+}
+
+/*
+ * IA-32 stand-in for setup_arg_pages() (see the setup_arg_pages #define above).
+ * Relocates the argument pointers in BPRM to just below IA32_STACK_TOP, creates the
+ * stack VMA covering them (executable or not according to EXECUTABLE_STACK), installs
+ * the already populated argument pages into it and sets up the task's partial-page
+ * list.  Returns 0 on success, -ENOMEM if the VMA cannot be allocated or the memory
+ * check fails, or the error from insert_vm_struct().
+ */
+int
+ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *stack_vma;
+	unsigned long page_addr, stack_start;
+	int slot, err;
+
+	/* shift the bprm offsets so that they become final user addresses */
+	page_addr = IA32_STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE;
+	mm->arg_start = bprm->p + page_addr;
+
+	bprm->p += page_addr;
+	if (bprm->loader)
+		bprm->loader += page_addr;
+	bprm->exec += page_addr;
+
+	/* the stack VMA begins at the page containing the lowest argument byte */
+	stack_start = PAGE_MASK & (unsigned long) bprm->p;
+
+	stack_vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	if (!stack_vma)
+		return -ENOMEM;
+
+	if (security_vm_enough_memory((IA32_STACK_TOP - stack_start) >> PAGE_SHIFT)) {
+		kmem_cache_free(vm_area_cachep, stack_vma);
+		return -ENOMEM;
+	}
+
+	memset(stack_vma, 0, sizeof(*stack_vma));
+
+	down_write(&mm->mmap_sem);
+
+	stack_vma->vm_mm = mm;
+	stack_vma->vm_start = stack_start;
+	stack_vma->vm_end = IA32_STACK_TOP;
+	switch (executable_stack) {
+	case EXSTACK_ENABLE_X:
+		stack_vma->vm_flags = VM_STACK_FLAGS | VM_EXEC;
+		break;
+	case EXSTACK_DISABLE_X:
+		stack_vma->vm_flags = VM_STACK_FLAGS & ~VM_EXEC;
+		break;
+	default:
+		stack_vma->vm_flags = VM_STACK_FLAGS;
+		break;
+	}
+	if (stack_vma->vm_flags & VM_EXEC)
+		stack_vma->vm_page_prot = PAGE_COPY_EXEC;
+	else
+		stack_vma->vm_page_prot = PAGE_COPY;
+
+	err = insert_vm_struct(mm, stack_vma);
+	if (err) {
+		up_write(&mm->mmap_sem);
+		kmem_cache_free(vm_area_cachep, stack_vma);
+		return err;
+	}
+	mm->total_vm = vma_pages(stack_vma);
+	mm->stack_vm = mm->total_vm;
+
+	/* hand every populated argument page over to the new VMA */
+	for (slot = 0; slot < MAX_ARG_PAGES; slot++, page_addr += PAGE_SIZE) {
+		struct page *pg = bprm->page[slot];
+
+		if (!pg)
+			continue;
+		bprm->page[slot] = NULL;
+		install_arg_page(stack_vma, pg, page_addr);
+	}
+	up_write(&mm->mmap_sem);
+
+	/* Can't do it in ia64_elf32_init(). Needs to be done before calls to
+	   elf32_map() */
+	current->thread.ppl = ia32_init_pp_list();
+
+	return 0;
+}
+
+/*
+ * Body of the SET_PERSONALITY() override defined above for the included
+ * fs/binfmt_elf.c. Switches the current task to PER_LINUX32, sets the thread's
+ * map_base to a third of IA32_PAGE_OFFSET and its task_size to IA32_PAGE_OFFSET, and
+ * then re-applies USER_DS so the address limit follows the new task size.
+ */
+static void
+elf32_set_personality (void)
+{
+ set_personality(PER_LINUX32);
+ current->thread.map_base = IA32_PAGE_OFFSET/3;
+ current->thread.task_size = IA32_PAGE_OFFSET; /* use what Linux/x86 uses... */
+ set_fs(USER_DS); /* set addr limit for new TASK_SIZE */
+}
+
+/*
+ * elf_map() replacement for IA-32 images (see the elf_map #define above).  Maps the
+ * file-backed part of program header EPPNT at ADDR rounded down to an IA-32 page
+ * boundary; length and file offset are adjusted by the segment's offset within its
+ * IA-32 page so that the bytes still land at the requested address.  Returns whatever
+ * ia32_do_mmap() returns.
+ */
+static unsigned long
+elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type)
+{
+	/* offset of the segment's start within its IA-32 page */
+	unsigned long in_page = eppnt->p_vaddr & ~IA32_PAGE_MASK;
+	unsigned long map_addr = addr & IA32_PAGE_MASK;
+	unsigned long map_len = eppnt->p_filesz + in_page;
+	unsigned long file_off = eppnt->p_offset - in_page;
+
+	return ia32_do_mmap(filep, map_addr, map_len, prot, type, file_off);
+}
+
+#define cpu_uses_ia32el() (local_cpu_data->family > 0x1f)
+
+/*
+ * Init-time check (registered via module_init below). When cpu_uses_ia32el() is
+ * true, i.e. local_cpu_data->family is greater than 0x1f, print a hint and unregister
+ * elf_format (which is #defined to elf32_format above), returning the result of
+ * unregister_binfmt(). Otherwise do nothing and return 0.
+ */
+static int __init check_elf32_binfmt(void)
+{
+ if (cpu_uses_ia32el()) {
+ printk("Please use IA-32 EL for executing IA-32 binaries\n");
+ return unregister_binfmt(&elf_format);
+ }
+ return 0;
+}
+
+module_init(check_elf32_binfmt)
diff --git a/arch/ia64/ia32/elfcore32.h b/arch/ia64/ia32/elfcore32.h
new file mode 100644
index 00000000000..b73b8b6b10c
--- /dev/null
+++ b/arch/ia64/ia32/elfcore32.h
@@ -0,0 +1,138 @@
+/*
+ * IA-32 ELF core dump support.
+ *
+ * Copyright (C) 2003 Arun Sharma <arun.sharma@intel.com>
+ *
+ * Derived from the x86_64 version
+ */
+#ifndef _ELFCORE32_H_
+#define _ELFCORE32_H_
+
+#include <asm/intrinsics.h>
+#include <asm/uaccess.h>
+
+#define USE_ELF_CORE_DUMP 1
+
+/* Override elfcore.h */
+#define _LINUX_ELFCORE_H 1
+typedef unsigned int elf_greg_t;
+
+#define ELF_NGREG (sizeof (struct user_regs_struct32) / sizeof(elf_greg_t))
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+
+typedef struct ia32_user_i387_struct elf_fpregset_t;
+typedef struct ia32_user_fxsr_struct elf_fpxregset_t;
+
+struct elf_siginfo
+{
+ int si_signo; /* signal number */
+ int si_code; /* extra code */
+ int si_errno; /* errno */
+};
+
+#define jiffies_to_timeval(a,b) do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; }while(0)
+
+struct elf_prstatus
+{
+ struct elf_siginfo pr_info; /* Info associated with signal */
+ short pr_cursig; /* Current signal */
+ unsigned int pr_sigpend; /* Set of pending signals */
+ unsigned int pr_sighold; /* Set of held signals */
+ pid_t pr_pid;
+ pid_t pr_ppid;
+ pid_t pr_pgrp;
+ pid_t pr_sid;
+ struct compat_timeval pr_utime; /* User time */
+ struct compat_timeval pr_stime; /* System time */
+ struct compat_timeval pr_cutime; /* Cumulative user time */
+ struct compat_timeval pr_cstime; /* Cumulative system time */
+ elf_gregset_t pr_reg; /* GP registers */
+ int pr_fpvalid; /* True if math co-processor being used. */
+};
+
+#define ELF_PRARGSZ (80) /* Number of chars for args */
+
+struct elf_prpsinfo
+{
+ char pr_state; /* numeric process state */
+ char pr_sname; /* char for pr_state */
+ char pr_zomb; /* zombie */
+ char pr_nice; /* nice val */
+ unsigned int pr_flag; /* flags */
+ __u16 pr_uid;
+ __u16 pr_gid;
+ pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid;
+ /* Lots missing */
+ char pr_fname[16]; /* filename of executable */
+ char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */
+};
+
+/*
+ * Fill the 17-entry general register array PR_REG of an IA-32 core dump from the
+ * IA-64 pt_regs REGS.  The register names in the comments follow the IA-64 <-> IA-32
+ * assignments noted in ia64_elf32_init() (binfmt_elf32.c); entries without a comment
+ * are not annotated anywhere in this code.  Entry 14 is read from the live ar.eflag
+ * register, not from REGS.
+ *
+ * Both arguments are parenthesized so that arbitrary pointer/array expressions can
+ * be passed.  The expansion is deliberately a plain statement sequence ending in a
+ * semicolon (not a do { } while (0) block), because the existing users invoke the
+ * macro both with and without a trailing semicolon.
+ */
+#define ELF_CORE_COPY_REGS(pr_reg, regs)				\
+	(pr_reg)[0] = (regs)->r11;		/* %ebx */		\
+	(pr_reg)[1] = (regs)->r9;		/* %ecx */		\
+	(pr_reg)[2] = (regs)->r10;		/* %edx */		\
+	(pr_reg)[3] = (regs)->r14;		/* %esi */		\
+	(pr_reg)[4] = (regs)->r15;		/* %edi */		\
+	(pr_reg)[5] = (regs)->r13;		/* %ebp */		\
+	(pr_reg)[6] = (regs)->r8;		/* %eax */		\
+	(pr_reg)[7] = (regs)->r16 & 0xffff;				\
+	(pr_reg)[8] = ((regs)->r16 >> 16) & 0xffff;			\
+	(pr_reg)[9] = ((regs)->r16 >> 32) & 0xffff;			\
+	(pr_reg)[10] = ((regs)->r16 >> 48) & 0xffff;			\
+	(pr_reg)[11] = (regs)->r1;					\
+	(pr_reg)[12] = (regs)->cr_iip;					\
+	(pr_reg)[13] = (regs)->r17 & 0xffff;	/* CS */		\
+	(pr_reg)[14] = ia64_getreg(_IA64_REG_AR_EFLAG);			\
+	(pr_reg)[15] = (regs)->r12;					\
+	(pr_reg)[16] = ((regs)->r17 >> 16) & 0xffff;	/* SS */
+
+/*
+ * Copy the IA-32 view of REGS into *ELFREGS using ELF_CORE_COPY_REGS. The macro
+ * expansion already ends in a semicolon, which is why none follows the invocation.
+ */
+static inline void elf_core_copy_regs(elf_gregset_t *elfregs,
+ struct pt_regs *regs)
+{
+ ELF_CORE_COPY_REGS((*elfregs), regs)
+}
+
+/*
+ * Copy the IA-32 view of task T's pt_regs (as returned by ia64_task_regs()) into
+ * *ELFREGS. Always returns 1.
+ *
+ * NOTE(review): ELF_CORE_COPY_REGS fills entry 14 from ia64_getreg(_IA64_REG_AR_EFLAG),
+ * i.e. from the executing CPU rather than from T's saved state -- confirm this is
+ * intended when T is not the current task.
+ */
+static inline int elf_core_copy_task_regs(struct task_struct *t,
+ elf_gregset_t* elfregs)
+{
+ struct pt_regs *pp = ia64_task_regs(t);
+ ELF_CORE_COPY_REGS((*elfregs), pp);
+ return 1;
+}
+
+/*
+ * Save TSK's IA-32 FPU state into the kernel buffer FPU for a core dump.
+ * Returns 0 without touching FPU if the task never used math, 1 otherwise.
+ * REGS is not used. The return value of save_ia32_fpstate() is ignored.
+ */
+static inline int
+elf_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs, elf_fpregset_t *fpu)
+{
+ struct ia32_user_i387_struct *fpstate = (void*)fpu;
+ mm_segment_t old_fs;
+
+ if (!tsk_used_math(tsk))
+ return 0;
+
+ /*
+ * save_ia32_fpstate() is called with a __user pointer; widen the address limit to
+ * KERNEL_DS for the duration of the call since FPU is a kernel buffer here.
+ */
+ old_fs = get_fs();
+ set_fs(KERNEL_DS);
+ save_ia32_fpstate(tsk, (struct ia32_user_i387_struct __user *) fpstate);
+ set_fs(old_fs);
+
+ return 1;
+}
+
+#define ELF_CORE_COPY_XFPREGS 1
+/*
+ * Save TSK's IA-32 extended (FXSR) FPU state into the kernel buffer XFPU for a core
+ * dump. Returns 0 without touching XFPU if the task never used math, 1 otherwise.
+ * The return value of save_ia32_fpxstate() is ignored.
+ */
+static inline int
+elf_core_copy_task_xfpregs(struct task_struct *tsk, elf_fpxregset_t *xfpu)
+{
+ struct ia32_user_fxsr_struct *fpxstate = (void*) xfpu;
+ mm_segment_t old_fs;
+
+ if (!tsk_used_math(tsk))
+ return 0;
+
+ /*
+ * save_ia32_fpxstate() is called with a __user pointer; widen the address limit to
+ * KERNEL_DS for the duration of the call since XFPU is a kernel buffer here.
+ */
+ old_fs = get_fs();
+ set_fs(KERNEL_DS);
+ save_ia32_fpxstate(tsk, (struct ia32_user_fxsr_struct __user *) fpxstate);
+ set_fs(old_fs);
+
+ return 1;
+}
+
+#endif /* _ELFCORE32_H_ */
diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S
new file mode 100644
index 00000000000..829a6d80711
--- /dev/null
+++ b/arch/ia64/ia32/ia32_entry.S
@@ -0,0 +1,500 @@
+#include <asm/asmmacro.h>
+#include <asm/ia32.h>
+#include <asm/offsets.h>
+#include <asm/signal.h>
+#include <asm/thread_info.h>
+
+#include "../kernel/minstate.h"
+
+ /*
+ * execve() is special because in case of success, we need to
+ * setup a null register window frame (in case an IA-32 process
+ * is exec'ing an IA-64 program).
+ *
+ * in0-in2 carry the 32-bit filename/argv/envp pointers; they are
+ * zero-extended and passed to sys32_execve together with sp+16 as
+ * the regs argument. The result in r8 is sign-extended on return.
+ */
+ENTRY(ia32_execve)
+ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(3)
+ alloc loc1=ar.pfs,3,2,4,0
+ mov loc0=rp // save return pointer across the call
+ .body
+ zxt4 out0=in0 // filename
+ ;; // stop bit between alloc and call
+ zxt4 out1=in1 // argv
+ zxt4 out2=in2 // envp
+ add out3=16,sp // regs
+ br.call.sptk.few rp=sys32_execve
+1: cmp.ge p6,p0=r8,r0 // p6 = (r8 >= 0), i.e. sys32_execve succeeded
+ mov ar.pfs=loc1 // restore ar.pfs
+ ;;
+(p6) mov ar.pfs=r0 // clear ar.pfs in case of success
+ sxt4 r8=r8 // return 64-bit result
+ mov rp=loc0
+ br.ret.sptk.few rp
+END(ia32_execve)
+
+ /*
+ * clone() entry point for IA-32 tasks. Saves a switch stack, then calls
+ * do_fork() with:
+ * out0 = in0 (clone_flags)
+ * out1 = in1 zero-extended (newsp)
+ * out2 = &regs (just above the switch stack)
+ * out3 = 16 (stacksize)
+ * out4 = in2 zero-extended (parent_tidptr)
+ * out5 = in4 zero-extended (child_tidptr)
+ * in3 is not passed on. do_fork()'s result is returned unchanged in r8.
+ */
+ENTRY(ia32_clone)
+ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
+ alloc r16=ar.pfs,5,2,6,0
+ DO_SAVE_SWITCH_STACK
+ mov loc0=rp
+ mov loc1=r16 // save ar.pfs across do_fork
+ .body
+ zxt4 out1=in1 // newsp
+ mov out3=16 // stacksize (compensates for 16-byte scratch area)
+ adds out2=IA64_SWITCH_STACK_SIZE+16,sp // out2 = &regs
+ mov out0=in0 // out0 = clone_flags
+ zxt4 out4=in2 // out4 = parent_tidptr
+ zxt4 out5=in4 // out5 = child_tidptr
+ br.call.sptk.many rp=do_fork
+.ret0: .restore sp
+ adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack
+ mov ar.pfs=loc1
+ mov rp=loc0
+ br.ret.sptk.many rp
+END(ia32_clone)
+
+ /*
+ * Assembly stub for rt_sigsuspend: forwards in0 (mask) and in1 (sigsetsize)
+ * to ia32_rt_sigsuspend and passes the value of sp taken before the 16-byte
+ * dummy "sigscratch" area is allocated as the third argument. The scratch
+ * area is released again before returning.
+ */
+ENTRY(sys32_rt_sigsuspend)
+ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
+ alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs
+ mov loc0=rp
+ mov out0=in0 // mask
+ mov out1=in1 // sigsetsize
+ mov out2=sp // out2 = &sigscratch
+ .fframe 16
+ adds sp=-16,sp // allocate dummy "sigscratch"
+ ;;
+ .body
+ br.call.sptk.many rp=ia32_rt_sigsuspend
+1: .restore sp
+ adds sp=16,sp // release the dummy "sigscratch"
+ mov rp=loc0
+ mov ar.pfs=loc1
+ br.ret.sptk.many rp
+END(sys32_rt_sigsuspend)
+
+ /*
+ * Assembly stub for the old sigsuspend: only the third input (in2, the mask)
+ * is forwarded to ia32_sigsuspend, together with the value of sp taken before
+ * the 16-byte dummy "sigscratch" area is allocated. The scratch area is
+ * released again before returning.
+ */
+ENTRY(sys32_sigsuspend)
+ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
+ alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs
+ mov loc0=rp
+ mov out0=in2 // mask (first two args are ignored)
+ ;;
+ mov out1=sp // out1 = &sigscratch
+ .fframe 16
+ adds sp=-16,sp // allocate dummy "sigscratch"
+ .body
+ br.call.sptk.many rp=ia32_sigsuspend
+1: .restore sp
+ adds sp=16,sp // release the dummy "sigscratch"
+ mov rp=loc0
+ mov ar.pfs=loc1
+ br.ret.sptk.many rp
+END(sys32_sigsuspend)
+
+ //
+ // Return path of a freshly cloned IA-32 task. After the schedule-tail call
+ // the return value r8 is set to 0. If any _TIF_SYSCALL_TRACEAUDIT bit is set
+ // in the flags word loaded below, control goes to .ia32_strace_check_retval
+ // (inside ia32_trace_syscall); otherwise execution falls through into
+ // ia32_ret_from_syscall, which follows immediately.
+ //
+GLOBAL_ENTRY(ia32_ret_from_clone)
+ PT_REGS_UNWIND_INFO(0)
+{ /*
+ * Some versions of gas generate bad unwind info if the first instruction of a
+ * procedure doesn't go into the first slot of a bundle. This is a workaround.
+ */
+ nop.m 0
+ nop.i 0
+ /*
+ * We need to call schedule_tail() to complete the scheduling process.
+ * Called by ia64_switch_to after do_fork()->copy_thread(). r8 contains the
+ * address of the previously executing task.
+ */
+ br.call.sptk.many rp=ia64_invoke_schedule_tail
+}
+.ret1:
+ adds r2=TI_FLAGS+IA64_TASK_SIZE,r13 // r2 = address of the flags word (presumably thread_info.flags of current)
+ ;;
+ ld4 r2=[r2]
+ ;;
+ mov r8=0 // return value seen by the new task
+ and r2=_TIF_SYSCALL_TRACEAUDIT,r2
+ ;;
+ cmp.ne p6,p0=r2,r0 // p6 = some trace/audit flag is set
+(p6) br.cond.spnt .ia32_strace_check_retval
+ ;; // prevent RAW on r8
+END(ia32_ret_from_clone)
+ // fall through
+ //
+ // Common IA-32 syscall exit: stores the return value r8 into the r8 slot of
+ // the pt_regs at sp+16, drops the syscall argument frame and continues at
+ // ia64_leave_kernel. Also reached by fall-through from ia32_ret_from_clone.
+ //
+GLOBAL_ENTRY(ia32_ret_from_syscall)
+ PT_REGS_UNWIND_INFO(0)
+
+ cmp.ge p6,p7=r8,r0 // syscall executed successfully?
+ adds r2=IA64_PT_REGS_R8_OFFSET+16,sp // r2 = &pt_regs.r8
+ ;;
+ alloc r3=ar.pfs,0,0,0,0 // drop the syscall argument frame
+ st8 [r2]=r8 // store return value in slot for r8
+ br.cond.sptk.many ia64_leave_kernel
+END(ia32_ret_from_syscall)
+
+ //
+ // Invoke a system call, but do some tracing before and after the call.
+ // We MUST preserve the current register frame throughout this routine
+ // because some system calls (such as ia64_execve) directly
+ // manipulate ar.pfs.
+ //
+ // Input:
+ // r8 = syscall number
+ // b6 = syscall entry point
+ //
+ // After syscall_trace_enter returns, the syscall number and the six
+ // argument registers are reloaded from pt_regs and the entry point is
+ // looked up again in ia32_syscall_table, since the tracer may have
+ // changed them. .ia32_strace_check_retval is also entered from
+ // ia32_ret_from_clone.
+ //
+GLOBAL_ENTRY(ia32_trace_syscall)
+ PT_REGS_UNWIND_INFO(0)
+ mov r3=-38 // -ENOSYS
+ adds r2=IA64_PT_REGS_R8_OFFSET+16,sp
+ ;;
+ st8 [r2]=r3 // initialize return code to -ENOSYS
+ br.call.sptk.few rp=syscall_trace_enter // give parent a chance to catch syscall args
+.ret2: // Need to reload arguments (they may be changed by the tracing process)
+ adds r2=IA64_PT_REGS_R1_OFFSET+16,sp // r2 = &pt_regs.r1
+ adds r3=IA64_PT_REGS_R13_OFFSET+16,sp // r3 = &pt_regs.r13
+ mov r15=IA32_NR_syscalls
+ ;;
+ ld4 r8=[r2],IA64_PT_REGS_R9_OFFSET-IA64_PT_REGS_R1_OFFSET
+ movl r16=ia32_syscall_table
+ ;;
+ ld4 r33=[r2],8 // r9 == ecx
+ ld4 r37=[r3],16 // r13 == ebp
+ cmp.ltu.unc p6,p7=r8,r15 // p6 = syscall number is below IA32_NR_syscalls
+ ;;
+ ld4 r34=[r2],8 // r10 == edx
+ ld4 r36=[r3],8 // r15 == edi
+(p6) shladd r16=r8,3,r16 // force ni_syscall if not valid syscall number
+ ;;
+ ld8 r16=[r16]
+ ;;
+ ld4 r32=[r2],8 // r11 == ebx
+ mov b6=r16
+ ld4 r35=[r3],8 // r14 == esi
+ br.call.sptk.few rp=b6 // do the syscall
+.ia32_strace_check_retval:
+ cmp.lt p6,p0=r8,r0 // syscall failed?
+ adds r2=IA64_PT_REGS_R8_OFFSET+16,sp // r2 = &pt_regs.r8
+ ;;
+ st8.spill [r2]=r8 // store return value in slot for r8
+ br.call.sptk.few rp=syscall_trace_leave // give parent a chance to catch return value
+.ret4: alloc r2=ar.pfs,0,0,0,0 // drop the syscall argument frame
+ br.cond.sptk.many ia64_leave_kernel
+END(ia32_trace_syscall)
+
+ //
+ // vfork() for IA-32 tasks: allocates the same register frame as sys32_fork,
+ // loads the vfork clone flags into out0 and branches to .fork1 inside
+ // sys32_fork, which performs the actual do_fork() call and the return.
+ //
+GLOBAL_ENTRY(sys32_vfork)
+ alloc r16=ar.pfs,2,2,4,0;;
+ mov out0=IA64_CLONE_VFORK|IA64_CLONE_VM|SIGCHLD // out0 = clone_flags
+ br.cond.sptk.few .fork1 // do the work
+END(sys32_vfork)
+
+ //
+ // fork() for IA-32 tasks. Calls do_fork() with out0 = SIGCHLD, out1 = 0,
+ // out2 = &regs and out3 = 0 after saving a switch stack. The code from
+ // .fork1 onwards is shared with sys32_vfork, which arrives with its own
+ // clone flags already in out0 and the old ar.pfs in r16.
+ //
+GLOBAL_ENTRY(sys32_fork)
+ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
+ alloc r16=ar.pfs,2,2,4,0
+ mov out0=SIGCHLD // out0 = clone_flags
+ ;;
+.fork1:
+ mov loc0=rp
+ mov loc1=r16 // save ar.pfs across do_fork
+ DO_SAVE_SWITCH_STACK
+
+ .body
+
+ mov out1=0
+ mov out3=0
+ adds out2=IA64_SWITCH_STACK_SIZE+16,sp // out2 = &regs
+ br.call.sptk.few rp=do_fork
+.ret5: .restore sp
+ adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack
+ mov ar.pfs=loc1
+ mov rp=loc0
+ br.ret.sptk.many rp
+END(sys32_fork)
+
+ .rodata
+ .align 8
+ .globl ia32_syscall_table
+ia32_syscall_table:
+ data8 sys_ni_syscall /* 0 - old "setup(" system call*/
+ data8 sys_exit
+ data8 sys32_fork
+ data8 sys_read
+ data8 sys_write
+ data8 sys32_open /* 5 */
+ data8 sys_close
+ data8 sys32_waitpid
+ data8 sys_creat
+ data8 sys_link
+ data8 sys_unlink /* 10 */
+ data8 ia32_execve
+ data8 sys_chdir
+ data8 compat_sys_time
+ data8 sys_mknod
+ data8 sys_chmod /* 15 */
+ data8 sys_lchown /* 16-bit version */
+ data8 sys_ni_syscall /* old break syscall holder */
+ data8 sys_ni_syscall
+ data8 sys32_lseek
+ data8 sys_getpid /* 20 */
+ data8 compat_sys_mount
+ data8 sys_oldumount
+ data8 sys_setuid /* 16-bit version */
+ data8 sys_getuid /* 16-bit version */
+ data8 compat_sys_stime /* 25 */
+ data8 sys32_ptrace
+ data8 sys32_alarm
+ data8 sys_ni_syscall
+ data8 sys32_pause
+ data8 compat_sys_utime /* 30 */
+ data8 sys_ni_syscall /* old stty syscall holder */
+ data8 sys_ni_syscall /* old gtty syscall holder */
+ data8 sys_access
+ data8 sys_nice
+ data8 sys_ni_syscall /* 35 */ /* old ftime syscall holder */
+ data8 sys_sync
+ data8 sys_kill
+ data8 sys_rename
+ data8 sys_mkdir
+ data8 sys_rmdir /* 40 */
+ data8 sys_dup
+ data8 sys32_pipe
+ data8 compat_sys_times
+ data8 sys_ni_syscall /* old prof syscall holder */
+ data8 sys32_brk /* 45 */
+ data8 sys_setgid /* 16-bit version */
+ data8 sys_getgid /* 16-bit version */
+ data8 sys32_signal
+ data8 sys_geteuid /* 16-bit version */
+ data8 sys_getegid /* 16-bit version */ /* 50 */
+ data8 sys_acct
+ data8 sys_umount /* recycled never used phys( */
+ data8 sys_ni_syscall /* old lock syscall holder */
+ data8 compat_sys_ioctl
+ data8 compat_sys_fcntl /* 55 */
+ data8 sys_ni_syscall /* old mpx syscall holder */
+ data8 sys_setpgid
+ data8 sys_ni_syscall /* old ulimit syscall holder */
+ data8 sys_ni_syscall
+ data8 sys_umask /* 60 */
+ data8 sys_chroot
+ data8 sys_ustat
+ data8 sys_dup2
+ data8 sys_getppid
+ data8 sys_getpgrp /* 65 */
+ data8 sys_setsid
+ data8 sys32_sigaction
+ data8 sys_ni_syscall
+ data8 sys_ni_syscall
+ data8 sys_setreuid /* 16-bit version */ /* 70 */
+ data8 sys_setregid /* 16-bit version */
+ data8 sys32_sigsuspend
+ data8 compat_sys_sigpending
+ data8 sys_sethostname
+ data8 compat_sys_setrlimit /* 75 */
+ data8 compat_sys_old_getrlimit
+ data8 compat_sys_getrusage
+ data8 sys32_gettimeofday
+ data8 sys32_settimeofday
+ data8 sys32_getgroups16 /* 80 */
+ data8 sys32_setgroups16
+ data8 sys32_old_select
+ data8 sys_symlink
+ data8 sys_ni_syscall
+ data8 sys_readlink /* 85 */
+ data8 sys_uselib
+ data8 sys_swapon
+ data8 sys_reboot
+ data8 sys32_readdir
+ data8 sys32_mmap /* 90 */
+ data8 sys32_munmap
+ data8 sys_truncate
+ data8 sys_ftruncate
+ data8 sys_fchmod
+ data8 sys_fchown /* 16-bit version */ /* 95 */
+ data8 sys_getpriority
+ data8 sys_setpriority
+ data8 sys_ni_syscall /* old profil syscall holder */
+ data8 compat_sys_statfs
+ data8 compat_sys_fstatfs /* 100 */
+ data8 sys_ni_syscall /* ioperm */
+ data8 compat_sys_socketcall
+ data8 sys_syslog
+ data8 compat_sys_setitimer
+ data8 compat_sys_getitimer /* 105 */
+ data8 compat_sys_newstat
+ data8 compat_sys_newlstat
+ data8 compat_sys_newfstat
+ data8 sys_ni_syscall
+ data8 sys_ni_syscall /* iopl */ /* 110 */
+ data8 sys_vhangup
+ data8 sys_ni_syscall /* used to be sys_idle */
+ data8 sys_ni_syscall
+ data8 compat_sys_wait4
+ data8 sys_swapoff /* 115 */
+ data8 sys32_sysinfo
+ data8 sys32_ipc
+ data8 sys_fsync
+ data8 sys32_sigreturn
+ data8 ia32_clone /* 120 */
+ data8 sys_setdomainname
+ data8 sys32_newuname
+ data8 sys32_modify_ldt
+ data8 sys_ni_syscall /* adjtimex */
+ data8 sys32_mprotect /* 125 */
+ data8 compat_sys_sigprocmask
+ data8 sys_ni_syscall /* create_module */
+ data8 sys_ni_syscall /* init_module */
+ data8 sys_ni_syscall /* delete_module */
+ data8 sys_ni_syscall /* get_kernel_syms */ /* 130 */
+ data8 sys_quotactl
+ data8 sys_getpgid
+ data8 sys_fchdir
+ data8 sys_ni_syscall /* sys_bdflush */
+ data8 sys_sysfs /* 135 */
+ data8 sys32_personality
+ data8 sys_ni_syscall /* for afs_syscall */
+ data8 sys_setfsuid /* 16-bit version */
+ data8 sys_setfsgid /* 16-bit version */
+ data8 sys_llseek /* 140 */
+ data8 compat_sys_getdents
+ data8 compat_sys_select
+ data8 sys_flock
+ data8 sys32_msync
+ data8 compat_sys_readv /* 145 */
+ data8 compat_sys_writev
+ data8 sys_getsid
+ data8 sys_fdatasync
+ data8 sys32_sysctl
+ data8 sys_mlock /* 150 */
+ data8 sys_munlock
+ data8 sys_mlockall
+ data8 sys_munlockall
+ data8 sys_sched_setparam
+ data8 sys_sched_getparam /* 155 */
+ data8 sys_sched_setscheduler
+ data8 sys_sched_getscheduler
+ data8 sys_sched_yield
+ data8 sys_sched_get_priority_max
+ data8 sys_sched_get_priority_min /* 160 */
+ data8 sys32_sched_rr_get_interval
+ data8 compat_sys_nanosleep
+ data8 sys32_mremap
+ data8 sys_setresuid /* 16-bit version */
+ data8 sys32_getresuid16 /* 16-bit version */ /* 165 */
+ data8 sys_ni_syscall /* vm86 */
+ data8 sys_ni_syscall /* sys_query_module */
+ data8 sys_poll
+ data8 sys_ni_syscall /* nfsservctl */
+ data8 sys_setresgid /* 170 */
+ data8 sys32_getresgid16
+ data8 sys_prctl
+ data8 sys32_rt_sigreturn
+ data8 sys32_rt_sigaction
+ data8 sys32_rt_sigprocmask /* 175 */
+ data8 sys_rt_sigpending
+ data8 compat_sys_rt_sigtimedwait
+ data8 sys32_rt_sigqueueinfo
+ data8 sys32_rt_sigsuspend
+ data8 sys32_pread /* 180 */
+ data8 sys32_pwrite
+ data8 sys_chown /* 16-bit version */
+ data8 sys_getcwd
+ data8 sys_capget
+ data8 sys_capset /* 185 */
+ data8 sys32_sigaltstack
+ data8 sys32_sendfile
+ data8 sys_ni_syscall /* streams1 */
+ data8 sys_ni_syscall /* streams2 */
+ data8 sys32_vfork /* 190 */
+ data8 compat_sys_getrlimit
+ data8 sys32_mmap2
+ data8 sys32_truncate64
+ data8 sys32_ftruncate64
+ data8 sys32_stat64 /* 195 */
+ data8 sys32_lstat64
+ data8 sys32_fstat64
+ data8 sys_lchown
+ data8 sys_getuid
+ data8 sys_getgid /* 200 */
+ data8 sys_geteuid
+ data8 sys_getegid
+ data8 sys_setreuid
+ data8 sys_setregid
+ data8 sys_getgroups /* 205 */
+ data8 sys_setgroups
+ data8 sys_fchown
+ data8 sys_setresuid
+ data8 sys_getresuid
+ data8 sys_setresgid /* 210 */
+ data8 sys_getresgid
+ data8 sys_chown
+ data8 sys_setuid
+ data8 sys_setgid
+ data8 sys_setfsuid /* 215 */
+ data8 sys_setfsgid
+ data8 sys_pivot_root
+ data8 sys_mincore
+ data8 sys_madvise
+ data8 compat_sys_getdents64 /* 220 */
+ data8 compat_sys_fcntl64
+ data8 sys_ni_syscall /* reserved for TUX */
+ data8 sys_ni_syscall /* reserved for Security */
+ data8 sys_gettid
+ data8 sys_readahead /* 225 */
+ data8 sys_setxattr
+ data8 sys_lsetxattr
+ data8 sys_fsetxattr
+ data8 sys_getxattr
+ data8 sys_lgetxattr /* 230 */
+ data8 sys_fgetxattr
+ data8 sys_listxattr
+ data8 sys_llistxattr
+ data8 sys_flistxattr
+ data8 sys_removexattr /* 235 */
+ data8 sys_lremovexattr
+ data8 sys_fremovexattr
+ data8 sys_tkill
+ data8 sys_sendfile64
+ data8 compat_sys_futex /* 240 */
+ data8 compat_sys_sched_setaffinity
+ data8 compat_sys_sched_getaffinity
+ data8 sys32_set_thread_area
+ data8 sys32_get_thread_area
+ data8 compat_sys_io_setup /* 245 */
+ data8 sys_io_destroy
+ data8 compat_sys_io_getevents
+ data8 compat_sys_io_submit
+ data8 sys_io_cancel
+ data8 sys_fadvise64 /* 250 */
+ data8 sys_ni_syscall
+ data8 sys_exit_group
+ data8 sys_lookup_dcookie
+ data8 sys_epoll_create
+ data8 sys32_epoll_ctl /* 255 */
+ data8 sys32_epoll_wait
+ data8 sys_remap_file_pages
+ data8 sys_set_tid_address
+ data8 sys32_timer_create
+ data8 compat_sys_timer_settime /* 260 */
+ data8 compat_sys_timer_gettime
+ data8 sys_timer_getoverrun
+ data8 sys_timer_delete
+ data8 compat_sys_clock_settime
+ data8 compat_sys_clock_gettime /* 265 */
+ data8 compat_sys_clock_getres
+ data8 compat_sys_clock_nanosleep
+ data8 compat_sys_statfs64
+ data8 compat_sys_fstatfs64
+ data8 sys_tgkill /* 270 */
+ data8 compat_sys_utimes
+ data8 sys32_fadvise64_64
+ data8 sys_ni_syscall
+ data8 sys_ni_syscall
+ data8 sys_ni_syscall /* 275 */
+ data8 sys_ni_syscall
+ data8 compat_sys_mq_open
+ data8 sys_mq_unlink
+ data8 compat_sys_mq_timedsend
+ data8 compat_sys_mq_timedreceive /* 280 */
+ data8 compat_sys_mq_notify
+ data8 compat_sys_mq_getsetattr
+ data8 sys_ni_syscall /* reserved for kexec */
+ data8 compat_sys_waitid
+
+ // guard against failures to increase IA32_NR_syscalls
+ .org ia32_syscall_table + 8*IA32_NR_syscalls
diff --git a/arch/ia64/ia32/ia32_ioctl.c b/arch/ia64/ia32/ia32_ioctl.c
new file mode 100644
index 00000000000..9845dabe261
--- /dev/null
+++ b/arch/ia64/ia32/ia32_ioctl.c
@@ -0,0 +1,48 @@
+/*
+ * IA32 Architecture-specific ioctl shim code
+ *
+ * Copyright (C) 2000 VA Linux Co
+ * Copyright (C) 2000 Don Dugger <n0ano@valinux.com>
+ * Copyright (C) 2001-2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <linux/signal.h> /* argh, msdos_fs.h isn't self-contained... */
+#include <linux/syscalls.h>
+#include "ia32priv.h"
+
+#define INCLUDES
+#include "compat_ioctl.c"
+#include <asm/ioctl32.h>
+
+#define IOCTL_NR(a) ((a) & ~(_IOC_SIZEMASK << _IOC_SIZESHIFT))
+
+/*
+ * Issue sys_ioctl(fd, cmd, arg) with the address limit temporarily raised to
+ * KERNEL_DS, restoring the previous limit afterwards.  Evaluates to the int result
+ * of sys_ioctl().
+ *
+ * Every argument is parenthesized; in particular the cast is applied to the whole of
+ * ARG, so an expression such as `base + off' is converted as a unit instead of only
+ * its first operand.
+ */
+#define DO_IOCTL(fd, cmd, arg) ({			\
+	int _ret;					\
+	mm_segment_t _old_fs = get_fs();		\
+							\
+	set_fs(KERNEL_DS);				\
+	_ret = sys_ioctl((fd), (cmd), (unsigned long)(arg));	\
+	set_fs(_old_fs);				\
+	_ret;						\
+})
+
+#define CODE
+#include "compat_ioctl.c"
+
+typedef int (* ioctl32_handler_t)(unsigned int, unsigned int, unsigned long, struct file *);
+
+#define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL((cmd),sys_ioctl)
+#define HANDLE_IOCTL(cmd,handler) { (cmd), (ioctl32_handler_t)(handler), NULL },
+#define IOCTL_TABLE_START \
+ struct ioctl_trans ioctl_start[] = {
+#define IOCTL_TABLE_END \
+ };
+
+IOCTL_TABLE_START
+#define DECLARES
+#include "compat_ioctl.c"
+#include <linux/compat_ioctl.h>
+IOCTL_TABLE_END
+
+int ioctl_table_size = ARRAY_SIZE(ioctl_start);
diff --git a/arch/ia64/ia32/ia32_ldt.c b/arch/ia64/ia32/ia32_ldt.c
new file mode 100644
index 00000000000..a152738c7d0
--- /dev/null
+++ b/arch/ia64/ia32/ia32_ldt.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) 2001, 2004 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Adapted from arch/i386/kernel/ldt.c
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/vmalloc.h>
+
+#include <asm/uaccess.h>
+
+#include "ia32priv.h"
+
+/*
+ * Copy up to BYTECOUNT bytes of the task's LDT, which lives in user space at
+ * IA32_LDT_OFFSET, to the user buffer PTR.  The request is clamped to the size of
+ * the LDT; the (possibly clamped) byte count is returned, or -EFAULT if either copy
+ * faults.
+ *
+ * read_ldt() is not really atomic - this is not a problem since synchronization of
+ * reads and writes done to the LDT has to be assured by user-space anyway.  Writes
+ * are atomic, to protect the security checks done on new descriptors.
+ */
+static int
+read_ldt (void __user *ptr, unsigned long bytecount)
+{
+	const unsigned long ldt_bytes = IA32_LDT_ENTRIES*IA32_LDT_ENTRY_SIZE;
+	char __user *from = (void __user *) IA32_LDT_OFFSET;
+	char __user *to = ptr;
+	unsigned long remaining, chunk;
+	char buf[256];	/* bounce buffer, kept small to spare the kernel stack */
+
+	if (bytecount > ldt_bytes)
+		bytecount = ldt_bytes;
+
+	for (remaining = bytecount; remaining; remaining -= chunk) {
+		chunk = (remaining < sizeof(buf)) ? remaining : sizeof(buf);
+
+		/*
+		 * The source range is known to be valid, but faulting it in can still
+		 * fail when memory runs out.
+		 */
+		if (__copy_from_user(buf, from, chunk))
+			return -EFAULT;
+
+		if (copy_to_user(to, buf, chunk))
+			return -EFAULT;
+
+		from += chunk;
+		to += chunk;
+	}
+	return bytecount;
+}
+
+/*
+ * Report the "default" LDT to user space: zero-fills at most 8 bytes of PTR (fewer
+ * if BYTECOUNT is smaller) and returns the number of bytes cleared, or -EFAULT if
+ * the user buffer cannot be written.
+ */
+static int
+read_default_ldt (void __user * ptr, unsigned long bytecount)
+{
+	/* XXX fix me: should return equivalent of default_ldt[0] */
+	unsigned long nbytes = (bytecount < 8) ? bytecount : 8;
+
+	if (clear_user(ptr, nbytes))
+		return -EFAULT;
+
+	return nbytes;
+}
+
+/*
+ * Install one LDT entry described by the ia32_user_desc at PTR.
+ *
+ * BYTECOUNT must equal sizeof(struct ia32_user_desc), otherwise -EINVAL.
+ * OLDMODE is non-zero for modify_ldt function 1 and zero for function 0x11
+ * (see sys32_modify_ldt): in old mode contents == 3 is rejected and the
+ * `useable' bit is forced to 0.
+ *
+ * Returns -EFAULT if the descriptor cannot be read from PTR, -EINVAL for an
+ * out-of-range entry number or a rejected descriptor, and otherwise the result of
+ * storing the entry into the LDT at IA32_LDT_OFFSET.
+ */
+static int
+write_ldt (void __user * ptr, unsigned long bytecount, int oldmode)
+{
+ struct ia32_user_desc ldt_info;
+ __u64 entry;
+ int ret;
+
+ if (bytecount != sizeof(ldt_info))
+ return -EINVAL;
+ if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
+ return -EFAULT;
+
+ if (ldt_info.entry_number >= IA32_LDT_ENTRIES)
+ return -EINVAL;
+ if (ldt_info.contents == 3) {
+ if (oldmode)
+ return -EINVAL;
+ if (ldt_info.seg_not_present == 0)
+ return -EINVAL;
+ }
+
+ if (ldt_info.base_addr == 0 && ldt_info.limit == 0
+ && (oldmode || (ldt_info.contents == 0 && ldt_info.read_exec_only == 1
+ && ldt_info.seg_32bit == 0 && ldt_info.limit_in_pages == 0
+ && ldt_info.seg_not_present == 1 && ldt_info.useable == 0)))
+ /* allow LDTs to be cleared by the user */
+ entry = 0;
+ else
+ /* we must set the "Accessed" bit as IVE doesn't emulate it */
+ entry = IA32_SEG_DESCRIPTOR(ldt_info.base_addr, ldt_info.limit,
+ (((ldt_info.read_exec_only ^ 1) << 1)
+ | (ldt_info.contents << 2)) | 1,
+ 1, 3, ldt_info.seg_not_present ^ 1,
+ (oldmode ? 0 : ldt_info.useable),
+ ldt_info.seg_32bit,
+ ldt_info.limit_in_pages);
+ /*
+ * Install the new entry. We know we're accessing valid (mapped) user-level
+ * memory, but we still need to guard against out-of-memory, hence the store goes
+ * through __put_user() rather than a plain assignment.
+ */
+ ret = __put_user(entry, (__u64 __user *) IA32_LDT_OFFSET + ldt_info.entry_number);
+ /* the descriptors are reloaded whether or not the store above succeeded */
+ ia32_load_segment_descriptors(current);
+ return ret;
+}
+
+/*
+ * modify_ldt() for IA-32 tasks.  FUNC selects the operation:
+ *	0	read the LDT
+ *	1	write an LDT entry, old mode
+ *	2	read the default LDT
+ *	0x11	write an LDT entry, new mode
+ * Any other value yields -ENOSYS.  PTR is a 32-bit user pointer and is converted
+ * with compat_ptr() before use.
+ */
+asmlinkage int
+sys32_modify_ldt (int func, unsigned int ptr, unsigned int bytecount)
+{
+	switch (func) {
+	case 0:
+		return read_ldt(compat_ptr(ptr), bytecount);
+	case 1:
+		return write_ldt(compat_ptr(ptr), bytecount, 1);
+	case 2:
+		return read_default_ldt(compat_ptr(ptr), bytecount);
+	case 0x11:
+		return write_ldt(compat_ptr(ptr), bytecount, 0);
+	default:
+		return -ENOSYS;
+	}
+}
diff --git a/arch/ia64/ia32/ia32_signal.c b/arch/ia64/ia32/ia32_signal.c
new file mode 100644
index 00000000000..19b02adce68
--- /dev/null
+++ b/arch/ia64/ia32/ia32_signal.c
@@ -0,0 +1,1036 @@
+/*
+ * IA32 Architecture-specific signal handling support.
+ *
+ * Copyright (C) 1999, 2001-2002, 2005 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com>
+ * Copyright (C) 2000 VA Linux Co
+ * Copyright (C) 2000 Don Dugger <n0ano@valinux.com>
+ *
+ * Derived from i386 and Alpha versions.
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/personality.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/syscalls.h>
+#include <linux/unistd.h>
+#include <linux/wait.h>
+#include <linux/compat.h>
+
+#include <asm/intrinsics.h>
+#include <asm/uaccess.h>
+#include <asm/rse.h>
+#include <asm/sigcontext.h>
+#include <asm/segment.h>
+
+#include "ia32priv.h"
+
+#include "../kernel/sigframe.h"
+
+#define A(__x) ((unsigned long)(__x)) /* widen a value to unsigned long */
+
+#define DEBUG_SIG 0
+#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) /* every signal except SIGKILL and SIGSTOP */
+
+#define __IA32_NR_sigreturn 119 /* syscall number embedded in the sigframe_ia32 retcode stub */
+#define __IA32_NR_rt_sigreturn 173 /* syscall number embedded in the rt_sigframe_ia32 retcode stub */
+
+struct sigframe_ia32
+{ /* user-stack frame built by setup_frame_ia32() and consumed by sys32_sigreturn() */
+ int pretcode; /* return address for the handler: sa_restorer, or the stub at IA32_GATE_OFFSET */
+ int sig; /* signal number, possibly remapped through the exec domain's signal_invmap */
+ struct sigcontext_ia32 sc; /* saved register state, filled in by setup_sigcontext_ia32() */
+ struct _fpstate_ia32 fpstate; /* saved FP state; sc.fpstate is set to point here */
+ unsigned int extramask[_COMPAT_NSIG_WORDS-1]; /* signal-mask words beyond the first (which is kept in sc.oldmask) */
+ char retcode[8]; /* "popl %eax ; movl $__IA32_NR_sigreturn,%eax ; int $0x80" */
+};
+
+struct rt_sigframe_ia32
+{ /* user-stack frame built by setup_rt_frame_ia32() and consumed by sys32_rt_sigreturn() */
+ int pretcode; /* return address for the handler: sa_restorer, or the stub at IA32_GATE_OFFSET + 8 */
+ int sig; /* signal number, possibly remapped through the exec domain's signal_invmap */
+ int pinfo; /* user address of the `info' member below */
+ int puc; /* user address of the `uc' member below */
+ compat_siginfo_t info; /* 32-bit siginfo written by copy_siginfo_to_user32() */
+ struct ucontext_ia32 uc; /* alternate-stack description, machine context and signal mask */
+ struct _fpstate_ia32 fpstate; /* saved FP state; uc.uc_mcontext.fpstate is set to point here */
+ char retcode[8]; /* "movl $__IA32_NR_rt_sigreturn,%eax ; int $0x80" */
+};
+
+/*
+ * Build a native siginfo_t in TO from the 32-bit siginfo at user address FROM.
+ *
+ * si_signo, si_errno and si_code are always fetched.  When si_code is
+ * negative the raw pad area (SI_PAD_SIZE bytes) is copied as is; otherwise only
+ * the union member selected by the upper 16 bits of si_code is fetched.
+ *
+ * Returns -EFAULT if FROM fails access_ok(); otherwise the OR of all the
+ * individual user-access results (zero when every access succeeded).
+ */
+int
+copy_siginfo_from_user32 (siginfo_t *to, compat_siginfo_t __user *from)
+{
+	unsigned long addr;
+	int ret;
+
+	if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t)))
+		return -EFAULT;
+
+	ret = __get_user(to->si_signo, &from->si_signo);
+	ret |= __get_user(to->si_errno, &from->si_errno);
+	ret |= __get_user(to->si_code, &from->si_code);
+
+	if (to->si_code < 0) {
+		ret |= __copy_from_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE);
+		return ret;
+	}
+
+	switch (to->si_code >> 16) {
+	case __SI_FAULT >> 16:
+		/* the 32-bit address is fetched into a long and then turned into a pointer */
+		ret |= __get_user(addr, &from->si_addr);
+		to->si_addr = (void __user *) addr;
+		break;
+
+	case __SI_POLL >> 16:
+		ret |= __get_user(to->si_band, &from->si_band);
+		ret |= __get_user(to->si_fd, &from->si_fd);
+		break;
+
+	case __SI_RT >> 16: /* This is not generated by the kernel as of now. */
+	case __SI_MESGQ >> 16:
+		ret |= __get_user(to->si_pid, &from->si_pid);
+		ret |= __get_user(to->si_uid, &from->si_uid);
+		ret |= __get_user(to->si_int, &from->si_int);
+		break;
+
+	case __SI_CHLD >> 16:
+		ret |= __get_user(to->si_utime, &from->si_utime);
+		ret |= __get_user(to->si_stime, &from->si_stime);
+		ret |= __get_user(to->si_status, &from->si_status);
+		/* fall through: pid and uid are fetched for child signals too */
+	default:
+		ret |= __get_user(to->si_pid, &from->si_pid);
+		ret |= __get_user(to->si_uid, &from->si_uid);
+		break;
+	}
+	return ret;
+}
+
+/*
+ * Write the native siginfo FROM to user address TO in 32-bit layout.
+ *
+ * Keep this in step with siginfo_t: to avoid leaking kernel data, padding is
+ * never copied -- only the three generic ints plus the union member that is
+ * relevant for si_code.  The exception is a negative si_code, for which the
+ * pad area (SI_PAD_SIZE bytes) is copied as is.  Pointer-sized members are
+ * narrowed to 32 bits on the way out.
+ *
+ * Returns -EFAULT if TO fails access_ok(); otherwise the OR of all the
+ * individual user-access results (zero when every access succeeded).
+ */
+int
+copy_siginfo_to_user32 (compat_siginfo_t __user *to, siginfo_t *from)
+{
+	unsigned int ptr32;
+	int ret;
+
+	if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
+		return -EFAULT;
+
+	ret = __put_user(from->si_signo, &to->si_signo);
+	ret |= __put_user(from->si_errno, &to->si_errno);
+	ret |= __put_user((short)from->si_code, &to->si_code);
+
+	if (from->si_code < 0) {
+		ret |= __copy_to_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE);
+		return ret;
+	}
+
+	switch (from->si_code >> 16) {
+	case __SI_FAULT >> 16:
+		/* avoid type-checking warnings by copying _pad[0] in lieu of si_addr... */
+		ret |= __put_user(from->_sifields._pad[0], &to->si_addr);
+		break;
+
+	case __SI_POLL >> 16:
+		ret |= __put_user(from->si_band, &to->si_band);
+		ret |= __put_user(from->si_fd, &to->si_fd);
+		break;
+
+	case __SI_TIMER >> 16:
+		ret |= __put_user(from->si_tid, &to->si_tid);
+		ret |= __put_user(from->si_overrun, &to->si_overrun);
+		ptr32 = (unsigned long) from->si_ptr;
+		ret |= __put_user(ptr32, &to->si_ptr);
+		break;
+
+	case __SI_RT >> 16: /* Not generated by the kernel as of now. */
+	case __SI_MESGQ >> 16:
+		ret |= __put_user(from->si_uid, &to->si_uid);
+		ret |= __put_user(from->si_pid, &to->si_pid);
+		ptr32 = (unsigned long) from->si_ptr;
+		ret |= __put_user(ptr32, &to->si_ptr);
+		break;
+
+	case __SI_CHLD >> 16:
+		ret |= __put_user(from->si_utime, &to->si_utime);
+		ret |= __put_user(from->si_stime, &to->si_stime);
+		ret |= __put_user(from->si_status, &to->si_status);
+		/* fall through: pid and uid are stored for child signals too */
+	default:
+		ret |= __put_user(from->si_pid, &to->si_pid);
+		ret |= __put_user(from->si_uid, &to->si_uid);
+		break;
+	}
+	return ret;
+}
+
+
+/*
+ * SAVE and RESTORE of ia32 fpstate info, from ia64 current state
+ * Used in exception handler to pass the fpstate to the user, and restore
+ * the fpstate while returning from the exception handler.
+ *
+ * fpstate info and their mapping to IA64 regs:
+ * fpstate REG(BITS) Attribute Comments
+ * cw ar.fcr(0:12) with bits 7 and 6 not used
+ * sw ar.fsr(0:15)
+ * tag ar.fsr(16:31) with odd numbered bits not used
+ * (read returns 0, writes ignored)
+ * ipoff ar.fir(0:31)
+ * cssel ar.fir(32:47)
+ * dataoff ar.fdr(0:31)
+ * datasel ar.fdr(32:47)
+ *
+ * _st[(0+TOS)%8] f8
+ * _st[(1+TOS)%8] f9
+ * _st[(2+TOS)%8] f10
+ * _st[(3+TOS)%8] f11 (f8..f11 from ptregs)
+ * : : : (f12..f15 from live reg)
+ * : : :
+ * _st[(7+TOS)%8] f15 TOS=sw.top(bits11:13)
+ *
+ * status Same as sw RO
+ * magic 0 as X86_FXSR_MAGIC in ia32
+ * mxcsr Bits(7:15)=ar.fcr(39:47)
+ * Bits(0:5) =ar.fsr(32:37) with bit 6 reserved
+ * _xmm[0..7] f16..f31 (live registers)
+ * with _xmm[0]
+ * Bit(64:127)=f17(0:63)
+ * Bit(0:63)=f16(0:63)
+ * All other fields unused...
+ */
+
+static int
+save_ia32_fpstate_live (struct _fpstate_ia32 __user *save)
+{ /*
+ * Store the current task's ia32 FP state into the user buffer SAVE: the
+ * control/status fields derived from ar.fsr/fcr/fir/fdr, _st[] from f8..f15
+ * and _xmm[] from f16..f31.  As a side effect the exception bits of ar.fsr
+ * are cleared.
+ *
+ * Returns -EFAULT if SAVE fails access_ok(), 0 otherwise.
+ * NOTE(review): the results of the __put_user()/copy_to_user() calls below
+ * are not checked, so a fault after access_ok() still yields 0.
+ */
+ struct task_struct *tsk = current;
+ struct pt_regs *ptp;
+ struct _fpreg_ia32 *fpregp;
+ char buf[32]; /* scratch; a 16-byte aligned _fpreg_ia32 slot is carved out of it below */
+ unsigned long fsr, fcr, fir, fdr;
+ unsigned long new_fsr;
+ unsigned long num128[2]; /* staging area for one xmm register (two 64-bit halves) */
+ unsigned long mxcsr=0;
+ int fp_tos, fr8_st_map;
+
+ if (!access_ok(VERIFY_WRITE, save, sizeof(*save)))
+ return -EFAULT;
+
+ /* Read in fsr, fcr, fir, fdr and copy onto fpstate */
+ fsr = ia64_getreg(_IA64_REG_AR_FSR);
+ fcr = ia64_getreg(_IA64_REG_AR_FCR);
+ fir = ia64_getreg(_IA64_REG_AR_FIR);
+ fdr = ia64_getreg(_IA64_REG_AR_FDR);
+
+ /*
+ * We need to clear the exception state before calling the signal handler. Clear
+ * the bits 15, bits 0-7 in fp status word. Similar to the functionality of fnclex
+ * instruction.
+ */
+ new_fsr = fsr & ~0x80ff;
+ ia64_setreg(_IA64_REG_AR_FSR, new_fsr);
+
+ /* the values saved below come from the registers as read above, i.e. before the clearing */
+ __put_user(fcr & 0xffff, &save->cw);
+ __put_user(fsr & 0xffff, &save->sw);
+ __put_user((fsr>>16) & 0xffff, &save->tag);
+ __put_user(fir, &save->ipoff);
+ __put_user((fir>>32) & 0xffff, &save->cssel);
+ __put_user(fdr, &save->dataoff);
+ __put_user((fdr>>32) & 0xffff, &save->datasel);
+ __put_user(fsr & 0xffff, &save->status);
+
+ mxcsr = ((fcr>>32) & 0xff80) | ((fsr>>32) & 0x3f); /* mxcsr bits 7..15 from fcr(39:47), bits 0..5 from fsr(32:37) */
+ __put_user(mxcsr & 0xffff, &save->mxcsr);
+ __put_user( 0, &save->magic); //#define X86_FXSR_MAGIC 0x0000
+
+ /*
+ * save f8..f11 from pt_regs
+ * save f12..f15 from live register set
+ */
+ /*
+ * Find the location where f8 has to go in fp reg stack. This depends on
+ * TOP(11:13) field of sw. Other f reg continue sequentially from where f8 maps
+ * to.
+ */
+ fp_tos = (fsr>>11)&0x7;
+ fr8_st_map = (8-fp_tos)&0x7;
+ ptp = ia64_task_regs(tsk);
+ fpregp = (struct _fpreg_ia32 *)(((unsigned long)buf + 15) & ~15); /* round buf up to a 16-byte boundary */
+ ia64f2ia32f(fpregp, &ptp->f8);
+ copy_to_user(&save->_st[(0+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32));
+ ia64f2ia32f(fpregp, &ptp->f9);
+ copy_to_user(&save->_st[(1+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32));
+ ia64f2ia32f(fpregp, &ptp->f10);
+ copy_to_user(&save->_st[(2+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32));
+ ia64f2ia32f(fpregp, &ptp->f11);
+ copy_to_user(&save->_st[(3+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32));
+
+ ia64_stfe(fpregp, 12);
+ copy_to_user(&save->_st[(4+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32));
+ ia64_stfe(fpregp, 13);
+ copy_to_user(&save->_st[(5+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32));
+ ia64_stfe(fpregp, 14);
+ copy_to_user(&save->_st[(6+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32));
+ ia64_stfe(fpregp, 15);
+ copy_to_user(&save->_st[(7+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32));
+
+ /* each _xmm[i] is assembled from the register pair f(16+2i) (low half) and f(17+2i) (high half) */
+ ia64_stf8(&num128[0], 16);
+ ia64_stf8(&num128[1], 17);
+ copy_to_user(&save->_xmm[0], num128, sizeof(struct _xmmreg_ia32));
+
+ ia64_stf8(&num128[0], 18);
+ ia64_stf8(&num128[1], 19);
+ copy_to_user(&save->_xmm[1], num128, sizeof(struct _xmmreg_ia32));
+
+ ia64_stf8(&num128[0], 20);
+ ia64_stf8(&num128[1], 21);
+ copy_to_user(&save->_xmm[2], num128, sizeof(struct _xmmreg_ia32));
+
+ ia64_stf8(&num128[0], 22);
+ ia64_stf8(&num128[1], 23);
+ copy_to_user(&save->_xmm[3], num128, sizeof(struct _xmmreg_ia32));
+
+ ia64_stf8(&num128[0], 24);
+ ia64_stf8(&num128[1], 25);
+ copy_to_user(&save->_xmm[4], num128, sizeof(struct _xmmreg_ia32));
+
+ ia64_stf8(&num128[0], 26);
+ ia64_stf8(&num128[1], 27);
+ copy_to_user(&save->_xmm[5], num128, sizeof(struct _xmmreg_ia32));
+
+ ia64_stf8(&num128[0], 28);
+ ia64_stf8(&num128[1], 29);
+ copy_to_user(&save->_xmm[6], num128, sizeof(struct _xmmreg_ia32));
+
+ ia64_stf8(&num128[0], 30);
+ ia64_stf8(&num128[1], 31);
+ copy_to_user(&save->_xmm[7], num128, sizeof(struct _xmmreg_ia32));
+ return 0;
+}
+
+static int
+restore_ia32_fpstate_live (struct _fpstate_ia32 __user *save)
+{ /*
+ * Load the ia32 FP state stored at user address SAVE back into the current
+ * task: selected fields of ar.fsr/fcr/fir/fdr, f8..f11 in pt_regs, the live
+ * registers f12..f15, and f16..f31 from _xmm[].
+ *
+ * Returns -EFAULT if SAVE fails access_ok(), 0 otherwise.
+ * NOTE(review): the results of the __get_user()/copy_from_user() calls below
+ * are not checked, so a fault after access_ok() still yields 0.
+ */
+ struct task_struct *tsk = current;
+ struct pt_regs *ptp;
+ unsigned int lo, hi;
+ unsigned long num128[2]; /* staging area for one xmm register (two 64-bit halves) */
+ unsigned long num64, mxcsr;
+ struct _fpreg_ia32 *fpregp;
+ char buf[32]; /* scratch; a 16-byte aligned _fpreg_ia32 slot is carved out of it below */
+ unsigned long fsr, fcr, fir, fdr;
+ int fp_tos, fr8_st_map;
+
+ if (!access_ok(VERIFY_READ, save, sizeof(*save)))
+ return(-EFAULT);
+
+ /*
+ * Updating fsr, fcr, fir, fdr.
+ * Just a bit more complicated than save.
+ * - Need to make sure that we don't write any value other than the
+ * specific fpstate info
+ * - Need to make sure that the untouched part of fsr, fdr, fir, fcr
+ * should remain same while writing.
+ * So, we do a read, change specific fields and write.
+ */
+ fsr = ia64_getreg(_IA64_REG_AR_FSR);
+ fcr = ia64_getreg(_IA64_REG_AR_FCR);
+ fir = ia64_getreg(_IA64_REG_AR_FIR);
+ fdr = ia64_getreg(_IA64_REG_AR_FDR);
+
+ __get_user(mxcsr, (unsigned int __user *)&save->mxcsr);
+ /* setting bits 0..5 8..12 with cw and 39..47 from mxcsr */
+ __get_user(lo, (unsigned int __user *)&save->cw);
+ num64 = mxcsr & 0xff10; /* NOTE(review): save_ia32_fpstate_live() uses 0xff80 for these bits and the comment above says 39..47; 0xff10 selects bits 4 and 8..15 instead -- confirm which mask is intended */
+ num64 = (num64 << 32) | (lo & 0x1f3f);
+ fcr = (fcr & (~0xff1000001f3fUL)) | num64;
+
+ /* setting bits 0..31 with sw and tag and 32..37 from mxcsr */
+ __get_user(lo, (unsigned int __user *)&save->sw);
+ /* set bits 15,7 (fsw.b, fsw.es) to reflect the current error status */
+ if ( !(lo & 0x7f) )
+ lo &= (~0x8080);
+ __get_user(hi, (unsigned int __user *)&save->tag);
+ num64 = mxcsr & 0x3f;
+ num64 = (num64 << 16) | (hi & 0xffff);
+ num64 = (num64 << 16) | (lo & 0xffff);
+ fsr = (fsr & (~0x3fffffffffUL)) | num64;
+
+ /* setting bits 0..47 with cssel and ipoff */
+ __get_user(lo, (unsigned int __user *)&save->ipoff);
+ __get_user(hi, (unsigned int __user *)&save->cssel);
+ num64 = hi & 0xffff;
+ num64 = (num64 << 32) | lo;
+ fir = (fir & (~0xffffffffffffUL)) | num64;
+
+ /* setting bits 0..47 with datasel and dataoff */
+ __get_user(lo, (unsigned int __user *)&save->dataoff);
+ __get_user(hi, (unsigned int __user *)&save->datasel);
+ num64 = hi & 0xffff;
+ num64 = (num64 << 32) | lo;
+ fdr = (fdr & (~0xffffffffffffUL)) | num64;
+
+ ia64_setreg(_IA64_REG_AR_FSR, fsr);
+ ia64_setreg(_IA64_REG_AR_FCR, fcr);
+ ia64_setreg(_IA64_REG_AR_FIR, fir);
+ ia64_setreg(_IA64_REG_AR_FDR, fdr);
+
+ /*
+ * restore f8..f11 onto pt_regs
+ * restore f12..f15 onto live registers
+ */
+ /*
+ * Find the location where f8 has to go in fp reg stack. This depends on
+ * TOP(11:13) field of sw. Other f reg continue sequentially from where f8 maps
+ * to.
+ */
+ fp_tos = (fsr>>11)&0x7; /* taken from the fsr value just assembled from the user's sw */
+ fr8_st_map = (8-fp_tos)&0x7;
+ fpregp = (struct _fpreg_ia32 *)(((unsigned long)buf + 15) & ~15); /* round buf up to a 16-byte boundary */
+
+ ptp = ia64_task_regs(tsk);
+ copy_from_user(fpregp, &save->_st[(0+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32));
+ ia32f2ia64f(&ptp->f8, fpregp);
+ copy_from_user(fpregp, &save->_st[(1+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32));
+ ia32f2ia64f(&ptp->f9, fpregp);
+ copy_from_user(fpregp, &save->_st[(2+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32));
+ ia32f2ia64f(&ptp->f10, fpregp);
+ copy_from_user(fpregp, &save->_st[(3+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32));
+ ia32f2ia64f(&ptp->f11, fpregp);
+
+ copy_from_user(fpregp, &save->_st[(4+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32));
+ ia64_ldfe(12, fpregp);
+ copy_from_user(fpregp, &save->_st[(5+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32));
+ ia64_ldfe(13, fpregp);
+ copy_from_user(fpregp, &save->_st[(6+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32));
+ ia64_ldfe(14, fpregp);
+ copy_from_user(fpregp, &save->_st[(7+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32));
+ ia64_ldfe(15, fpregp);
+
+ /* each _xmm[i] is split into the register pair f(16+2i) (low half) and f(17+2i) (high half) */
+ copy_from_user(num128, &save->_xmm[0], sizeof(struct _xmmreg_ia32));
+ ia64_ldf8(16, &num128[0]);
+ ia64_ldf8(17, &num128[1]);
+
+ copy_from_user(num128, &save->_xmm[1], sizeof(struct _xmmreg_ia32));
+ ia64_ldf8(18, &num128[0]);
+ ia64_ldf8(19, &num128[1]);
+
+ copy_from_user(num128, &save->_xmm[2], sizeof(struct _xmmreg_ia32));
+ ia64_ldf8(20, &num128[0]);
+ ia64_ldf8(21, &num128[1]);
+
+ copy_from_user(num128, &save->_xmm[3], sizeof(struct _xmmreg_ia32));
+ ia64_ldf8(22, &num128[0]);
+ ia64_ldf8(23, &num128[1]);
+
+ copy_from_user(num128, &save->_xmm[4], sizeof(struct _xmmreg_ia32));
+ ia64_ldf8(24, &num128[0]);
+ ia64_ldf8(25, &num128[1]);
+
+ copy_from_user(num128, &save->_xmm[5], sizeof(struct _xmmreg_ia32));
+ ia64_ldf8(26, &num128[0]);
+ ia64_ldf8(27, &num128[1]);
+
+ copy_from_user(num128, &save->_xmm[6], sizeof(struct _xmmreg_ia32));
+ ia64_ldf8(28, &num128[0]);
+ ia64_ldf8(29, &num128[1]);
+
+ copy_from_user(num128, &save->_xmm[7], sizeof(struct _xmmreg_ia32));
+ ia64_ldf8(30, &num128[0]);
+ ia64_ldf8(31, &num128[1]);
+ return 0;
+}
+
+/*
+ * Store a 32-bit HANDLER (and RESTORER) into the 64-bit sa_handler slot of SA.
+ *
+ * The three special values 0, 1 and (unsigned) -1 -- SIG_DFL, SIG_IGN and
+ * SIG_ERR -- are sign-extended to 64 bits.  Any other handler is stored in the
+ * low 32 bits with the restorer packed into the upper 32 bits.
+ */
+static inline void
+sigact_set_handler (struct k_sigaction *sa, unsigned int handler, unsigned int restorer)
+{
+	unsigned long packed;
+
+	if (handler + 1 > 2)
+		packed = ((unsigned long) restorer << 32) | handler;
+	else
+		packed = A((int) handler);	/* special value: sign-extend */
+
+	sa->sa.sa_handler = (__sighandler_t) packed;
+}
+
+/*
+ * Common back end of the ia32 sigsuspend()/rt_sigsuspend() system calls.
+ *
+ * SSET is a *kernel* copy of the caller's signal mask, of which the first
+ * SIGSETSIZE bytes are valid.  The mask (minus SIGKILL/SIGSTOP) temporarily
+ * replaces current->blocked and the task sleeps until ia64_do_signal() reports
+ * that a signal was handled.
+ *
+ * Returns -EINVAL if SIGSETSIZE exceeds the native sigset_t, otherwise -EINTR
+ * once a signal has been delivered.
+ */
+long
+__ia32_rt_sigsuspend (compat_sigset_t *sset, unsigned int sigsetsize, struct sigscratch *scr)
+{
+	extern long ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall);
+	sigset_t oldset, set;
+
+	/* never copy more than the native set can hold */
+	if (sigsetsize > sizeof(set))
+		return -EINVAL;
+
+	scr->scratch_unat = 0;	/* avoid leaking kernel bits to user level */
+
+	/*
+	 * Clear the whole set (sizeof the object, not of a pointer to it) and
+	 * then overlay the caller's bits.  This is a kernel-to-kernel copy:
+	 * memcpy() cannot fail and returns its (non-NULL) destination, so its
+	 * result must not be treated as an error indication.
+	 */
+	memset(&set, 0, sizeof(set));
+	memcpy(&set.sig, &sset->sig, sigsetsize);
+
+	sigdelsetmask(&set, ~_BLOCKABLE);
+
+	spin_lock_irq(&current->sighand->siglock);
+	{
+		oldset = current->blocked;
+		current->blocked = set;
+		recalc_sigpending();
+	}
+	spin_unlock_irq(&current->sighand->siglock);
+
+	/*
+	 * The return below usually returns to the signal handler.  We need to pre-set the
+	 * correct error code here to ensure that the right values get saved in sigcontext
+	 * by ia64_do_signal.
+	 */
+	scr->pt.r8 = -EINTR;
+	while (1) {
+		current->state = TASK_INTERRUPTIBLE;
+		schedule();
+		if (ia64_do_signal(&oldset, scr, 1))
+			return -EINTR;
+	}
+}
+
+/*
+ * ia32 rt_sigsuspend(): fetch SIGSETSIZE bytes of signal mask from user space
+ * and hand the kernel copy to __ia32_rt_sigsuspend().
+ *
+ * Returns -EINVAL if SIGSETSIZE is larger than compat_sigset_t, -EFAULT if the
+ * mask cannot be read, otherwise whatever __ia32_rt_sigsuspend() returns.
+ */
+asmlinkage long
+ia32_rt_sigsuspend (compat_sigset_t __user *uset, unsigned int sigsetsize, struct sigscratch *scr)
+{
+	compat_sigset_t kset;
+
+	if (sigsetsize <= sizeof(compat_sigset_t)) {
+		if (copy_from_user(&kset.sig, &uset->sig, sigsetsize))
+			return -EFAULT;
+		return __ia32_rt_sigsuspend(&kset, sigsetsize, scr);
+	}
+	return -EINVAL;
+}
+
+/*
+ * ia32 sigsuspend(): the single 32-bit MASK word is passed on, in place, as a
+ * sizeof(mask)-byte signal set to __ia32_rt_sigsuspend().
+ */
+asmlinkage long
+ia32_sigsuspend (unsigned int mask, struct sigscratch *scr)
+{
+	compat_sigset_t *as_set = (compat_sigset_t *) &mask;
+
+	return __ia32_rt_sigsuspend(as_set, sizeof(mask), scr);
+}
+
+/*
+ * ia32 signal(): install HANDLER for SIG with SA_ONESHOT | SA_NOMASK semantics
+ * and no restorer.
+ *
+ * Returns the error from do_sigaction() if it fails, otherwise the previous
+ * handler as extracted by IA32_SA_HANDLER().
+ */
+asmlinkage long
+sys32_signal (int sig, unsigned int handler)
+{
+	struct k_sigaction new_sa, old_sa;
+	int ret;
+
+	sigact_set_handler(&new_sa, handler, 0);
+	new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK;
+	/*
+	 * new_sa lives on the stack: without this its sa_mask would be whatever
+	 * happened to be there, and do_sigaction() would install that garbage
+	 * as the set of signals blocked while the handler runs.
+	 */
+	sigemptyset(&new_sa.sa.sa_mask);
+
+	ret = do_sigaction(sig, &new_sa, &old_sa);
+
+	return ret ? ret : IA32_SA_HANDLER(&old_sa);
+}
+
+/*
+ * ia32 rt_sigaction(): translate a 32-bit struct sigaction32 to and from the
+ * native k_sigaction around do_sigaction().
+ *
+ * ACT (may be NULL) is the new action, OACT (may be NULL) receives the old one.
+ *
+ * Returns -EINVAL if SIGSETSIZE is not sizeof(compat_sigset_t), -EFAULT if
+ * either user structure cannot be accessed, otherwise the do_sigaction()
+ * result.
+ */
+asmlinkage long
+sys32_rt_sigaction (int sig, struct sigaction32 __user *act,
+		    struct sigaction32 __user *oact, unsigned int sigsetsize)
+{
+	struct k_sigaction new_ka, old_ka;
+	unsigned int handler, restorer;
+	int ret;
+
+	/* XXX: Don't preclude handling different sized sigset_t's. */
+	if (sigsetsize != sizeof(compat_sigset_t))
+		return -EINVAL;
+
+	if (act) {
+		ret = get_user(handler, &act->sa_handler);
+		ret |= get_user(new_ka.sa.sa_flags, &act->sa_flags);
+		ret |= get_user(restorer, &act->sa_restorer);
+		ret |= copy_from_user(&new_ka.sa.sa_mask, &act->sa_mask, sizeof(compat_sigset_t));
+		if (ret)
+			return -EFAULT;
+
+		sigact_set_handler(&new_ka, handler, restorer);
+	}
+
+	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+	if (!ret && oact) {
+		ret = put_user(IA32_SA_HANDLER(&old_ka), &oact->sa_handler);
+		ret |= put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+		ret |= put_user(IA32_SA_RESTORER(&old_ka), &oact->sa_restorer);
+		ret |= copy_to_user(&oact->sa_mask, &old_ka.sa.sa_mask, sizeof(compat_sigset_t));
+		/*
+		 * copy_to_user() yields a count of uncopied bytes rather than an
+		 * errno, so the OR above may be a positive number or a mangled
+		 * negative one.  Collapse any failure to -EFAULT, as is done for
+		 * the ACT path above.
+		 */
+		if (ret)
+			ret = -EFAULT;
+	}
+	return ret;
+}
+
+
+/*
+ * ia32 rt_sigprocmask(): bounce the (at most native-sized) signal sets through
+ * a kernel sigset_t and call the native sys_rt_sigprocmask() on it under
+ * KERNEL_DS.
+ *
+ * Returns -EINVAL if SIGSETSIZE exceeds the native sigset_t, -EFAULT if a user
+ * buffer cannot be accessed, the native syscall's error if it fails, else 0.
+ */
+asmlinkage long
+sys32_rt_sigprocmask (int how, compat_sigset_t __user *set, compat_sigset_t __user *oset,
+		      unsigned int sigsetsize)
+{
+	mm_segment_t saved_fs = get_fs();
+	sigset_t kset;
+	sigset_t __user *in, *out;
+	long err;
+
+	if (sigsetsize > sizeof(kset))
+		return -EINVAL;
+
+	if (set) {
+		memset(&kset, 0, sizeof(kset));
+		if (copy_from_user(&kset.sig, set, sigsetsize))
+			return -EFAULT;
+	}
+
+	/* the same kernel buffer serves as both input and output of the native call */
+	in = set ? (sigset_t __user *) &kset : NULL;
+	out = oset ? (sigset_t __user *) &kset : NULL;
+
+	set_fs(KERNEL_DS);
+	err = sys_rt_sigprocmask(how, in, out, sizeof(kset));
+	set_fs(saved_fs);
+
+	if (err)
+		return err;
+	if (oset && copy_to_user(oset, &kset.sig, sigsetsize))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * ia32 rt_sigqueueinfo(): convert the 32-bit siginfo at UINFO into a native
+ * siginfo_t and pass that kernel copy to sys_rt_sigqueueinfo() under KERNEL_DS.
+ *
+ * Returns -EFAULT if the conversion reports a failure, otherwise the result of
+ * the native system call.
+ */
+asmlinkage long
+sys32_rt_sigqueueinfo (int pid, int sig, compat_siginfo_t __user *uinfo)
+{
+	mm_segment_t saved_fs = get_fs();
+	siginfo_t kinfo;
+	int rc;
+
+	if (copy_siginfo_from_user32(&kinfo, uinfo) != 0)
+		return -EFAULT;
+
+	set_fs(KERNEL_DS);
+	rc = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __user *) &kinfo);
+	set_fs(saved_fs);
+
+	return rc;
+}
+
+/*
+ * ia32 sigaction() (old, single-word mask variant): translate a 32-bit
+ * struct old_sigaction32 to and from the native k_sigaction around
+ * do_sigaction().
+ *
+ * ACT (may be NULL) is the new action, OACT (may be NULL) receives the old one.
+ * Returns the OR of the user-access results if ACT cannot be read, otherwise
+ * the do_sigaction() result, or -- when that succeeded and OACT is given --
+ * the OR of the put_user() results.
+ */
+asmlinkage long
+sys32_sigaction (int sig, struct old_sigaction32 __user *act, struct old_sigaction32 __user *oact)
+{
+	struct k_sigaction new_ka, old_ka;
+	struct k_sigaction *newp = NULL, *oldp = NULL;
+	unsigned int handler, restorer;
+	int ret;
+
+	if (act) {
+		compat_old_sigset_t mask;
+
+		ret = get_user(handler, &act->sa_handler);
+		ret |= get_user(new_ka.sa.sa_flags, &act->sa_flags);
+		ret |= get_user(restorer, &act->sa_restorer);
+		ret |= get_user(mask, &act->sa_mask);
+		if (ret)
+			return ret;
+
+		sigact_set_handler(&new_ka, handler, restorer);
+		siginitset(&new_ka.sa.sa_mask, mask);
+		newp = &new_ka;
+	}
+	if (oact)
+		oldp = &old_ka;
+
+	ret = do_sigaction(sig, newp, oldp);
+	if (ret || !oact)
+		return ret;
+
+	/* report the previous action; only the first mask word fits the old layout */
+	ret = put_user(IA32_SA_HANDLER(&old_ka), &oact->sa_handler);
+	ret |= put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+	ret |= put_user(IA32_SA_RESTORER(&old_ka), &oact->sa_restorer);
+	ret |= put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
+
+	return ret;
+}
+
+static int
+setup_sigcontext_ia32 (struct sigcontext_ia32 __user *sc, struct _fpstate_ia32 __user *fpstate,
+ struct pt_regs *regs, unsigned long mask)
+{ /*
+ * Fill the user-space sigcontext SC from REGS and the current eflags, save
+ * the FP state to FPSTATE via save_ia32_fpstate_live(), record FPSTATE's
+ * address in sc->fpstate and store MASK in sc->oldmask.
+ *
+ * Returns -EFAULT if SC fails access_ok(); otherwise the accumulated
+ * user-access status (zero on success, non-zero on failure).
+ */
+ int err = 0;
+ unsigned long flag;
+
+ if (!access_ok(VERIFY_WRITE, sc, sizeof(*sc)))
+ return -EFAULT;
+
+ /* r16 packs four 16-bit selectors: ds (bits 0..15), es (16..31), fs (32..47), gs (48..63) */
+ err |= __put_user((regs->r16 >> 32) & 0xffff, (unsigned int __user *)&sc->fs);
+ err |= __put_user((regs->r16 >> 48) & 0xffff, (unsigned int __user *)&sc->gs);
+ err |= __put_user((regs->r16 >> 16) & 0xffff, (unsigned int __user *)&sc->es);
+ err |= __put_user(regs->r16 & 0xffff, (unsigned int __user *)&sc->ds);
+ err |= __put_user(regs->r15, &sc->edi);
+ err |= __put_user(regs->r14, &sc->esi);
+ err |= __put_user(regs->r13, &sc->ebp);
+ err |= __put_user(regs->r12, &sc->esp);
+ err |= __put_user(regs->r11, &sc->ebx);
+ err |= __put_user(regs->r10, &sc->edx);
+ err |= __put_user(regs->r9, &sc->ecx);
+ err |= __put_user(regs->r8, &sc->eax);
+#if 0
+ err |= __put_user(current->tss.trap_no, &sc->trapno);
+ err |= __put_user(current->tss.error_code, &sc->err);
+#endif
+ err |= __put_user(regs->cr_iip, &sc->eip);
+ err |= __put_user(regs->r17 & 0xffff, (unsigned int __user *)&sc->cs); /* r17 packs cs (bits 0..15) and ss (16..31) */
+ /*
+ * `eflags' is in an ar register for this context
+ */
+ flag = ia64_getreg(_IA64_REG_AR_EFLAG);
+ err |= __put_user((unsigned int)flag, &sc->eflags);
+ err |= __put_user(regs->r12, &sc->esp_at_signal);
+ err |= __put_user((regs->r17 >> 16) & 0xffff, (unsigned int __user *)&sc->ss);
+
+ if ( save_ia32_fpstate_live(fpstate) < 0 )
+ err = -EFAULT;
+ else
+ err |= __put_user((u32)(u64)fpstate, &sc->fpstate); /* user address of the saved FP state, truncated to 32 bits */
+
+#if 0
+ tmp = save_i387(fpstate);
+ if (tmp < 0)
+ err = 1;
+ else
+ err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate);
+
+ /* non-iBCS2 extensions.. */
+#endif
+ err |= __put_user(mask, &sc->oldmask);
+#if 0
+ err |= __put_user(current->tss.cr2, &sc->cr2);
+#endif
+ return err;
+}
+
+static int
+restore_sigcontext_ia32 (struct pt_regs *regs, struct sigcontext_ia32 __user *sc, int *peax)
+{ /*
+ * Reload REGS, the eflags application register and (if sc->fpstate is
+ * non-NULL) the FP state from the user-space sigcontext SC.  The saved eax
+ * is not written to REGS but returned through *PEAX.
+ *
+ * Returns -EFAULT if SC fails access_ok(); otherwise the accumulated
+ * user-access status (zero on success, non-zero on failure).
+ */
+ unsigned int err = 0;
+
+ /* Always make any pending restarted system calls return -EINTR */
+ current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+ if (!access_ok(VERIFY_READ, sc, sizeof(*sc)))
+ return(-EFAULT);
+
+#define COPY(ia64x, ia32x) err |= __get_user(regs->ia64x, &sc->ia32x)
+
+#define copyseg_gs(tmp) (regs->r16 |= (unsigned long) (tmp) << 48)
+#define copyseg_fs(tmp) (regs->r16 |= (unsigned long) (tmp) << 32)
+#define copyseg_cs(tmp) (regs->r17 |= tmp)
+#define copyseg_ss(tmp) (regs->r17 |= (unsigned long) (tmp) << 16)
+#define copyseg_es(tmp) (regs->r16 |= (unsigned long) (tmp) << 16)
+#define copyseg_ds(tmp) (regs->r16 |= tmp)
+
+#define COPY_SEG(seg) \
+ { \
+ unsigned short tmp; \
+ err |= __get_user(tmp, &sc->seg); \
+ copyseg_##seg(tmp); \
+ }
+#define COPY_SEG_STRICT(seg) \
+ { \
+ unsigned short tmp; \
+ err |= __get_user(tmp, &sc->seg); \
+ copyseg_##seg(tmp|3); \
+ }
+
+ /* To make COPY_SEGs easier, we zero r16, r17 */
+ regs->r16 = 0;
+ regs->r17 = 0;
+
+ COPY_SEG(gs);
+ COPY_SEG(fs);
+ COPY_SEG(es);
+ COPY_SEG(ds);
+ COPY(r15, edi);
+ COPY(r14, esi);
+ COPY(r13, ebp);
+ COPY(r12, esp);
+ COPY(r11, ebx);
+ COPY(r10, edx);
+ COPY(r9, ecx);
+ COPY(cr_iip, eip);
+ COPY_SEG_STRICT(cs); /* the STRICT variant forces the two low selector bits on (tmp|3) */
+ COPY_SEG_STRICT(ss);
+ ia32_load_segment_descriptors(current); /* called after r16/r17 have been rebuilt above */
+ {
+ unsigned int tmpflags;
+ unsigned long flag;
+
+ /*
+ * IA32 `eflags' is not part of `pt_regs', it's in an ar register which
+ * is part of the thread context. Fortunately, we are executing in the
+ * IA32 process's context.
+ */
+ err |= __get_user(tmpflags, &sc->eflags);
+ flag = ia64_getreg(_IA64_REG_AR_EFLAG);
+ flag &= ~0x40DD5; /* only the bits in mask 0x40DD5 are taken from the user's value ... */
+ flag |= (tmpflags & 0x40DD5); /* ... every other eflags bit keeps its current setting */
+ ia64_setreg(_IA64_REG_AR_EFLAG, flag);
+
+ regs->r1 = -1; /* disable syscall checks, r1 is orig_eax */
+ }
+
+ {
+ struct _fpstate_ia32 __user *buf = NULL;
+ u32 fpstate_ptr;
+ err |= get_user(fpstate_ptr, &(sc->fpstate));
+ buf = compat_ptr(fpstate_ptr);
+ if (buf) { /* a NULL fpstate pointer leaves the FP state untouched */
+ err |= restore_ia32_fpstate_live(buf);
+ }
+ }
+
+#if 0
+ {
+ struct _fpstate * buf;
+ err |= __get_user(buf, &sc->fpstate);
+ if (buf) {
+ if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
+ goto badframe;
+ err |= restore_i387(buf);
+ }
+ }
+#endif
+
+ err |= __get_user(*peax, &sc->eax);
+ return err;
+
+#if 0
+ badframe:
+ return 1;
+#endif
+}
+
+/*
+ * Choose the user stack for a signal frame and return the 8-byte aligned
+ * address FRAME_SIZE bytes below its top.
+ */
+static inline void __user *
+get_sigframe (struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
+{
+	/* By default use the normal stack; the cast drops the sign-extension of bit 31. */
+	unsigned long sp = (unsigned int) regs->r12;
+
+	/* X/Open sanctioned switching: move to the alternate stack unless already on it. */
+	if ((ka->sa.sa_flags & SA_ONSTACK) && !on_sig_stack(sp))
+		sp = current->sas_ss_sp + current->sas_ss_size;
+
+	/* Legacy stack switching is not supported. */
+
+	return (void __user *) ((sp - frame_size) & -8ul);
+}
+
+static int
+setup_frame_ia32 (int sig, struct k_sigaction *ka, sigset_t *set, struct pt_regs * regs)
+{ /*
+ * Build a struct sigframe_ia32 on the user stack for signal SIG and point
+ * REGS at the handler described by KA.  SET is the signal mask to be saved
+ * in the frame.
+ *
+ * Returns 1 on success; on any failure forces a SIGSEGV and returns 0.
+ */
+ struct exec_domain *ed = current_thread_info()->exec_domain;
+ struct sigframe_ia32 __user *frame;
+ int err = 0;
+
+ frame = get_sigframe(ka, regs, sizeof(*frame));
+
+ if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+ goto give_sigsegv;
+
+ /* signals below 32 are remapped through the exec domain's signal_invmap when one exists */
+ err |= __put_user((ed && ed->signal_invmap && sig < 32
+ ? (int)(ed->signal_invmap[sig]) : sig), &frame->sig);
+
+ err |= setup_sigcontext_ia32(&frame->sc, &frame->fpstate, regs, set->sig[0]);
+
+ if (_COMPAT_NSIG_WORDS > 1) /* the first 4 bytes of the mask went into sc.oldmask; the rest go here */
+ err |= __copy_to_user(frame->extramask, (char *) &set->sig + 4,
+ sizeof(frame->extramask));
+
+ /* Set up to return from userspace. If provided, use a stub
+ already in userspace. */
+ if (ka->sa.sa_flags & SA_RESTORER) {
+ unsigned int restorer = IA32_SA_RESTORER(ka);
+ err |= __put_user(restorer, &frame->pretcode);
+ } else {
+ /* Pointing to restorer in ia32 gate page */
+ err |= __put_user(IA32_GATE_OFFSET, &frame->pretcode);
+ }
+
+ /* This is popl %eax ; movl $,%eax ; int $0x80
+ * and there for historical reasons only.
+ * See arch/i386/kernel/signal.c
+ */
+
+ err |= __put_user(0xb858, (short __user *)(frame->retcode+0));
+ err |= __put_user(__IA32_NR_sigreturn, (int __user *)(frame->retcode+2));
+ err |= __put_user(0x80cd, (short __user *)(frame->retcode+6));
+
+ if (err)
+ goto give_sigsegv;
+
+ /* Set up registers for signal handler */
+ regs->r12 = (unsigned long) frame; /* r12 holds the ia32 stack pointer (it is what gets saved as esp) */
+ regs->cr_iip = IA32_SA_HANDLER(ka);
+
+ set_fs(USER_DS);
+
+#if 0
+ regs->eflags &= ~TF_MASK;
+#endif
+
+#if 0
+ printk("SIG deliver (%s:%d): sig=%d sp=%p pc=%lx ra=%x\n",
+ current->comm, current->pid, sig, (void *) frame, regs->cr_iip, frame->pretcode);
+#endif
+
+ return 1;
+
+ give_sigsegv:
+ force_sigsegv(sig, current);
+ return 0;
+}
+
+static int
+setup_rt_frame_ia32 (int sig, struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *set, struct pt_regs * regs)
+{ /*
+ * Build a struct rt_sigframe_ia32 (siginfo + ucontext) on the user stack
+ * for signal SIG and point REGS at the handler described by KA.  INFO is
+ * converted to 32-bit layout; SET is the signal mask saved in the ucontext.
+ *
+ * Returns 1 on success; on any failure forces a SIGSEGV and returns 0.
+ */
+ struct exec_domain *ed = current_thread_info()->exec_domain;
+ compat_uptr_t pinfo, puc;
+ struct rt_sigframe_ia32 __user *frame;
+ int err = 0;
+
+ frame = get_sigframe(ka, regs, sizeof(*frame));
+
+ if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+ goto give_sigsegv;
+
+ /* signals below 32 are remapped through the exec domain's signal_invmap when one exists */
+ err |= __put_user((ed && ed->signal_invmap
+ && sig < 32 ? ed->signal_invmap[sig] : sig), &frame->sig);
+
+ pinfo = (long __user) &frame->info; /* user address of the siginfo inside the frame */
+ puc = (long __user) &frame->uc; /* user address of the ucontext inside the frame */
+ err |= __put_user(pinfo, &frame->pinfo);
+ err |= __put_user(puc, &frame->puc);
+ err |= copy_siginfo_to_user32(&frame->info, info);
+
+ /* Create the ucontext. */
+ err |= __put_user(0, &frame->uc.uc_flags);
+ err |= __put_user(0, &frame->uc.uc_link);
+ err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+ err |= __put_user(sas_ss_flags(regs->r12), &frame->uc.uc_stack.ss_flags);
+ err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+ err |= setup_sigcontext_ia32(&frame->uc.uc_mcontext, &frame->fpstate, regs, set->sig[0]);
+ err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+ if (err)
+ goto give_sigsegv;
+
+ /* Set up to return from userspace. If provided, use a stub
+ already in userspace. */
+ if (ka->sa.sa_flags & SA_RESTORER) {
+ unsigned int restorer = IA32_SA_RESTORER(ka);
+ err |= __put_user(restorer, &frame->pretcode);
+ } else {
+ /* Pointing to rt_restorer in ia32 gate page */
+ err |= __put_user(IA32_GATE_OFFSET + 8, &frame->pretcode);
+ }
+
+ /* This is movl $,%eax ; int $0x80
+ * and there for historical reasons only.
+ * See arch/i386/kernel/signal.c
+ */
+
+ err |= __put_user(0xb8, (char __user *)(frame->retcode+0));
+ err |= __put_user(__IA32_NR_rt_sigreturn, (int __user *)(frame->retcode+1));
+ err |= __put_user(0x80cd, (short __user *)(frame->retcode+5));
+
+ if (err)
+ goto give_sigsegv;
+
+ /* Set up registers for signal handler */
+ regs->r12 = (unsigned long) frame; /* r12 holds the ia32 stack pointer (it is what gets saved as esp) */
+ regs->cr_iip = IA32_SA_HANDLER(ka);
+
+ set_fs(USER_DS);
+
+#if 0
+ regs->eflags &= ~TF_MASK;
+#endif
+
+#if 0
+ printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%x\n",
+ current->comm, current->pid, (void *) frame, regs->cr_iip, frame->pretcode);
+#endif
+
+ return 1;
+
+give_sigsegv:
+ force_sigsegv(sig, current);
+ return 0;
+}
+
+/*
+ * Set up the user stack frame for delivering SIG to an ia32 task: an rt frame
+ * (with siginfo) when the handler was installed with SA_SIGINFO, a classic
+ * frame otherwise.  Returns whatever the chosen frame builder returns.
+ */
+int
+ia32_setup_frame1 (int sig, struct k_sigaction *ka, siginfo_t *info,
+		   sigset_t *set, struct pt_regs *regs)
+{
+	if (!(ka->sa.sa_flags & SA_SIGINFO))
+		return setup_frame_ia32(sig, ka, set, regs);
+
+	return setup_rt_frame_ia32(sig, ka, info, set, regs);
+}
+
+/*
+ * ia32 sigreturn(): undo setup_frame_ia32().  The frame is located 8 bytes
+ * below the current ia32 stack pointer; the blocked-signal mask and the
+ * register state are reloaded from it.
+ *
+ * Returns the eax value saved in the frame, or 0 after forcing SIGSEGV when
+ * the frame cannot be read or restored.
+ */
+asmlinkage long
+sys32_sigreturn (int arg0, int arg1, int arg2, int arg3, int arg4, int arg5,
+		 int arg6, int arg7, struct pt_regs regs)
+{
+	unsigned long sp = (unsigned int) regs.r12;
+	struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(sp - 8);
+	sigset_t blocked;
+	int eax;
+
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+
+	/* first mask word lives in the sigcontext, any remaining words in extramask */
+	if (__get_user(blocked.sig[0], &frame->sc.oldmask))
+		goto badframe;
+	if (_COMPAT_NSIG_WORDS > 1
+	    && __copy_from_user((char *) &blocked.sig + 4, &frame->extramask,
+				sizeof(frame->extramask)))
+		goto badframe;
+
+	sigdelsetmask(&blocked, ~_BLOCKABLE);
+	spin_lock_irq(&current->sighand->siglock);
+	current->blocked = blocked;
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	if (restore_sigcontext_ia32(&regs, &frame->sc, &eax) == 0)
+		return eax;
+
+  badframe:
+	force_sig(SIGSEGV, current);
+	return 0;
+}
+
+/*
+ * ia32 rt_sigreturn(): undo setup_rt_frame_ia32().  The frame is located 4
+ * bytes below the current ia32 stack pointer; the blocked-signal mask, the
+ * register state and the alternate-stack settings are reloaded from its
+ * ucontext.
+ *
+ * Returns the eax value saved in the frame, or 0 after forcing SIGSEGV when
+ * the frame cannot be read or restored.
+ */
+asmlinkage long
+sys32_rt_sigreturn (int arg0, int arg1, int arg2, int arg3, int arg4,
+		    int arg5, int arg6, int arg7, struct pt_regs regs)
+{
+	unsigned long sp = (unsigned int) regs.r12;
+	struct rt_sigframe_ia32 __user *frame = (struct rt_sigframe_ia32 __user *)(sp - 4);
+	sigset_t blocked;
+	int eax;
+
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame))
+	    || __copy_from_user(&blocked, &frame->uc.uc_sigmask, sizeof(blocked)))
+		goto badframe;
+
+	sigdelsetmask(&blocked, ~_BLOCKABLE);
+	spin_lock_irq(&current->sighand->siglock);
+	current->blocked = blocked;
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	if (restore_sigcontext_ia32(&regs, &frame->uc.uc_mcontext, &eax))
+		goto badframe;
+
+	/*
+	 * Calling this unconditionally and ignoring its result is simpler than
+	 * working out when the call could be skipped.
+	 */
+	do_sigaltstack((stack_t __user *) &frame->uc.uc_stack, NULL, sp);
+
+	return eax;
+
+  badframe:
+	force_sig(SIGSEGV, current);
+	return 0;
+}
diff --git a/arch/ia64/ia32/ia32_support.c b/arch/ia64/ia32/ia32_support.c
new file mode 100644
index 00000000000..4f630043b3a
--- /dev/null
+++ b/arch/ia64/ia32/ia32_support.c
@@ -0,0 +1,264 @@
+/*
+ * IA32 helper functions
+ *
+ * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com>
+ * Copyright (C) 2000 Asit K. Mallick <asit.k.mallick@intel.com>
+ * Copyright (C) 2001-2002 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 06/16/00 A. Mallick added csd/ssd/tssd for ia32 thread context
+ * 02/19/01 D. Mosberger dropped tssd; it's not needed
+ * 09/14/01 D. Mosberger fixed memory management for gdt/tss page
+ * 09/29/01 D. Mosberger added ia32_load_segment_descriptors()
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/personality.h>
+#include <linux/sched.h>
+
+#include <asm/intrinsics.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+#include <asm/uaccess.h>
+
+#include "ia32priv.h"
+
+/* Defined outside this file; called by ia32_bad_interrupt() before it raises SIGTRAP. */
+extern void die_if_kernel (char *str, struct pt_regs *regs, long err);
+
+/* Execution domain filled in and registered by ia32_init() (PER_LINUX32). */
+struct exec_domain ia32_exec_domain;
+/* Per-CPU page backing that CPU's IA-32 GDT (see ia32_gdt_init()). */
+struct page *ia32_shared_page[NR_CPUS];
+/* Kernel address of the boot CPU's GDT; copied as a template for other CPUs. */
+unsigned long *ia32_boot_gdt;
+/* Per-CPU pointer to the GDT contents consulted by load_desc(). */
+unsigned long *cpu_gdt_table[NR_CPUS];
+/* Page holding the sigreturn/rt_sigreturn trampolines (see ia32_gate_page_init()). */
+struct page *ia32_gate_page;
+
+/*
+ * Look up the descriptor named by an IA-32 segment selector and return it in
+ * unscrambled (IA-64) form.  The TI bit selects between the LDT and the
+ * current CPU's GDT.  A null selector or an out-of-range index yields 0.
+ */
+static unsigned long
+load_desc (u16 selector)
+{
+	unsigned long *descs, nr_slots, slot;
+	int use_ldt;
+
+	if (selector == 0)
+		return 0;
+
+	use_ldt = (selector & IA32_SEGSEL_TI) != 0;
+	descs = use_ldt ? (unsigned long *) IA32_LDT_OFFSET
+			: cpu_gdt_table[smp_processor_id()];
+	nr_slots = use_ldt ? IA32_LDT_ENTRIES
+			   : IA32_PAGE_SIZE / sizeof(ia32_boot_gdt[0]);
+
+	slot = selector >> IA32_SEGSEL_INDEX_SHIFT;
+	if (slot < nr_slots)
+		return IA32_SEG_UNSCRAMBLE(descs[slot]);
+	return 0;
+}
+
+/*
+ * Refresh the segment descriptors cached in @task's saved registers from the
+ * 16-bit selectors packed into r16 (DS, ES, FS, GS from low to high) and
+ * r17 (CS in bits 0-15, SS in bits 16-31).
+ */
+void
+ia32_load_segment_descriptors (struct task_struct *task)
+{
+	struct pt_regs *pt = ia64_task_regs(task);
+	const unsigned long data_sels = pt->r16;
+	const unsigned long code_sels = pt->r17;
+
+	pt->r24 = load_desc((u16) (data_sels >> 16));	/* ESD */
+	pt->r27 = load_desc((u16) (data_sels >> 0));	/* DSD */
+	pt->r28 = load_desc((u16) (data_sels >> 32));	/* FSD */
+	pt->r29 = load_desc((u16) (data_sels >> 48));	/* GSD */
+	pt->ar_csd = load_desc((u16) (code_sels >> 0));	/* CSD */
+	pt->ar_ssd = load_desc((u16) (code_sels >> 16));	/* SSD */
+}
+
+/*
+ * Install a TLS descriptor for a newly cloned IA-32 task.
+ *
+ * The descriptor request (struct ia32_user_desc) is read from the user
+ * address held in the low 32 bits of childregs->r14.
+ *
+ * Returns 0 on success, -EFAULT if the request cannot be copied in, and
+ * -EINVAL if it is empty or names an entry outside
+ * GDT_ENTRY_TLS_MIN..GDT_ENTRY_TLS_MAX.
+ */
+int
+ia32_clone_tls (struct task_struct *child, struct pt_regs *childregs)
+{
+ struct desc_struct *desc;
+ struct ia32_user_desc info;
+ int idx;
+
+ if (copy_from_user(&info, (void __user *)(childregs->r14 & 0xffffffff), sizeof(info)))
+ return -EFAULT;
+ if (LDT_empty(&info))
+ return -EINVAL;
+
+ idx = info.entry_number;
+ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+ return -EINVAL;
+
+ /* encode the request into the child's slot of the per-thread TLS array */
+ desc = child->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
+ desc->a = LDT_entry_a(&info);
+ desc->b = LDT_entry_b(&info);
+
+ /* XXX: can this be done in a cleaner way ? */
+ /*
+ * Temporarily place the child's TLS entries in this CPU's GDT so that
+ * ia32_load_segment_descriptors() resolves the child's selectors against
+ * them, then put the current task's entries back.
+ */
+ load_TLS(&child->thread, smp_processor_id());
+ ia32_load_segment_descriptors(child);
+ load_TLS(&current->thread, smp_processor_id());
+
+ return 0;
+}
+
+/*
+ * Save the IA-32 application registers (EFLAG, FSR, FCR, FIR, FDR) into
+ * @t's thread structure and write the kernel registers IO_BASE and TSSD
+ * back from the values kept in t->thread.old_iob / old_k1.
+ */
+void
+ia32_save_state (struct task_struct *t)
+{
+ t->thread.eflag = ia64_getreg(_IA64_REG_AR_EFLAG);
+ t->thread.fsr = ia64_getreg(_IA64_REG_AR_FSR);
+ t->thread.fcr = ia64_getreg(_IA64_REG_AR_FCR);
+ t->thread.fir = ia64_getreg(_IA64_REG_AR_FIR);
+ t->thread.fdr = ia64_getreg(_IA64_REG_AR_FDR);
+ /* undo the kernel-register overrides made by ia32_load_state() */
+ ia64_set_kr(IA64_KR_IO_BASE, t->thread.old_iob);
+ ia64_set_kr(IA64_KR_TSSD, t->thread.old_k1);
+}
+
+/*
+ * Load @t's IA-32 state into the CPU: the application registers EFLAG, FSR,
+ * FCR, FIR and FDR come from t->thread, the kernel registers IO_BASE and
+ * TSSD are pointed at the IA-32 values (their previous contents are stashed
+ * in old_iob / old_k1), the TSS and LDT selectors are placed in the upper
+ * half of r17, the LDT descriptor in r30, and @t's TLS entries are copied
+ * into this CPU's GDT.
+ */
+void
+ia32_load_state (struct task_struct *t)
+{
+ unsigned long eflag, fsr, fcr, fir, fdr, tssd;
+ struct pt_regs *regs = ia64_task_regs(t);
+
+ eflag = t->thread.eflag;
+ fsr = t->thread.fsr;
+ fcr = t->thread.fcr;
+ fir = t->thread.fir;
+ fdr = t->thread.fdr;
+ tssd = load_desc(_TSS); /* TSSD */
+
+ ia64_setreg(_IA64_REG_AR_EFLAG, eflag);
+ ia64_setreg(_IA64_REG_AR_FSR, fsr);
+ ia64_setreg(_IA64_REG_AR_FCR, fcr);
+ ia64_setreg(_IA64_REG_AR_FIR, fir);
+ ia64_setreg(_IA64_REG_AR_FDR, fdr);
+ /*
+ * NOTE(review): the old kernel-register values are stored through
+ * `current`, whereas everything else here (and ia32_save_state()) goes
+ * through `t`; confirm this is intended when t != current.
+ */
+ current->thread.old_iob = ia64_get_kr(IA64_KR_IO_BASE);
+ current->thread.old_k1 = ia64_get_kr(IA64_KR_TSSD);
+ ia64_set_kr(IA64_KR_IO_BASE, IA32_IOBASE);
+ ia64_set_kr(IA64_KR_TSSD, tssd);
+
+ /* keep the low 32 bits of r17; LDT selector -> bits 32-47, TSS selector -> bits 48-63 */
+ regs->r17 = (_TSS << 48) | (_LDT << 32) | (__u32) regs->r17;
+ regs->r30 = load_desc(_LDT); /* LDTD */
+ load_TLS(&t->thread, smp_processor_id());
+}
+
+/*
+ * Give the calling CPU its own IA-32 GDT: allocate one page, record it in
+ * ia32_shared_page[]/cpu_gdt_table[] and fill it with a copy of the boot
+ * CPU's GDT.  Panics if no page can be allocated.
+ */
+void
+ia32_gdt_init (void)
+{
+	const int this_cpu = smp_processor_id();
+	struct page *pg = alloc_page(GFP_KERNEL);
+
+	ia32_shared_page[this_cpu] = pg;
+	if (pg == NULL)
+		panic("failed to allocate ia32_shared_page[%d]\n", this_cpu);
+
+	cpu_gdt_table[this_cpu] = page_address(pg);
+
+	/* start from the descriptors prepared by the boot CPU */
+	memcpy(cpu_gdt_table[this_cpu], ia32_boot_gdt, PAGE_SIZE);
+}
+
+
+/*
+ * Allocate the boot CPU's IA-32 GDT page and populate the user code, user
+ * data, TSS and LDT descriptors.  The page doubles as the template that
+ * ia32_gdt_init() copies for other CPUs.  Panics on allocation failure.
+ *
+ * IA32_SEG_DESCRIPTOR() arguments are, in order: base, limit, segtype,
+ * nonsysseg, dpl, segpresent, avl, segdb, gran.
+ */
+static void
+ia32_boot_gdt_init (void)
+{
+ unsigned long ldt_size;
+
+ ia32_shared_page[0] = alloc_page(GFP_KERNEL);
+ if (!ia32_shared_page[0])
+ panic("failed to allocate ia32_shared_page[0]\n");
+
+ ia32_boot_gdt = page_address(ia32_shared_page[0]);
+ cpu_gdt_table[0] = ia32_boot_gdt;
+
+ /* CS descriptor in IA-32 (scrambled) format */
+ /* base 0, limit counted in IA-32 pages up to the end of the gate page (gran = 1) */
+ ia32_boot_gdt[__USER_CS >> 3]
+ = IA32_SEG_DESCRIPTOR(0, (IA32_GATE_END-1) >> IA32_PAGE_SHIFT,
+ 0xb, 1, 3, 1, 1, 1, 1);
+
+ /* DS descriptor in IA-32 (scrambled) format */
+ ia32_boot_gdt[__USER_DS >> 3]
+ = IA32_SEG_DESCRIPTOR(0, (IA32_GATE_END-1) >> IA32_PAGE_SHIFT,
+ 0x3, 1, 3, 1, 1, 1, 1);
+
+ /* system segments (nonsysseg = 0) with byte-granular limits (gran = 0) */
+ ldt_size = PAGE_ALIGN(IA32_LDT_ENTRIES*IA32_LDT_ENTRY_SIZE);
+ ia32_boot_gdt[TSS_ENTRY] = IA32_SEG_DESCRIPTOR(IA32_TSS_OFFSET, 235,
+ 0xb, 0, 3, 1, 1, 1, 0);
+ ia32_boot_gdt[LDT_ENTRY] = IA32_SEG_DESCRIPTOR(IA32_LDT_OFFSET, ldt_size - 1,
+ 0x2, 0, 3, 1, 1, 1, 0);
+}
+
+/*
+ * Allocate the IA-32 gate page and write the two signal-return trampolines
+ * at its start, one 8-byte word each:
+ *   word 0: popl %eax ; movl $__IA32_NR_sigreturn,%eax ; int $0x80
+ *   word 1: movl $__IA32_NR_rt_sigreturn,%eax ; int $0x80
+ *
+ * Panics if the page cannot be allocated, matching the other boot-time
+ * allocations in this file; previously a failed allocation was passed
+ * straight to page_address() and dereferenced.
+ */
+static void
+ia32_gate_page_init(void)
+{
+	unsigned long *sr;
+
+	ia32_gate_page = alloc_page(GFP_KERNEL);
+	if (!ia32_gate_page)
+		panic("failed to allocate ia32_gate_page\n");
+
+	sr = page_address(ia32_gate_page);
+	/* This is popl %eax ; movl $,%eax ; int $0x80 */
+	*sr++ = 0xb858 | (__IA32_NR_sigreturn << 16) | (0x80cdUL << 48);
+
+	/* This is movl $,%eax ; int $0x80 */
+	*sr = 0xb8 | (__IA32_NR_rt_sigreturn << 8) | (0x80cdUL << 40);
+}
+
+/*
+ * Boot-time memory setup for IA-32 support: build the boot CPU's GDT, then
+ * the gate page with the signal-return trampolines.
+ */
+void
+ia32_mem_init(void)
+{
+ ia32_boot_gdt_init();
+ ia32_gate_page_init();
+}
+
+/*
+ * An IA-32 task raised a software interrupt that is not supported.  If this
+ * happened in kernel context die_if_kernel() deals with it; otherwise the
+ * task is sent SIGTRAP/TRAP_BRKPT with the interrupt number in si_errno.
+ */
+void
+ia32_bad_interrupt (unsigned long int_num, struct pt_regs *regs)
+{
+	siginfo_t info;
+
+	die_if_kernel("Bad IA-32 interrupt", regs, int_num);
+
+	info.si_signo = SIGTRAP;
+	info.si_code = TRAP_BRKPT;
+	/* XXX is it OK to abuse si_errno like this? */
+	info.si_errno = int_num;
+
+	/* nothing meaningful to report in the remaining fields */
+	info.si_addr = NULL;
+	info.si_flags = 0;
+	info.si_isr = 0;
+	info.si_imm = 0;
+
+	force_sig_info(SIGTRAP, &info, current);
+}
+
+/*
+ * Per-CPU IA-32 initialisation: program ar.cflag with the global CR0/CR4
+ * images.
+ */
+void
+ia32_cpu_init (void)
+{
+	/* IA32_CR4 goes in the upper 32 bits, IA32_CR0 in the lower 32 bits */
+	const unsigned long cflag = (((ulong) IA32_CR4 << 32) | IA32_CR0);
+
+	ia64_setreg(_IA64_REG_AR_CFLAG, cflag);
+}
+
+/*
+ * Initcall: register the "Linux/x86" execution domain for PER_LINUX32 and,
+ * when native pages are larger than IA-32 pages, create the slab cache used
+ * for struct partial_page.  Always returns 0; panics if the cache cannot be
+ * created.
+ */
+static int __init
+ia32_init (void)
+{
+ ia32_exec_domain.name = "Linux/x86";
+ ia32_exec_domain.handler = NULL;
+ ia32_exec_domain.pers_low = PER_LINUX32;
+ ia32_exec_domain.pers_high = PER_LINUX32;
+ /* reuse the signal number mappings of the default domain */
+ ia32_exec_domain.signal_map = default_exec_domain.signal_map;
+ ia32_exec_domain.signal_invmap = default_exec_domain.signal_invmap;
+ register_exec_domain(&ia32_exec_domain);
+
+#if PAGE_SHIFT > IA32_PAGE_SHIFT
+ {
+ extern kmem_cache_t *partial_page_cachep;
+
+ partial_page_cachep = kmem_cache_create("partial_page_cache",
+ sizeof(struct partial_page), 0, 0,
+ NULL, NULL);
+ if (!partial_page_cachep)
+ panic("Cannot create partial page SLAB cache");
+ }
+#endif
+ return 0;
+}
+
+__initcall(ia32_init);
diff --git a/arch/ia64/ia32/ia32_traps.c b/arch/ia64/ia32/ia32_traps.c
new file mode 100644
index 00000000000..e486042672f
--- /dev/null
+++ b/arch/ia64/ia32/ia32_traps.c
@@ -0,0 +1,156 @@
+/*
+ * IA-32 exception handlers
+ *
+ * Copyright (C) 2000 Asit K. Mallick <asit.k.mallick@intel.com>
+ * Copyright (C) 2001-2002 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 06/16/00 A. Mallick added siginfo for most cases (close to IA32)
+ * 09/29/00 D. Mosberger added ia32_intercept()
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+#include "ia32priv.h"
+
+#include <asm/intrinsics.h>
+#include <asm/ptrace.h>
+
+/*
+ * Handle an IA-32 intercept.  The intercept kind is taken from bits 16-23 of
+ * @isr.  Only a system flag trap (kind 2) caused by a MOV SS / POP SS
+ * instruction is handled: psr.id is set in the saved state and 0 is
+ * returned.  Every other intercept returns -1.
+ */
+int
+ia32_intercept (struct pt_regs *regs, unsigned long isr)
+{
+	const unsigned long kind = (isr >> 16) & 0xff;
+
+	/*
+	 * 0 = instruction intercept fault, 1 = gate intercept trap,
+	 * 4 = locked data reference fault: none of these is handled here.
+	 */
+	if (kind != 2)
+		return -1;
+
+	/* System flag trap: only the MOV SS, POP SS instructions are handled */
+	if (((isr >> 14) & 0x3) < 2)
+		return -1;
+
+	ia64_psr(regs)->id = 1;
+	return 0;
+}
+
+/*
+ * Translate an IA-32 exception into a signal for the current task.
+ *
+ * The exception vector is taken from bits 16-23 of @isr.  Returns 0 after
+ * the signal has been queued with force_sig_info(), or -1 for a vector that
+ * is not handled here.  @regs is not used.
+ *
+ * Changes relative to the previous version:
+ *  - si_addr is now cleared along with the other siginfo fields, so that no
+ *    uninitialised kernel stack contents are handed to user space
+ *    (ia32_bad_interrupt() already did this);
+ *  - the alignment check (vector 17) raises SIGBUS instead of SIGSEGV, as
+ *    BUS_ADRALN is a SIGBUS si_code.
+ */
+int
+ia32_exception (struct pt_regs *regs, unsigned long isr)
+{
+	struct siginfo siginfo;
+
+	/* initialize these fields to avoid leaking kernel bits to user space: */
+	siginfo.si_errno = 0;
+	siginfo.si_flags = 0;
+	siginfo.si_isr = 0;
+	siginfo.si_imm = 0;
+	siginfo.si_addr = NULL;
+	switch ((isr >> 16) & 0xff) {
+	      case 1:
+	      case 2:
+		siginfo.si_signo = SIGTRAP;
+		/*
+		 * NOTE(review): within this case bits 16-23 of isr are 1 or 2,
+		 * so "isr == 0" can never be true and TRAP_TRACE is
+		 * unreachable; presumably only the low code bits were meant
+		 * to be tested -- confirm against the ISR encoding.
+		 */
+		if (isr == 0)
+			siginfo.si_code = TRAP_TRACE;
+		else if (isr & 0x4)
+			siginfo.si_code = TRAP_BRANCH;
+		else
+			siginfo.si_code = TRAP_BRKPT;
+		break;
+
+	      case 3:
+		siginfo.si_signo = SIGTRAP;
+		siginfo.si_code = TRAP_BRKPT;
+		break;
+
+	      case 0:	/* Divide fault */
+		siginfo.si_signo = SIGFPE;
+		siginfo.si_code = FPE_INTDIV;
+		break;
+
+	      case 4:	/* Overflow */
+	      case 5:	/* Bounds fault */
+		siginfo.si_signo = SIGFPE;
+		siginfo.si_code = 0;
+		break;
+
+	      case 6:	/* Invalid Op-code */
+		siginfo.si_signo = SIGILL;
+		siginfo.si_code = ILL_ILLOPN;
+		break;
+
+	      case 7:	/* FP DNA */
+	      case 8:	/* Double Fault */
+	      case 9:	/* Invalid TSS */
+	      case 11:	/* Segment not present */
+	      case 12:	/* Stack fault */
+	      case 13:	/* General Protection Fault */
+		siginfo.si_signo = SIGSEGV;
+		siginfo.si_code = 0;
+		break;
+
+	      case 16:	/* Pending FP error */
+		{
+			unsigned long fsr, fcr;
+
+			fsr = ia64_getreg(_IA64_REG_AR_FSR);
+			fcr = ia64_getreg(_IA64_REG_AR_FCR);
+
+			siginfo.si_signo = SIGFPE;
+			/*
+			 * (~cwd & swd) will mask out exceptions that are not set to unmasked
+			 * status.  0x3f is the exception bits in these regs, 0x200 is the
+			 * C1 reg you need in case of a stack fault, 0x040 is the stack
+			 * fault bit.  We should only be taking one exception at a time,
+			 * so if this combination doesn't produce any single exception,
+			 * then we have a bad program that isn't synchronizing its FPU usage
+			 * and it will suffer the consequences since we won't be able to
+			 * fully reproduce the context of the exception
+			 */
+			siginfo.si_isr = isr;
+			siginfo.si_flags = __ISR_VALID;
+			switch(((~fcr) & (fsr & 0x3f)) | (fsr & 0x240)) {
+			      case 0x000:
+			      default:
+				siginfo.si_code = 0;
+				break;
+			      case 0x001:	/* Invalid Op */
+			      case 0x040:	/* Stack Fault */
+			      case 0x240:	/* Stack Fault | Direction */
+				siginfo.si_code = FPE_FLTINV;
+				break;
+			      case 0x002:	/* Denormalize */
+			      case 0x010:	/* Underflow */
+				siginfo.si_code = FPE_FLTUND;
+				break;
+			      case 0x004:	/* Zero Divide */
+				siginfo.si_code = FPE_FLTDIV;
+				break;
+			      case 0x008:	/* Overflow */
+				siginfo.si_code = FPE_FLTOVF;
+				break;
+			      case 0x020:	/* Precision */
+				siginfo.si_code = FPE_FLTRES;
+				break;
+			}
+
+			break;
+		}
+
+	      case 17:	/* Alignment check */
+		/* BUS_ADRALN is only meaningful together with SIGBUS */
+		siginfo.si_signo = SIGBUS;
+		siginfo.si_code = BUS_ADRALN;
+		break;
+
+	      case 19:	/* SSE Numeric error */
+		siginfo.si_signo = SIGFPE;
+		siginfo.si_code = 0;
+		break;
+
+	      default:
+		return -1;
+	}
+	force_sig_info(siginfo.si_signo, &siginfo, current);
+	return 0;
+}
diff --git a/arch/ia64/ia32/ia32priv.h b/arch/ia64/ia32/ia32priv.h
new file mode 100644
index 00000000000..b2de948bdae
--- /dev/null
+++ b/arch/ia64/ia32/ia32priv.h
@@ -0,0 +1,544 @@
+#ifndef _ASM_IA64_IA32_PRIV_H
+#define _ASM_IA64_IA32_PRIV_H
+
+#include <linux/config.h>
+
+#include <asm/ia32.h>
+
+#ifdef CONFIG_IA32_SUPPORT
+
+#include <linux/binfmts.h>
+#include <linux/compat.h>
+#include <linux/rbtree.h>
+
+#include <asm/processor.h>
+
+/*
+ * 32 bit structures for IA32 support.
+ */
+
+/* IA-32 page geometry, derived from IA32_PAGE_SHIFT. */
+#define IA32_PAGE_SIZE (1UL << IA32_PAGE_SHIFT)
+#define IA32_PAGE_MASK (~(IA32_PAGE_SIZE - 1))
+/* Round addr up to the next IA-32 page boundary. */
+#define IA32_PAGE_ALIGN(addr) (((addr) + IA32_PAGE_SIZE - 1) & IA32_PAGE_MASK)
+#define IA32_CLOCKS_PER_SEC 100 /* Cast in stone for IA32 Linux */
+
+/*
+ * partially mapped pages provide precise accounting of which 4k sub pages
+ * are mapped and which ones are not, thereby improving IA-32 compatibility.
+ *
+ * Each partial_page is linked both into an address-sorted list (next) and
+ * into a red-black tree (pp_rb).
+ */
+struct partial_page {
+ struct partial_page *next; /* linked list, sorted by address */
+ struct rb_node pp_rb;
+ /* One bit per 4K sub-page.  64K is the largest "normal" page supported
+ * by the ia64 ABI (16 sub-pages), so a 32-bit bitmap should suffice. */
+ unsigned int bitmap;
+ unsigned int base;
+};
+
+/*
+ * Container for a set of partial pages: the same entries are reachable
+ * through the sorted list (pp_head) and the red-black tree (ppl_rb).
+ */
+struct partial_page_list {
+ struct partial_page *pp_head; /* list head, points to the lowest
+ * addressed partial page */
+ struct rb_root ppl_rb;
+ struct partial_page *pp_hint; /* pp_hint->next is the last
+ * accessed partial page */
+ atomic_t pp_count; /* reference count */
+};
+
+#if PAGE_SHIFT > IA32_PAGE_SHIFT
+struct partial_page_list* ia32_init_pp_list (void);
+#else
+# define ia32_init_pp_list() 0
+#endif
+
+/* sigcontext.h */
+/*
+ * As documented in the iBCS2 standard..
+ *
+ * The first part of "struct _fpstate" is just the
+ * normal i387 hardware setup, the extra "status"
+ * word is used to save the coprocessor status word
+ * before entering the handler.
+ */
+/* One i387 register image: four 16-bit significand words and a 16-bit exponent word. */
+struct _fpreg_ia32 {
+ unsigned short significand[4];
+ unsigned short exponent;
+};
+
+/* FXSR-format register image: same fields as _fpreg_ia32 plus three 16-bit padding words. */
+struct _fpxreg_ia32 {
+ unsigned short significand[4];
+ unsigned short exponent;
+ unsigned short padding[3];
+};
+
+/* One XMM register image, stored as four 32-bit elements. */
+struct _xmmreg_ia32 {
+ unsigned int element[4];
+};
+
+
+/*
+ * Floating-point state as stored in an IA-32 signal frame: the regular i387
+ * environment and registers first, followed by the FXSR/SSE extension area.
+ */
+struct _fpstate_ia32 {
+ unsigned int cw,
+ sw,
+ tag,
+ ipoff,
+ cssel,
+ dataoff,
+ datasel;
+ struct _fpreg_ia32 _st[8];
+ unsigned short status;
+ unsigned short magic; /* 0xffff = regular FPU data only */
+
+ /* FXSR FPU environment */
+ unsigned int _fxsr_env[6]; /* FXSR FPU env is ignored */
+ unsigned int mxcsr;
+ unsigned int reserved;
+ struct _fpxreg_ia32 _fxsr_st[8]; /* FXSR FPU reg data is ignored */
+ struct _xmmreg_ia32 _xmm[8];
+ unsigned int padding[56];
+};
+
+/*
+ * IA-32 signal context.  All members are 16- or 32-bit wide; the 16-bit
+ * segment selectors are each followed by a 16-bit filler (__xxh).
+ */
+struct sigcontext_ia32 {
+ unsigned short gs, __gsh;
+ unsigned short fs, __fsh;
+ unsigned short es, __esh;
+ unsigned short ds, __dsh;
+ unsigned int edi;
+ unsigned int esi;
+ unsigned int ebp;
+ unsigned int esp;
+ unsigned int ebx;
+ unsigned int edx;
+ unsigned int ecx;
+ unsigned int eax;
+ unsigned int trapno;
+ unsigned int err;
+ unsigned int eip;
+ unsigned short cs, __csh;
+ unsigned int eflags;
+ unsigned int esp_at_signal;
+ unsigned short ss, __ssh;
+ unsigned int fpstate; /* really (struct _fpstate_ia32 *) */
+ unsigned int oldmask;
+ unsigned int cr2;
+};
+
+/* user.h */
+/*
+ * IA32 (Pentium III/4) FXSR, SSE support
+ *
+ * Provide support for the GDB 5.0+ PTRACE_{GET|SET}FPXREGS requests for
+ * interacting with the FXSR-format floating point environment. Floating
+ * point data can be accessed in the regular format in the usual manner,
+ * and both the standard and SIMD floating point data can be accessed via
+ * the new ptrace requests. In either case, changes to the FPU environment
+ * will be reflected in the task's state as expected.
+ */
+/* i387 state in the regular (non-FXSR) format: seven 32-bit words plus eight registers. */
+struct ia32_user_i387_struct {
+ int cwd;
+ int swd;
+ int twd;
+ int fip;
+ int fcs;
+ int foo;
+ int fos;
+ /* 8 FP-regs * 10 bytes each = 80 bytes */
+ struct _fpreg_ia32 st_space[8];
+};
+
+/* FPU/SSE state in FXSR format, as described in the comment block above. */
+struct ia32_user_fxsr_struct {
+ unsigned short cwd;
+ unsigned short swd;
+ unsigned short twd;
+ unsigned short fop;
+ int fip;
+ int fcs;
+ int foo;
+ int fos;
+ int mxcsr;
+ int reserved;
+ int st_space[32]; /* 8 FP-regs * 16 bytes each = 128 bytes */
+ int xmm_space[32]; /* 8 XMM-regs * 16 bytes each = 128 bytes */
+ int padding[56];
+};
+
+/* signal.h */
+/*
+ * The 32-bit handler and restorer addresses of an IA-32 sigaction share the
+ * single 64-bit sa_handler slot: restorer in the upper 32 bits, handler in
+ * the lower 32 bits.
+ */
+#define IA32_SET_SA_HANDLER(ka,handler,restorer) \
+ ((ka)->sa.sa_handler = (__sighandler_t) \
+ (((unsigned long)(restorer) << 32) \
+ | ((handler) & 0xffffffff)))
+#define IA32_SA_HANDLER(ka) ((unsigned long) (ka)->sa.sa_handler & 0xffffffff)
+#define IA32_SA_RESTORER(ka) ((unsigned long) (ka)->sa.sa_handler >> 32)
+
+/* Syscall numbers loaded into %eax by the trampolines in the gate page. */
+#define __IA32_NR_sigreturn 119
+#define __IA32_NR_rt_sigreturn 173
+
+/* 32-bit layout of the sigaction argument (rt variant, with a compat_sigset_t mask). */
+struct sigaction32 {
+ unsigned int sa_handler; /* Really a pointer, but need to deal with 32 bits */
+ unsigned int sa_flags;
+ unsigned int sa_restorer; /* Another 32 bit pointer */
+ compat_sigset_t sa_mask; /* A 32 bit mask */
+};
+
+/* 32-bit layout of the old-style sigaction argument (compat_old_sigset_t mask). */
+struct old_sigaction32 {
+ unsigned int sa_handler; /* Really a pointer, but need to deal
+ with 32 bits */
+ compat_old_sigset_t sa_mask; /* A 32 bit mask */
+ unsigned int sa_flags;
+ unsigned int sa_restorer; /* Another 32 bit pointer */
+};
+
+/* 32-bit layout of the alternate signal stack description. */
+typedef struct sigaltstack_ia32 {
+ unsigned int ss_sp; /* 32-bit stack address */
+ int ss_flags;
+ unsigned int ss_size;
+} stack_ia32_t;
+
+/* User context stored in an IA-32 rt signal frame. */
+struct ucontext_ia32 {
+ unsigned int uc_flags;
+ unsigned int uc_link; /* 32-bit value; presumably a user pointer -- confirm */
+ stack_ia32_t uc_stack;
+ struct sigcontext_ia32 uc_mcontext;
+ sigset_t uc_sigmask; /* mask last for extensibility */
+};
+
+/*
+ * 32-bit stat64 layout.  64-bit quantities that are not stored as
+ * unsigned long long are split into explicit _lo/_hi 32-bit halves.
+ */
+struct stat64 {
+ unsigned long long st_dev;
+ unsigned char __pad0[4];
+ unsigned int __st_ino;
+ unsigned int st_mode;
+ unsigned int st_nlink;
+ unsigned int st_uid;
+ unsigned int st_gid;
+ unsigned long long st_rdev;
+ unsigned char __pad3[4];
+ unsigned int st_size_lo;
+ unsigned int st_size_hi;
+ unsigned int st_blksize;
+ unsigned int st_blocks; /* Number 512-byte blocks allocated. */
+ unsigned int __pad4; /* future possible st_blocks high bits */
+ unsigned int st_atime;
+ unsigned int st_atime_nsec;
+ unsigned int st_mtime;
+ unsigned int st_mtime_nsec;
+ unsigned int st_ctime;
+ unsigned int st_ctime_nsec;
+ unsigned int st_ino_lo;
+ unsigned int st_ino_hi;
+};
+
+/*
+ * 32-bit siginfo layout: three ints followed by a union that is padded so
+ * the whole structure occupies 128 bytes.
+ */
+typedef struct compat_siginfo {
+ int si_signo;
+ int si_errno;
+ int si_code;
+
+ union {
+ /* sizes the union: 128 bytes minus the three leading ints */
+ int _pad[((128/sizeof(int)) - 3)];
+
+ /* kill() */
+ struct {
+ unsigned int _pid; /* sender's pid */
+ unsigned int _uid; /* sender's uid */
+ } _kill;
+
+ /* POSIX.1b timers */
+ struct {
+ timer_t _tid; /* timer id */
+ int _overrun; /* overrun count */
+ char _pad[sizeof(unsigned int) - sizeof(int)];
+ compat_sigval_t _sigval; /* same as below */
+ int _sys_private; /* not to be passed to user */
+ } _timer;
+
+ /* POSIX.1b signals */
+ struct {
+ unsigned int _pid; /* sender's pid */
+ unsigned int _uid; /* sender's uid */
+ compat_sigval_t _sigval;
+ } _rt;
+
+ /* SIGCHLD */
+ struct {
+ unsigned int _pid; /* which child */
+ unsigned int _uid; /* sender's uid */
+ int _status; /* exit code */
+ compat_clock_t _utime;
+ compat_clock_t _stime;
+ } _sigchld;
+
+ /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+ struct {
+ unsigned int _addr; /* faulting insn/memory ref. */
+ } _sigfault;
+
+ /* SIGPOLL */
+ struct {
+ int _band; /* POLL_IN, POLL_OUT, POLL_MSG */
+ int _fd;
+ } _sigpoll;
+ } _sifields;
+} compat_siginfo_t;
+
+/* 32-bit directory entry layout; d_name is declared with one byte and extends past the struct. */
+struct old_linux32_dirent {
+ u32 d_ino;
+ u32 d_offset;
+ u16 d_namlen;
+ char d_name[1];
+};
+
+/*
+ * IA-32 ELF specific definitions for IA-64.
+ */
+
+#define _ASM_IA64_ELF_H /* Don't include elf.h */
+
+#include <linux/sched.h>
+#include <asm/processor.h>
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x) ((x)->e_machine == EM_386)
+
+/*
+ * These are used to set parameters in the core dumps.
+ */
+#define ELF_CLASS ELFCLASS32
+#define ELF_DATA ELFDATA2LSB
+#define ELF_ARCH EM_386
+
+/*
+ * Top of the IA-32 user address space.  The stack ends and the gate page
+ * starts at IA32_PAGE_OFFSET; the gate page is one native page long.
+ * IA32_GATE_END is parenthesised so that it expands safely inside larger
+ * expressions (e.g. next to a multiplication, shift or cast).
+ */
+#define IA32_PAGE_OFFSET 0xc0000000
+#define IA32_STACK_TOP IA32_PAGE_OFFSET
+#define IA32_GATE_OFFSET IA32_PAGE_OFFSET
+#define IA32_GATE_END (IA32_PAGE_OFFSET + PAGE_SIZE)
+
+/*
+ * The system segments (GDT, TSS, LDT) have to be mapped below 4GB so the IA-32 engine can
+ * access them.
+ */
+#define IA32_GDT_OFFSET (IA32_PAGE_OFFSET + PAGE_SIZE)
+#define IA32_TSS_OFFSET (IA32_PAGE_OFFSET + 2*PAGE_SIZE)
+#define IA32_LDT_OFFSET (IA32_PAGE_OFFSET + 3*PAGE_SIZE)
+
+#define ELF_EXEC_PAGESIZE IA32_PAGE_SIZE
+
+/*
+ * This is the location that an ET_DYN program is loaded if exec'ed.
+ * Typical use of this is to invoke "./ld.so someprog" to test out a
+ * new version of the loader. We need to make sure that it is out of
+ * the way of the program that it will "exec", and that there is
+ * sufficient room for the brk.
+ */
+#define ELF_ET_DYN_BASE (IA32_PAGE_OFFSET/3 + 0x1000000)
+
+void ia64_elf32_init(struct pt_regs *regs);
+#define ELF_PLAT_INIT(_r, load_addr) ia64_elf32_init(_r)
+
+#define elf_addr_t u32
+
+/* This macro yields a bitmask that programs can use to figure out
+ what instruction set this CPU supports. */
+#define ELF_HWCAP 0
+
+/* This macro yields a string that ld.so will use to load
+ implementation specific libraries for optimization. Not terribly
+ relevant until we have real hardware to play with... */
+#define ELF_PLATFORM NULL
+
+#ifdef __KERNEL__
+# define SET_PERSONALITY(EX,IBCS2) \
+ (current->personality = (IBCS2) ? PER_SVR4 : PER_LINUX)
+#endif
+
+#define IA32_EFLAG 0x200
+
+/*
+ * IA-32 segment selector and descriptor definitions for IA-64.
+ */
+
+#define __USER_CS 0x23
+#define __USER_DS 0x2B
+
+/*
+ * The per-cpu GDT has 32 entries: see <asm-i386/segment.h>
+ */
+#define GDT_ENTRIES 32
+
+#define GDT_SIZE (GDT_ENTRIES * 8)
+
+#define TSS_ENTRY 14
+#define LDT_ENTRY (TSS_ENTRY + 1)
+
+#define IA32_SEGSEL_RPL (0x3 << 0)
+#define IA32_SEGSEL_TI (0x1 << 2)
+#define IA32_SEGSEL_INDEX_SHIFT 3
+
+#define _TSS ((unsigned long) TSS_ENTRY << IA32_SEGSEL_INDEX_SHIFT)
+#define _LDT ((unsigned long) LDT_ENTRY << IA32_SEGSEL_INDEX_SHIFT)
+
+/* Bit positions of the fields inside an IA-32 format ("scrambled") segment descriptor. */
+#define IA32_SEG_BASE 16
+#define IA32_SEG_TYPE 40
+#define IA32_SEG_SYS 44
+#define IA32_SEG_DPL 45
+#define IA32_SEG_P 47
+#define IA32_SEG_HIGH_LIMIT 48
+#define IA32_SEG_AVL 52
+#define IA32_SEG_DB 54
+#define IA32_SEG_G 55
+#define IA32_SEG_HIGH_BASE 56
+
+/*
+ * Build a 64-bit IA-32 format descriptor.  The limit is split into bits 0-15
+ * and a 4-bit high part, the base into a 24-bit low part and an 8-bit high
+ * part; the remaining arguments are shifted to their bit positions as given
+ * (they are not masked, so callers must pass in-range values).
+ */
+#define IA32_SEG_DESCRIPTOR(base, limit, segtype, nonsysseg, dpl, segpresent, avl, segdb, gran) \
+ (((limit) & 0xffff) \
+ | (((unsigned long) (base) & 0xffffff) << IA32_SEG_BASE) \
+ | ((unsigned long) (segtype) << IA32_SEG_TYPE) \
+ | ((unsigned long) (nonsysseg) << IA32_SEG_SYS) \
+ | ((unsigned long) (dpl) << IA32_SEG_DPL) \
+ | ((unsigned long) (segpresent) << IA32_SEG_P) \
+ | ((((unsigned long) (limit) >> 16) & 0xf) << IA32_SEG_HIGH_LIMIT) \
+ | ((unsigned long) (avl) << IA32_SEG_AVL) \
+ | ((unsigned long) (segdb) << IA32_SEG_DB) \
+ | ((unsigned long) (gran) << IA32_SEG_G) \
+ | ((((unsigned long) (base) >> 24) & 0xff) << IA32_SEG_HIGH_BASE))
+
+/* Bit positions of the fields inside an unscrambled (IA-64 format) descriptor. */
+#define SEG_LIM 32
+#define SEG_TYPE 52
+#define SEG_SYS 56
+#define SEG_DPL 57
+#define SEG_P 59
+#define SEG_AVL 60
+#define SEG_DB 62
+#define SEG_G 63
+
+/* Unscramble an IA-32 segment descriptor into the IA-64 format. */
+/*
+ * Result layout: 32-bit base in bits 0-31, 20-bit limit from SEG_LIM, then
+ * type, S, DPL, P, AVL, D/B and G at the SEG_* positions.  Note that sd is
+ * evaluated several times.
+ */
+#define IA32_SEG_UNSCRAMBLE(sd) \
+ ( (((sd) >> IA32_SEG_BASE) & 0xffffff) | ((((sd) >> IA32_SEG_HIGH_BASE) & 0xff) << 24) \
+ | ((((sd) & 0xffff) | ((((sd) >> IA32_SEG_HIGH_LIMIT) & 0xf) << 16)) << SEG_LIM) \
+ | ((((sd) >> IA32_SEG_TYPE) & 0xf) << SEG_TYPE) \
+ | ((((sd) >> IA32_SEG_SYS) & 0x1) << SEG_SYS) \
+ | ((((sd) >> IA32_SEG_DPL) & 0x3) << SEG_DPL) \
+ | ((((sd) >> IA32_SEG_P) & 0x1) << SEG_P) \
+ | ((((sd) >> IA32_SEG_AVL) & 0x1) << SEG_AVL) \
+ | ((((sd) >> IA32_SEG_DB) & 0x1) << SEG_DB) \
+ | ((((sd) >> IA32_SEG_G) & 0x1) << SEG_G))
+
+#define IA32_IOBASE 0x2000000000000000UL /* Virtual address for I/O space */
+
+#define IA32_CR0 0x80000001 /* Enable PG and PE bits */
+#define IA32_CR4 0x600 /* MMXEX and FXSR on */
+
+/*
+ * IA32 floating point control registers starting values
+ */
+
+#define IA32_FSR_DEFAULT 0x55550000 /* set all tag bits */
+#define IA32_FCR_DEFAULT 0x17800000037fUL /* extended precision, all masks */
+
+#define IA32_PTRACE_GETREGS 12
+#define IA32_PTRACE_SETREGS 13
+#define IA32_PTRACE_GETFPREGS 14
+#define IA32_PTRACE_SETFPREGS 15
+#define IA32_PTRACE_GETFPXREGS 18
+#define IA32_PTRACE_SETFPXREGS 19
+
+/*
+ * Prepare regs so that the task starts executing IA-32 code at new_ip with
+ * stack pointer new_sp (kept in r12).
+ */
+#define ia32_start_thread(regs,new_ip,new_sp) do { \
+ set_fs(USER_DS); \
+ ia64_psr(regs)->cpl = 3; /* set user mode */ \
+ ia64_psr(regs)->ri = 0; /* clear return slot number */ \
+ ia64_psr(regs)->is = 1; /* IA-32 instruction set */ \
+ regs->cr_iip = new_ip; \
+ regs->ar_rsc = 0xc; /* enforced lazy mode, priv. level 3 */ \
+ regs->ar_rnat = 0; \
+ regs->loadrs = 0; \
+ regs->r12 = new_sp; \
+} while (0)
+
+/*
+ * Local Descriptor Table (LDT) related declarations.
+ */
+
+#define IA32_LDT_ENTRIES 8192 /* Maximum number of LDT entries supported. */
+#define IA32_LDT_ENTRY_SIZE 8 /* The size of each LDT entry. */
+
+/* First descriptor word: base bits 0-15 in the upper half, limit bits 0-15 in the lower half. */
+#define LDT_entry_a(info) \
+ ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
+
+/*
+ * Second descriptor word: base bits 16-31, limit bits 16-19 and the
+ * attribute bits derived from the ia32_user_desc flags, OR-ed with the
+ * fixed bits 0x7100.
+ */
+#define LDT_entry_b(info) \
+ (((info)->base_addr & 0xff000000) | \
+ (((info)->base_addr & 0x00ff0000) >> 16) | \
+ ((info)->limit & 0xf0000) | \
+ (((info)->read_exec_only ^ 1) << 9) | \
+ ((info)->contents << 10) | \
+ (((info)->seg_not_present ^ 1) << 15) | \
+ ((info)->seg_32bit << 22) | \
+ ((info)->limit_in_pages << 23) | \
+ ((info)->useable << 20) | \
+ 0x7100)
+
+/*
+ * True when info describes an "empty" entry: zero base, limit and contents,
+ * read_exec_only and seg_not_present set, every other flag clear.
+ */
+#define LDT_empty(info) ( \
+ (info)->base_addr == 0 && \
+ (info)->limit == 0 && \
+ (info)->contents == 0 && \
+ (info)->read_exec_only == 1 && \
+ (info)->seg_32bit == 0 && \
+ (info)->limit_in_pages == 0 && \
+ (info)->seg_not_present == 1 && \
+ (info)->useable == 0 )
+
+/*
+ * Copy the three TLS descriptors of thread @t into the GDT of CPU @cpu,
+ * starting at slot GDT_ENTRY_TLS_MIN.  Each descriptor occupies one
+ * long-sized GDT slot.
+ */
+static inline void
+load_TLS (struct thread_struct *t, unsigned int cpu)
+{
+	extern unsigned long *cpu_gdt_table[NR_CPUS];
+	int slot;
+
+	for (slot = 0; slot < 3; slot++)
+		memcpy(cpu_gdt_table[cpu] + GDT_ENTRY_TLS_MIN + slot,
+		       &t->tls_array[slot], sizeof(long));
+}
+
+/*
+ * Segment descriptor request as copied in from IA-32 user space (see
+ * ia32_clone_tls()); encoded into descriptor words by LDT_entry_a/b().
+ */
+struct ia32_user_desc {
+ unsigned int entry_number;
+ unsigned int base_addr;
+ unsigned int limit;
+ unsigned int seg_32bit:1;
+ unsigned int contents:2;
+ unsigned int read_exec_only:1;
+ unsigned int limit_in_pages:1;
+ unsigned int seg_not_present:1;
+ unsigned int useable:1;
+};
+
+struct linux_binprm;
+
+extern void ia32_init_addr_space (struct pt_regs *regs);
+extern int ia32_setup_arg_pages (struct linux_binprm *bprm, int exec_stack);
+extern unsigned long ia32_do_mmap (struct file *, unsigned long, unsigned long, int, int, loff_t);
+extern void ia32_load_segment_descriptors (struct task_struct *task);
+
+/*
+ * Convert a floating-point value from the IA-32 memory image at src to the
+ * IA-64 spill format at dst.  Register number 6 is used as the intermediate
+ * (presumably a scratch FP register -- confirm with the intrinsics).
+ */
+#define ia32f2ia64f(dst,src) \
+do { \
+ ia64_ldfe(6,src); \
+ ia64_stop(); \
+ ia64_stf_spill(dst, 6); \
+} while(0)
+
+/* Reverse conversion: fill from the IA-64 spill image at src, store in IA-32 format at dst. */
+#define ia64f2ia32f(dst,src) \
+do { \
+ ia64_ldf_fill(6, src); \
+ ia64_stop(); \
+ ia64_stfe(dst, 6); \
+} while(0)
+
+/*
+ * IA-32 general register set as 32-bit fields; each 16-bit segment selector
+ * is followed by a 16-bit filler member.
+ */
+struct user_regs_struct32 {
+ __u32 ebx, ecx, edx, esi, edi, ebp, eax;
+ unsigned short ds, __ds, es, __es;
+ unsigned short fs, __fs, gs, __gs;
+ __u32 orig_eax, eip;
+ unsigned short cs, __cs;
+ __u32 eflags, esp;
+ unsigned short ss, __ss;
+};
+
+/* Prototypes for use in elfcore32.h */
+extern int save_ia32_fpstate (struct task_struct *, struct ia32_user_i387_struct __user *);
+extern int save_ia32_fpxstate (struct task_struct *, struct ia32_user_fxsr_struct __user *);
+
+#endif /* CONFIG_IA32_SUPPORT */
+
+#endif /* _ASM_IA64_IA32_PRIV_H */
diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
new file mode 100644
index 00000000000..247a21c64ae
--- /dev/null
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -0,0 +1,2747 @@
+/*
+ * sys_ia32.c: Conversion between 32bit and 64bit native syscalls. Derived from sys_sparc32.c.
+ *
+ * Copyright (C) 2000 VA Linux Co
+ * Copyright (C) 2000 Don Dugger <n0ano@valinux.com>
+ * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com>
+ * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 2000-2003, 2005 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2004 Gordon Jin <gordon.jin@intel.com>
+ *
+ * These routines maintain argument size conversion between 32bit and 64bit
+ * environment.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/sysctl.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/signal.h>
+#include <linux/resource.h>
+#include <linux/times.h>
+#include <linux/utsname.h>
+#include <linux/timex.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/mm.h>
+#include <linux/shm.h>
+#include <linux/slab.h>
+#include <linux/uio.h>
+#include <linux/nfs_fs.h>
+#include <linux/quota.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/cache.h>
+#include <linux/nfsd/xdr.h>
+#include <linux/nfsd/syscall.h>
+#include <linux/poll.h>
+#include <linux/eventpoll.h>
+#include <linux/personality.h>
+#include <linux/ptrace.h>
+#include <linux/stat.h>
+#include <linux/ipc.h>
+#include <linux/compat.h>
+#include <linux/vfs.h>
+#include <linux/mman.h>
+
+#include <asm/intrinsics.h>
+#include <asm/semaphore.h>
+#include <asm/types.h>
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+
+#include "ia32priv.h"
+
+#include <net/scm.h>
+#include <net/sock.h>
+
+/* Set to 1 to turn the DBG() trace messages below into printk(KERN_DEBUG ...) calls. */
+#define DEBUG 0
+
+#if DEBUG
+# define DBG(fmt...) printk(KERN_DEBUG fmt)
+#else
+# define DBG(fmt...)
+#endif
+
+/*
+ * Round @x up to the next multiple of @a.  The mask arithmetic only works when @a is a
+ * power of two.  The result is cast back to the type of @x.
+ */
+#define ROUND_UP(x,a) ((__typeof__(x))(((unsigned long)(x) + ((a) - 1)) & ~((a) - 1)))
+
+/* Low 12 bits of @a, i.e. its offset within a 4KB page. */
+#define OFFSET4K(a) ((a) & 0xfff)
+/* @addr with PAGE_MASK applied, i.e. rounded down to a kernel page boundary. */
+#define PAGE_START(addr) ((addr) & PAGE_MASK)
+#define MINSIGSTKSZ_IA32 2048
+
+/* IDs above 65535 do not fit into 16 bits and are reported as 65534 instead. */
+#define high2lowuid(uid) ((uid) > 65535 ? 65534 : (uid))
+#define high2lowgid(gid) ((gid) > 65535 ? 65534 : (gid))
+
+/*
+ * Anything that modifies or inspects ia32 user virtual memory must hold this semaphore
+ * while doing so.
+ */
+/* XXX make per-mm: */
+static DECLARE_MUTEX(ia32_mmap_sem);
+
+/*
+ * execve() entry point for ia32 tasks.
+ *
+ * The image being exec'ed may be a 64-bit program, so map_base/task_size are reset to
+ * their defaults and the IO_BASE/TSSD kernel registers are loaded from the values saved
+ * in the thread structure before compat_do_execve() runs.  If the exec fails, the ia32
+ * settings are put back.
+ *
+ * Returns the result of compat_do_execve(), or the error reported by getname().
+ */
+asmlinkage long
+sys32_execve (char __user *name, compat_uptr_t __user *argv, compat_uptr_t __user *envp,
+	      struct pt_regs *regs)
+{
+	unsigned long saved_map_base, saved_task_size, saved_tssd;
+	char *path;
+	long rc;
+
+	path = getname(name);
+	if (IS_ERR(path))
+		return PTR_ERR(path);
+
+	/* remember the current settings so that a failed exec can be undone */
+	saved_map_base = current->thread.map_base;
+	saved_task_size = current->thread.task_size;
+	saved_tssd = ia64_get_kr(IA64_KR_TSSD);
+
+	/* we may be exec'ing a 64-bit process: reset map base, task-size, and io-base: */
+	current->thread.map_base = DEFAULT_MAP_BASE;
+	current->thread.task_size = DEFAULT_TASK_SIZE;
+	ia64_set_kr(IA64_KR_IO_BASE, current->thread.old_iob);
+	ia64_set_kr(IA64_KR_TSSD, current->thread.old_k1);
+
+	rc = compat_do_execve(path, argv, envp, regs);
+	putname(path);
+	if (rc >= 0)
+		return rc;
+
+	/* oops, execve failed, switch back to old values... */
+	ia64_set_kr(IA64_KR_IO_BASE, IA32_IOBASE);
+	ia64_set_kr(IA64_KR_TSSD, saved_tssd);
+	current->thread.map_base = saved_map_base;
+	current->thread.task_size = saved_task_size;
+	return rc;
+}
+
+/*
+ * Copy the kernel's @stat into the 32-bit stat buffer @ubuf in user space.
+ *
+ * Returns -EOVERFLOW if the file size exceeds MAX_NON_LFS or a device number fails
+ * old_valid_dev(), -EFAULT if @ubuf cannot be cleared, and otherwise the OR of all
+ * __put_user() results (0 when every field was stored).
+ */
+int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf)
+{
+	int fault = 0;
+
+	if ((u64) stat->size > MAX_NON_LFS)
+		return -EOVERFLOW;
+	if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev))
+		return -EOVERFLOW;
+
+	/* zero the whole buffer first; the fields are then stored one by one */
+	if (clear_user(ubuf, sizeof(*ubuf)))
+		return -EFAULT;
+
+	fault |= __put_user(old_encode_dev(stat->dev), &ubuf->st_dev);
+	fault |= __put_user(stat->ino, &ubuf->st_ino);
+	fault |= __put_user(stat->mode, &ubuf->st_mode);
+	fault |= __put_user(stat->nlink, &ubuf->st_nlink);
+	fault |= __put_user(high2lowuid(stat->uid), &ubuf->st_uid);
+	fault |= __put_user(high2lowgid(stat->gid), &ubuf->st_gid);
+	fault |= __put_user(old_encode_dev(stat->rdev), &ubuf->st_rdev);
+	fault |= __put_user(stat->size, &ubuf->st_size);
+	fault |= __put_user(stat->atime.tv_sec, &ubuf->st_atime);
+	fault |= __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec);
+	fault |= __put_user(stat->mtime.tv_sec, &ubuf->st_mtime);
+	fault |= __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec);
+	fault |= __put_user(stat->ctime.tv_sec, &ubuf->st_ctime);
+	fault |= __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec);
+	fault |= __put_user(stat->blksize, &ubuf->st_blksize);
+	fault |= __put_user(stat->blocks, &ubuf->st_blocks);
+	return fault;
+}
+
+#if PAGE_SHIFT > IA32_PAGE_SHIFT
+
+
+/*
+ * Translate the VM_READ/VM_WRITE/VM_EXEC flags of @vma into PROT_* bits.
+ * Returns 0 when @vma is NULL or does not start at or below @addr.
+ */
+static int
+get_page_prot (struct vm_area_struct *vma, unsigned long addr)
+{
+	if (vma == NULL || addr < vma->vm_start)
+		return 0;
+
+	return ((vma->vm_flags & VM_READ) ? PROT_READ : 0)
+	     | ((vma->vm_flags & VM_WRITE) ? PROT_WRITE : 0)
+	     | ((vma->vm_flags & VM_EXEC) ? PROT_EXEC : 0);
+}
+
+/*
+ * Map a subpage by creating an anonymous page that contains the union of the old page and
+ * the subpage.
+ *
+ * @file, @off: backing file and file offset; only used when MAP_ANONYMOUS is not set.
+ * @start, @end: byte range of the subpage (NOTE(review): presumably both lie within one
+ *               kernel page, as the callers in emulate_mmap() arrange; confirm).
+ * @prot, @flags: protection and mmap flags requested by the caller.
+ *
+ * Returns the do_mmap() or sys_mprotect() result, 0 on the anonymous fast path when
+ * PROT_WRITE was requested, or a negative errno (-EFAULT, -ENOMEM, -EINVAL) converted
+ * to unsigned long.  The caller checks the result with IS_ERR().
+ */
+static unsigned long
+mmap_subpage (struct file *file, unsigned long start, unsigned long end, int prot, int flags,
+ loff_t off)
+{
+ void *page = NULL;
+ struct inode *inode;
+ unsigned long ret = 0;
+ struct vm_area_struct *vma = find_vma(current->mm, start);
+ int old_prot = get_page_prot(vma, start);
+
+ DBG("mmap_subpage(file=%p,start=0x%lx,end=0x%lx,prot=%x,flags=%x,off=0x%llx)\n",
+ file, start, end, prot, flags, off);
+
+
+ /* Optimize the case where the old mmap and the new mmap are both anonymous */
+ /* (old_prot != 0 implies vma != NULL, so vma->vm_file is safe to read here) */
+ if ((old_prot & PROT_WRITE) && (flags & MAP_ANONYMOUS) && !vma->vm_file) {
+ if (clear_user((void __user *) start, end - start)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ goto skip_mmap;
+ }
+
+ page = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+
+ /* save the whole old page; NOTE(review): the copy_from_user() result is not checked */
+ if (old_prot)
+ copy_from_user(page, (void __user *) PAGE_START(start), PAGE_SIZE);
+
+ /* replace the page by a fresh anonymous mapping that is always writable */
+ down_write(&current->mm->mmap_sem);
+ {
+ ret = do_mmap(NULL, PAGE_START(start), PAGE_SIZE, prot | PROT_WRITE,
+ flags | MAP_FIXED | MAP_ANONYMOUS, 0);
+ }
+ up_write(&current->mm->mmap_sem);
+
+ if (IS_ERR((void *) ret))
+ goto out;
+
+ if (old_prot) {
+ /* copy back the old page contents. */
+ /* only the parts before @start and after @end are restored; */
+ /* NOTE(review): the copy_to_user() results are not checked */
+ if (offset_in_page(start))
+ copy_to_user((void __user *) PAGE_START(start), page,
+ offset_in_page(start));
+ if (offset_in_page(end))
+ copy_to_user((void __user *) end, page + offset_in_page(end),
+ PAGE_SIZE - offset_in_page(end));
+ }
+
+ if (!(flags & MAP_ANONYMOUS)) {
+ /* read the file contents */
+ inode = file->f_dentry->d_inode;
+ if (!inode->i_fop || !file->f_op->read
+ || ((*file->f_op->read)(file, (char __user *) start, end - start, &off) < 0))
+ {
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ skip_mmap:
+ /* caller did not ask for write access: use the union of new and old protection */
+ if (!(prot & PROT_WRITE))
+ ret = sys_mprotect(PAGE_START(start), PAGE_SIZE, prot | old_prot);
+ out:
+ if (page)
+ free_page((unsigned long) page);
+ return ret;
+}
+
+/* SLAB cache for partial_page structures */
+kmem_cache_t *partial_page_cachep;
+
+/*
+ * Allocate and initialise an empty partial_page_list with a reference count of 1.
+ * Returns the new list, or NULL if kmalloc() failed.
+ */
+struct partial_page_list*
+ia32_init_pp_list(void)
+{
+	struct partial_page_list *ppl = kmalloc(sizeof(*ppl), GFP_KERNEL);
+
+	if (ppl != NULL) {
+		ppl->pp_head = NULL;
+		ppl->ppl_rb = RB_ROOT;
+		ppl->pp_hint = NULL;
+		atomic_set(&ppl->pp_count, 1);
+	}
+	return ppl;
+}
+
+/*
+ * Search for the partial page whose base address is @start in partial page list @ppl.
+ * If it is found, return it; a hit in the rb-tree is also remembered in @ppl->pp_hint.
+ * Otherwise return NULL and provide @pprev, @rb_link, @rb_parent to
+ * be used by a later __ia32_insert_pp().  The three out-parameters are written only
+ * on the not-found path.
+ */
+static struct partial_page *
+__ia32_find_pp(struct partial_page_list *ppl, unsigned int start,
+ struct partial_page **pprev, struct rb_node ***rb_link,
+ struct rb_node **rb_parent)
+{
+ struct partial_page *pp;
+ struct rb_node **__rb_link, *__rb_parent, *rb_prev;
+
+ /* fast path: the entry used last time */
+ pp = ppl->pp_hint;
+ if (pp && pp->base == start)
+ return pp;
+
+ __rb_link = &ppl->ppl_rb.rb_node;
+ rb_prev = __rb_parent = NULL;
+
+ /* walk down the tree; rb_prev tracks the last node seen with base < start */
+ while (*__rb_link) {
+ __rb_parent = *__rb_link;
+ pp = rb_entry(__rb_parent, struct partial_page, pp_rb);
+
+ if (pp->base == start) {
+ ppl->pp_hint = pp;
+ return pp;
+ } else if (pp->base < start) {
+ rb_prev = __rb_parent;
+ __rb_link = &__rb_parent->rb_right;
+ } else {
+ __rb_link = &__rb_parent->rb_left;
+ }
+ }
+
+ /* not found: report the insertion point and the predecessor (NULL if none) */
+ *rb_link = __rb_link;
+ *rb_parent = __rb_parent;
+ *pprev = NULL;
+ if (rb_prev)
+ *pprev = rb_entry(rb_prev, struct partial_page, pp_rb);
+ return NULL;
+}
+
+/*
+ * Insert @pp into @ppl: link it into the singly linked list after @prev (or at the head
+ * when @prev is NULL), link it into the rb-tree at @rb_link below @rb_parent, and make it
+ * the lookup hint.  @prev, @rb_link and @rb_parent are the values produced by a failed
+ * __ia32_find_pp() lookup.
+ */
+static void
+__ia32_insert_pp(struct partial_page_list *ppl, struct partial_page *pp,
+ struct partial_page *prev, struct rb_node **rb_link,
+ struct rb_node *rb_parent)
+{
+ /* link list */
+ if (prev) {
+ pp->next = prev->next;
+ prev->next = pp;
+ } else {
+ /*
+ * No predecessor: @pp becomes the new list head.
+ * NOTE(review): this relies on @rb_parent, when non-NULL, being the entry
+ * that must follow @pp in the list; confirm.
+ */
+ ppl->pp_head = pp;
+ if (rb_parent)
+ pp->next = rb_entry(rb_parent,
+ struct partial_page, pp_rb);
+ else
+ pp->next = NULL;
+ }
+
+ /* link rb */
+ rb_link_node(&pp->pp_rb, rb_parent, rb_link);
+ rb_insert_color(&pp->pp_rb, &ppl->ppl_rb);
+
+ ppl->pp_hint = pp;
+}
+
+/*
+ * Remove @pp from partial page list @ppl and free it.
+ * @prev is the list predecessor of @pp, or NULL when @pp is the list head.  If @pp was
+ * the lookup hint, the hint moves to @prev, or to the successor of @pp when there is no
+ * predecessor.
+ */
+static void
+__ia32_delete_pp(struct partial_page_list *ppl, struct partial_page *pp,
+	struct partial_page *prev)
+{
+	struct partial_page *succ = pp->next;
+
+	/* unlink from the singly linked list */
+	if (prev)
+		prev->next = succ;
+	else
+		ppl->pp_head = succ;
+
+	if (ppl->pp_hint == pp)
+		ppl->pp_hint = prev ? prev : succ;
+
+	rb_erase(&pp->pp_rb, &ppl->ppl_rb);
+	kmem_cache_free(partial_page_cachep, pp);
+}
+
+/*
+ * Return the partial page that precedes @pp in rb-tree order, or NULL if there is none.
+ */
+static struct partial_page *
+__pp_prev(struct partial_page *pp)
+{
+	struct rb_node *node = rb_prev(&pp->pp_rb);
+
+	return node ? rb_entry(node, struct partial_page, pp_rb) : NULL;
+}
+
+/*
+ * Delete every partial page of the current task whose base lies in [@start, @end).
+ * @start and @end are page aligned.  Nothing is done when the range is empty.
+ */
+static void
+__ia32_delete_pp_range(unsigned int start, unsigned int end)
+{
+	struct partial_page_list *ppl;
+	struct partial_page *cur, *pred, *succ;
+	struct rb_node **link, *parent;
+
+	if (end <= start)
+		return;
+
+	ppl = current->thread.ppl;
+	cur = __ia32_find_pp(ppl, start, &pred, &link, &parent);
+	if (cur)
+		pred = __pp_prev(cur);		/* exact hit: look up its predecessor */
+	else if (pred)
+		cur = pred->next;		/* first entry above @start */
+	else
+		cur = ppl->pp_head;
+
+	/* @pred stays the same: each deleted entry is the one right after it */
+	for (; cur && cur->base < end; cur = succ) {
+		succ = cur->next;
+		__ia32_delete_pp(ppl, cur, pred);
+	}
+}
+
+/*
+ * Set the range between @start and @end in bitmap.
+ * @start and @end should be IA32 page aligned and in the same IA64 page.
+ *
+ * If the page already has a partial_page entry, the bits are added to it and the entry
+ * is deleted once all PAGE_SIZE/IA32_PAGE_SIZE bits are set.  Otherwise a new entry is
+ * allocated, unless MAP_FIXED is set in @flags and a vma already covers the page.
+ *
+ * Returns 0 on success and -ENOMEM if a new entry cannot be allocated.
+ */
+static int
+__ia32_set_pp(unsigned int start, unsigned int end, int flags)
+{
+ struct partial_page *pp, *prev;
+ struct rb_node ** rb_link, *rb_parent;
+ unsigned int pstart, start_bit, end_bit, i;
+
+ pstart = PAGE_START(start);
+ start_bit = (start % PAGE_SIZE) / IA32_PAGE_SIZE;
+ end_bit = (end % PAGE_SIZE) / IA32_PAGE_SIZE;
+ /* @end on a page boundary means "up to the end of the page" */
+ if (end_bit == 0)
+ end_bit = PAGE_SIZE / IA32_PAGE_SIZE;
+ pp = __ia32_find_pp(current->thread.ppl, pstart, &prev,
+ &rb_link, &rb_parent);
+ if (pp) {
+ for (i = start_bit; i < end_bit; i++)
+ set_bit(i, &pp->bitmap);
+ /*
+ * Check: if this partial page has been set to a full page,
+ * then delete it.
+ */
+ if (find_first_zero_bit(&pp->bitmap, sizeof(pp->bitmap)*8) >=
+ PAGE_SIZE/IA32_PAGE_SIZE) {
+ __ia32_delete_pp(current->thread.ppl, pp, __pp_prev(pp));
+ }
+ return 0;
+ }
+
+ /*
+ * MAP_FIXED may lead to overlapping mmap.
+ * In this case, the requested mmap area may already mmaped as a full
+ * page. So check vma before adding a new partial page.
+ */
+ if (flags & MAP_FIXED) {
+ struct vm_area_struct *vma = find_vma(current->mm, pstart);
+ if (vma && vma->vm_start <= pstart)
+ return 0;
+ }
+
+ /* new a partial_page */
+ pp = kmem_cache_alloc(partial_page_cachep, GFP_KERNEL);
+ if (!pp)
+ return -ENOMEM;
+ pp->base = pstart;
+ pp->bitmap = 0;
+ for (i=start_bit; i<end_bit; i++)
+ set_bit(i, &(pp->bitmap));
+ pp->next = NULL;
+ /* prev/rb_link/rb_parent were filled in by the failed lookup above */
+ __ia32_insert_pp(current->thread.ppl, pp, prev, rb_link, rb_parent);
+ return 0;
+}
+
+/*
+ * @start and @end should be IA32 page aligned, but don't need to be in the
+ * same IA64 page. Split @start and @end to make sure they're in the same IA64
+ * page, then call __ia32_set_pp().
+ *
+ * Runs with current->mm->mmap_sem held for writing.  The return values of
+ * __ia32_set_pp() are not propagated (this function returns void).
+ */
+static void
+ia32_set_pp(unsigned int start, unsigned int end, int flags)
+{
+ down_write(&current->mm->mmap_sem);
+ if (flags & MAP_FIXED) {
+ /*
+ * MAP_FIXED may lead to overlapping mmap. When this happens,
+ * a series of complete IA64 pages results in deletion of
+ * old partial pages in that range.
+ */
+ __ia32_delete_pp_range(PAGE_ALIGN(start), PAGE_START(end));
+ }
+
+ if (end < PAGE_ALIGN(start)) {
+ /* the whole range lies inside a single IA64 page */
+ __ia32_set_pp(start, end, flags);
+ } else {
+ /* only a partial first and/or last page needs a bitmap entry */
+ if (offset_in_page(start))
+ __ia32_set_pp(start, PAGE_ALIGN(start), flags);
+ if (offset_in_page(end))
+ __ia32_set_pp(PAGE_START(end), end, flags);
+ }
+ up_write(&current->mm->mmap_sem);
+}
+
+/*
+ * Unset the range between @start and @end in bitmap.
+ * @start and @end should be IA32 page aligned and in the same IA64 page.
+ *
+ * If a partial_page entry exists for the page, the bits are cleared.  When the bitmap
+ * becomes 0 the entry is deleted and 1 is returned, otherwise 0 is returned.
+ * If no entry exists:
+ *   - if a vma covers the page, the (so far full) page is turned into a partial page
+ *     with all bits outside the range set, and 0 is returned;
+ *   - otherwise -ENOMEM is returned.
+ * -ENOMEM is also returned when the new entry cannot be allocated.
+ */
+static int
+__ia32_unset_pp(unsigned int start, unsigned int end)
+{
+ struct partial_page *pp, *prev;
+ struct rb_node ** rb_link, *rb_parent;
+ unsigned int pstart, start_bit, end_bit, i;
+ struct vm_area_struct *vma;
+
+ pstart = PAGE_START(start);
+ start_bit = (start % PAGE_SIZE) / IA32_PAGE_SIZE;
+ end_bit = (end % PAGE_SIZE) / IA32_PAGE_SIZE;
+ /* @end on a page boundary means "up to the end of the page" */
+ if (end_bit == 0)
+ end_bit = PAGE_SIZE / IA32_PAGE_SIZE;
+
+ pp = __ia32_find_pp(current->thread.ppl, pstart, &prev,
+ &rb_link, &rb_parent);
+ if (pp) {
+ for (i = start_bit; i < end_bit; i++)
+ clear_bit(i, &pp->bitmap);
+ if (pp->bitmap == 0) {
+ __ia32_delete_pp(current->thread.ppl, pp, __pp_prev(pp));
+ return 1;
+ }
+ return 0;
+ }
+
+ vma = find_vma(current->mm, pstart);
+ if (!vma || vma->vm_start > pstart) {
+ return -ENOMEM;
+ }
+
+ /* new a partial_page */
+ pp = kmem_cache_alloc(partial_page_cachep, GFP_KERNEL);
+ if (!pp)
+ return -ENOMEM;
+ pp->base = pstart;
+ pp->bitmap = 0;
+ /* everything outside [start_bit, end_bit) stays mapped */
+ for (i = 0; i < start_bit; i++)
+ set_bit(i, &(pp->bitmap));
+ for (i = end_bit; i < PAGE_SIZE / IA32_PAGE_SIZE; i++)
+ set_bit(i, &(pp->bitmap));
+ pp->next = NULL;
+ __ia32_insert_pp(current->thread.ppl, pp, prev, rb_link, rb_parent);
+ return 0;
+}
+
+/*
+ * Delete pp between PAGE_ALIGN(start) and PAGE_START(end) by calling
+ * __ia32_delete_pp_range(). Unset possible partial pages by calling
+ * __ia32_unset_pp().
+ *
+ * On return *@startp and *@endp have been moved to page boundaries: outwards for a
+ * partial page whose bitmap became empty (__ia32_unset_pp() returned 1), inwards for a
+ * partial page that still has bits set (it returned 0).  sys32_munmap() passes the
+ * adjusted range to sys_munmap().
+ *
+ * Returns the last __ia32_unset_pp() result (see there), or 0 if it was never called.
+ * Runs with current->mm->mmap_sem held for writing.
+ */
+static int
+ia32_unset_pp(unsigned int *startp, unsigned int *endp)
+{
+ unsigned int start = *startp, end = *endp;
+ int ret = 0;
+
+ down_write(&current->mm->mmap_sem);
+
+ __ia32_delete_pp_range(PAGE_ALIGN(start), PAGE_START(end));
+
+ if (end < PAGE_ALIGN(start)) {
+ /* the whole range lies inside a single IA64 page */
+ ret = __ia32_unset_pp(start, end);
+ if (ret == 1) {
+ *startp = PAGE_START(start);
+ *endp = PAGE_ALIGN(end);
+ }
+ if (ret == 0) {
+ /* to shortcut sys_munmap() in sys32_munmap() */
+ *startp = PAGE_START(start);
+ *endp = PAGE_START(end);
+ }
+ } else {
+ if (offset_in_page(start)) {
+ ret = __ia32_unset_pp(start, PAGE_ALIGN(start));
+ if (ret == 1)
+ *startp = PAGE_START(start);
+ if (ret == 0)
+ *startp = PAGE_ALIGN(start);
+ if (ret < 0)
+ goto out;
+ }
+ if (offset_in_page(end)) {
+ ret = __ia32_unset_pp(PAGE_START(end), end);
+ if (ret == 1)
+ *endp = PAGE_ALIGN(end);
+ if (ret == 0)
+ *endp = PAGE_START(end);
+ }
+ }
+
+ out:
+ up_write(&current->mm->mmap_sem);
+ return ret;
+}
+
+/*
+ * Compare the range between @start and @end with bitmap in partial page.
+ * @start and @end should be IA32 page aligned and in the same IA64 page.
+ *
+ * Only the first run of set bits in the bitmap is considered.  Returns
+ *   1        if the page has no partial_page entry, or the range lies inside the first
+ *            run without matching it exactly, or it matches but another run follows;
+ *   0        if the range is exactly the first run and no further bits are set;
+ *   -ENOMEM  if the range reaches outside the first run.
+ */
+static int
+__ia32_compare_pp(unsigned int start, unsigned int end)
+{
+	struct partial_page *pp, *prev;
+	struct rb_node ** rb_link, *rb_parent;
+	unsigned int pstart, start_bit, end_bit, size;
+	unsigned int first_bit, next_zero_bit;	/* the first range in bitmap */
+
+	pstart = PAGE_START(start);
+
+	pp = __ia32_find_pp(current->thread.ppl, pstart, &prev,
+			    &rb_link, &rb_parent);
+	if (!pp)
+		return 1;
+
+	start_bit = (start % PAGE_SIZE) / IA32_PAGE_SIZE;
+	end_bit = (end % PAGE_SIZE) / IA32_PAGE_SIZE;
+	/*
+	 * @end is exclusive, so an @end on the page boundary means "up to the end of
+	 * the page", exactly as in __ia32_set_pp() and __ia32_unset_pp().  Without
+	 * this, such a range would be compared as if it ended at bit 0.
+	 */
+	if (end_bit == 0)
+		end_bit = PAGE_SIZE / IA32_PAGE_SIZE;
+	size = sizeof(pp->bitmap) * 8;
+	first_bit = find_first_bit(&pp->bitmap, size);
+	next_zero_bit = find_next_zero_bit(&pp->bitmap, size, first_bit);
+	if ((start_bit < first_bit) || (end_bit > next_zero_bit)) {
+		/* exceeds the first range in bitmap */
+		return -ENOMEM;
+	} else if ((start_bit == first_bit) && (end_bit == next_zero_bit)) {
+		first_bit = find_next_bit(&pp->bitmap, size, next_zero_bit);
+		if ((next_zero_bit < first_bit) && (first_bit < size))
+			return 1;	/* has next range */
+		else
+			return 0;	/* no next range */
+	} else
+		return 1;
+}
+
+/*
+ * @start and @end should be IA32 page aligned, but don't need to be in the
+ * same IA64 page. Split @start and @end to make sure they're in the same IA64
+ * page, then call __ia32_compare_pp().
+ *
+ * Take this as example: the range is the 1st and 2nd 4K page.
+ * Return 0 if they fit bitmap exactly, i.e. bitmap = 00000011;
+ * Return 1 if the range doesn't cover whole bitmap, e.g. bitmap = 00001111;
+ * Return -ENOMEM if the range exceeds the bitmap, e.g. bitmap = 00000001 or
+ * bitmap = 00000101.
+ *
+ * Whenever a partial page fits exactly (result 0), *@startp / *@endp is moved outwards
+ * to the enclosing IA64 page boundary.  The value returned is the result of the last
+ * __ia32_compare_pp() call, or 0 if none was needed.
+ * Runs with current->mm->mmap_sem held for writing.
+ */
+static int
+ia32_compare_pp(unsigned int *startp, unsigned int *endp)
+{
+ unsigned int start = *startp, end = *endp;
+ int retval = 0;
+
+ down_write(&current->mm->mmap_sem);
+
+ if (end < PAGE_ALIGN(start)) {
+ /* the whole range lies inside a single IA64 page */
+ retval = __ia32_compare_pp(start, end);
+ if (retval == 0) {
+ *startp = PAGE_START(start);
+ *endp = PAGE_ALIGN(end);
+ }
+ } else {
+ if (offset_in_page(start)) {
+ retval = __ia32_compare_pp(start,
+ PAGE_ALIGN(start));
+ if (retval == 0)
+ *startp = PAGE_START(start);
+ if (retval < 0)
+ goto out;
+ }
+ if (offset_in_page(end)) {
+ retval = __ia32_compare_pp(PAGE_START(end), end);
+ if (retval == 0)
+ *endp = PAGE_ALIGN(end);
+ }
+ }
+
+ out:
+ up_write(&current->mm->mmap_sem);
+ return retval;
+}
+
+/*
+ * Free every partial_page on the linked list of @ppl, then free @ppl itself.
+ */
+static void
+__ia32_drop_pp_list(struct partial_page_list *ppl)
+{
+	struct partial_page *pp, *next;
+
+	for (pp = ppl->pp_head; pp != NULL; pp = next) {
+		next = pp->next;	/* fetch before the entry is freed */
+		kmem_cache_free(partial_page_cachep, pp);
+	}
+
+	kfree(ppl);
+}
+
+/*
+ * Drop @task's reference on its partial page list, if it has one, and free the list
+ * when that was the last reference.
+ */
+void
+ia32_drop_partial_page_list(struct task_struct *task)
+{
+	struct partial_page_list *ppl = task->thread.ppl;
+
+	if (ppl == NULL)
+		return;
+	if (atomic_dec_and_test(&ppl->pp_count))
+		__ia32_drop_pp_list(ppl);
+}
+
+/*
+ * Copy current->thread.ppl to ppl (already initialized).
+ *
+ * The source list is walked in list order and every copy is inserted as the right child
+ * of the previously inserted copy.
+ * NOTE(review): this presumes the source list is sorted by ascending base; confirm.
+ *
+ * Returns 0 on success.  On allocation failure -ENOMEM is returned and the entries
+ * copied so far are left in @ppl.
+ */
+static int
+__ia32_copy_pp_list(struct partial_page_list *ppl)
+{
+ struct partial_page *pp, *tmp, *prev;
+ struct rb_node **rb_link, *rb_parent;
+
+ ppl->pp_head = NULL;
+ ppl->pp_hint = NULL;
+ ppl->ppl_rb = RB_ROOT;
+ rb_link = &ppl->ppl_rb.rb_node;
+ rb_parent = NULL;
+ prev = NULL;
+
+ for (pp = current->thread.ppl->pp_head; pp; pp = pp->next) {
+ tmp = kmem_cache_alloc(partial_page_cachep, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+ /* struct copy; the list and tree links are rewritten by the insert below */
+ *tmp = *pp;
+ __ia32_insert_pp(ppl, tmp, prev, rb_link, rb_parent);
+ prev = tmp;
+ rb_link = &tmp->pp_rb.rb_right;
+ rb_parent = &tmp->pp_rb;
+ }
+ return 0;
+}
+
+/*
+ * Give the new task @p a partial page list.
+ * With CLONE_VM the list of the current task is shared and its reference count is
+ * incremented; otherwise a new list is allocated and filled with a copy of the current
+ * task's entries while holding current->mm->mmap_sem for writing.
+ *
+ * Returns 0 on success, -ENOMEM if the list cannot be allocated, or the result of
+ * __ia32_copy_pp_list().
+ */
+int
+ia32_copy_partial_page_list(struct task_struct *p, unsigned long clone_flags)
+{
+	int retval;
+
+	if (clone_flags & CLONE_VM) {
+		atomic_inc(&current->thread.ppl->pp_count);
+		p->thread.ppl = current->thread.ppl;
+		return 0;
+	}
+
+	p->thread.ppl = ia32_init_pp_list();
+	if (p->thread.ppl == NULL)
+		return -ENOMEM;
+
+	down_write(&current->mm->mmap_sem);
+	retval = __ia32_copy_pp_list(p->thread.ppl);
+	up_write(&current->mm->mmap_sem);
+
+	return retval;
+}
+
+/*
+ * Emulate a 4KB-granular ia32 mmap() on a kernel whose page size is larger.
+ *
+ * The request [@start, @start + @len) is widened to whole kernel pages [pstart, pend).
+ * With MAP_FIXED, a partial first and/or last page is handled by mmap_subpage() and only
+ * the remaining whole pages are mapped with do_mmap().  Without MAP_FIXED, the address is
+ * obtained from arch_get_unmapped_area().  A file mapping whose page-rounded file offset
+ * is not page aligned ("not congruent") is emulated by an anonymous mapping that is
+ * filled by reading the file.
+ *
+ * Returns the start address of the mapping, or a negative errno converted to unsigned
+ * long.  Called from ia32_do_mmap() with ia32_mmap_sem held.
+ */
+static unsigned long
+emulate_mmap (struct file *file, unsigned long start, unsigned long len, int prot, int flags,
+ loff_t off)
+{
+ unsigned long tmp, end, pend, pstart, ret, is_congruent, fudge = 0;
+ struct inode *inode;
+ loff_t poff;
+
+ end = start + len;
+ pstart = PAGE_START(start);
+ pend = PAGE_ALIGN(end);
+
+ if (flags & MAP_FIXED) {
+ /* record the 4KB sub-ranges in the partial page list before mapping */
+ ia32_set_pp((unsigned int)start, (unsigned int)end, flags);
+ if (start > pstart) {
+ /* partial first page */
+ if (flags & MAP_SHARED)
+ printk(KERN_INFO
+ "%s(%d): emulate_mmap() can't share head (addr=0x%lx)\n",
+ current->comm, current->pid, start);
+ ret = mmap_subpage(file, start, min(PAGE_ALIGN(start), end), prot, flags,
+ off);
+ if (IS_ERR((void *) ret))
+ return ret;
+ pstart += PAGE_SIZE;
+ if (pstart >= pend)
+ goto out; /* done */
+ }
+ if (end < pend) {
+ /* partial last page */
+ if (flags & MAP_SHARED)
+ printk(KERN_INFO
+ "%s(%d): emulate_mmap() can't share tail (end=0x%lx)\n",
+ current->comm, current->pid, end);
+ ret = mmap_subpage(file, max(start, PAGE_START(end)), end, prot, flags,
+ (off + len) - offset_in_page(end));
+ if (IS_ERR((void *) ret))
+ return ret;
+ pend -= PAGE_SIZE;
+ if (pstart >= pend)
+ goto out; /* done */
+ }
+ } else {
+ /*
+ * If a start address was specified, use it if the entire rounded out area
+ * is available.
+ */
+ if (start && !pstart)
+ fudge = 1; /* handle case of mapping to range (0,PAGE_SIZE) */
+ tmp = arch_get_unmapped_area(file, pstart - fudge, pend - pstart, 0, flags);
+ if (tmp != pstart) {
+ pstart = tmp;
+ start = pstart + offset_in_page(off); /* make start congruent with off */
+ end = start + len;
+ pend = PAGE_ALIGN(end);
+ }
+ }
+
+ /* file offset that corresponds to pstart */
+ poff = off + (pstart - start); /* note: (pstart - start) may be negative */
+ is_congruent = (flags & MAP_ANONYMOUS) || (offset_in_page(poff) == 0);
+
+ if ((flags & MAP_SHARED) && !is_congruent)
+ printk(KERN_INFO "%s(%d): emulate_mmap() can't share contents of incongruent mmap "
+ "(addr=0x%lx,off=0x%llx)\n", current->comm, current->pid, start, off);
+
+ DBG("mmap_body: mapping [0x%lx-0x%lx) %s with poff 0x%llx\n", pstart, pend,
+ is_congruent ? "congruent" : "not congruent", poff);
+
+ down_write(&current->mm->mmap_sem);
+ {
+ if (!(flags & MAP_ANONYMOUS) && is_congruent)
+ ret = do_mmap(file, pstart, pend - pstart, prot, flags | MAP_FIXED, poff);
+ else
+ /* anonymous memory; made writable when it must be filled from the file */
+ ret = do_mmap(NULL, pstart, pend - pstart,
+ prot | ((flags & MAP_ANONYMOUS) ? 0 : PROT_WRITE),
+ flags | MAP_FIXED | MAP_ANONYMOUS, 0);
+ }
+ up_write(&current->mm->mmap_sem);
+
+ if (IS_ERR((void *) ret))
+ return ret;
+
+ if (!is_congruent) {
+ /* read the file contents */
+ inode = file->f_dentry->d_inode;
+ if (!inode->i_fop || !file->f_op->read
+ || ((*file->f_op->read)(file, (char __user *) pstart, pend - pstart, &poff)
+ < 0))
+ {
+ sys_munmap(pstart, pend - pstart);
+ return -EINVAL;
+ }
+ /* NOTE(review): unlike the read failure above, this path returns without unmapping */
+ if (!(prot & PROT_WRITE) && sys_mprotect(pstart, pend - pstart, prot) < 0)
+ return -EINVAL;
+ }
+
+ /* for non-fixed mappings the final address is only known now */
+ if (!(flags & MAP_FIXED))
+ ia32_set_pp((unsigned int)start, (unsigned int)end, flags);
+out:
+ return start;
+}
+
+#endif /* PAGE_SHIFT > IA32_PAGE_SHIFT */
+
+/*
+ * Widen an ia32 protection mask to what x86 effectively grants:
+ * PROT_WRITE yields read+write+exec, PROT_READ or PROT_EXEC alone yields read+exec,
+ * and anything else is returned unchanged.
+ */
+static inline unsigned int
+get_prot32 (unsigned int prot)
+{
+	/* on x86, PROT_WRITE implies PROT_READ which implies PROT_EXEC */
+	if (prot & PROT_WRITE)
+		return prot | PROT_READ | PROT_WRITE | PROT_EXEC;
+
+	/* on x86, there is no distinction between PROT_READ and PROT_EXEC */
+	if (prot & (PROT_READ | PROT_EXEC))
+		return prot | PROT_READ | PROT_EXEC;
+
+	return prot;
+}
+
+/*
+ * Common mmap worker for ia32 tasks.
+ *
+ * Rejects files without an mmap operation (-ENODEV), ranges that do not fit below
+ * IA32_PAGE_OFFSET (-ENOMEM with MAP_FIXED, -EINVAL otherwise) and offsets that are not
+ * 4KB aligned (-EINVAL).  A length that rounds to 0 returns @addr unchanged.  The
+ * protection is widened by get_prot32() and the mapping is created by emulate_mmap()
+ * when the kernel page is larger than the ia32 page, by do_mmap() otherwise.
+ *
+ * Returns the mapped address or a negative errno converted to unsigned long.
+ */
+unsigned long
+ia32_do_mmap (struct file *file, unsigned long addr, unsigned long len, int prot, int flags,
+	      loff_t offset)
+{
+	DBG("ia32_do_mmap(file=%p,addr=0x%lx,len=0x%lx,prot=%x,flags=%x,offset=0x%llx)\n",
+	    file, addr, len, prot, flags, offset);
+
+	if (file) {
+		if (!file->f_op || !file->f_op->mmap)
+			return -ENODEV;
+	}
+
+	len = IA32_PAGE_ALIGN(len);
+	if (!len)
+		return addr;
+
+	if (len > IA32_PAGE_OFFSET || addr > IA32_PAGE_OFFSET - len)
+		return (flags & MAP_FIXED) ? -ENOMEM : -EINVAL;
+
+	if (OFFSET4K(offset))
+		return -EINVAL;
+
+	prot = get_prot32(prot);
+
+#if PAGE_SHIFT > IA32_PAGE_SHIFT
+	down(&ia32_mmap_sem);
+	addr = emulate_mmap(file, addr, len, prot, flags, offset);
+	up(&ia32_mmap_sem);
+#else
+	down_write(&current->mm->mmap_sem);
+	addr = do_mmap(file, addr, len, prot, flags, offset);
+	up_write(&current->mm->mmap_sem);
+#endif
+	DBG("ia32_do_mmap: returning 0x%lx\n", addr);
+	return addr;
+}
+
+/*
+ * Linux/i386 used to be unable to handle more than 4 system call parameters, so these
+ * system calls pass their parameters in a memory block instead.
+ */
+
+/* Argument block that sys32_mmap() copies in from user space. */
+struct mmap_arg_struct {
+ unsigned int addr;
+ unsigned int len;
+ unsigned int prot;
+ unsigned int flags;
+ unsigned int fd;
+ unsigned int offset;
+};
+
+/*
+ * Old-style ia32 mmap(): all arguments arrive in a struct mmap_arg_struct at @arg.
+ *
+ * Returns -EFAULT if the block cannot be read, -EINVAL if the offset is not 4KB aligned,
+ * -EBADF if a non-anonymous mapping names an invalid descriptor, and otherwise the
+ * result of ia32_do_mmap().
+ */
+asmlinkage long
+sys32_mmap (struct mmap_arg_struct __user *arg)
+{
+	struct mmap_arg_struct args;
+	struct file *filp = NULL;
+	unsigned long mapped;
+	int map_flags;
+
+	if (copy_from_user(&args, arg, sizeof(args)))
+		return -EFAULT;
+	if (OFFSET4K(args.offset))
+		return -EINVAL;
+
+	map_flags = args.flags;
+	map_flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+
+	if ((map_flags & MAP_ANONYMOUS) == 0) {
+		filp = fget(args.fd);
+		if (filp == NULL)
+			return -EBADF;
+	}
+
+	mapped = ia32_do_mmap(filp, args.addr, args.len, args.prot, map_flags, args.offset);
+
+	/* drop the reference taken by fget() */
+	if (filp != NULL)
+		fput(filp);
+	return mapped;
+}
+
+/*
+ * ia32 mmap2(): like mmap, but the file offset @pgoff is given in units of ia32 pages.
+ * Returns -EBADF if a non-anonymous mapping names an invalid descriptor, otherwise the
+ * result of ia32_do_mmap().
+ */
+asmlinkage long
+sys32_mmap2 (unsigned int addr, unsigned int len, unsigned int prot, unsigned int flags,
+	     unsigned int fd, unsigned int pgoff)
+{
+	struct file *filp = NULL;
+	unsigned long mapped;
+
+	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+	if ((flags & MAP_ANONYMOUS) == 0) {
+		filp = fget(fd);
+		if (filp == NULL)
+			return -EBADF;
+	}
+
+	/* widen before shifting so that large page offsets are not truncated */
+	mapped = ia32_do_mmap(filp, addr, len, prot, flags,
+			      (unsigned long) pgoff << IA32_PAGE_SHIFT);
+
+	if (filp != NULL)
+		fput(filp);
+	return mapped;
+}
+
+/*
+ * ia32 munmap().
+ *
+ * When the kernel page is not larger than the ia32 page this is a plain sys_munmap().
+ * Otherwise @start must be 4KB aligned and the range non-empty (-EINVAL); the partial
+ * page bookkeeping is updated by ia32_unset_pp(), which also adjusts the range to whole
+ * kernel pages, and only a non-empty remainder is handed to sys_munmap() under
+ * ia32_mmap_sem.
+ */
+asmlinkage long
+sys32_munmap (unsigned int start, unsigned int len)
+{
+#if PAGE_SHIFT > IA32_PAGE_SHIFT
+	unsigned int limit;
+	long status;
+
+	if (OFFSET4K(start))
+		return -EINVAL;
+
+	limit = IA32_PAGE_ALIGN(start + len);
+	if (limit <= start)
+		return -EINVAL;
+
+	status = ia32_unset_pp(&start, &limit);
+	if (status < 0)
+		return status;
+
+	/* nothing but still-used partial pages: no kernel page to unmap */
+	if (limit <= start)
+		return 0;
+
+	down(&ia32_mmap_sem);
+	status = sys_munmap(start, limit - start);
+	up(&ia32_mmap_sem);
+	return status;
+#else
+	return sys_munmap(start, len);
+#endif
+}
+
+#if PAGE_SHIFT > IA32_PAGE_SHIFT
+
+/*
+ * mprotect() a kernel page that is only partially covered by the request.  The page
+ * gets the union of its current protection and @new_prot, so access to a partial page
+ * can only become less restrictive.  PROT_NONE therefore changes nothing and returns 0
+ * right away; otherwise the sys_mprotect() result is returned.
+ */
+static long
+mprotect_subpage (unsigned long address, int new_prot)
+{
+	struct vm_area_struct *vma;
+
+	if (new_prot == PROT_NONE)
+		return 0;	/* optimize case where nothing changes... */
+
+	vma = find_vma(current->mm, address);
+	return sys_mprotect(address, PAGE_SIZE, new_prot | get_page_prot(vma, address));
+}
+
+#endif /* PAGE_SHIFT > IA32_PAGE_SHIFT */
+
+/*
+ * ia32 mprotect().
+ *
+ * @prot is first widened by get_prot32().  When the kernel page is not larger than the
+ * ia32 page this is a plain sys_mprotect().  Otherwise @start must be 4KB aligned and
+ * the range must not wrap (-EINVAL); ia32_compare_pp() checks the range against the
+ * partial page bitmaps and may widen it to whole kernel pages.  Kernel pages that are
+ * still only partially covered go through mprotect_subpage(); the whole pages in between
+ * go through sys_mprotect().
+ */
+asmlinkage long
+sys32_mprotect (unsigned int start, unsigned int len, int prot)
+{
+ unsigned int end = start + len;
+#if PAGE_SHIFT > IA32_PAGE_SHIFT
+ long retval = 0;
+#endif
+
+ prot = get_prot32(prot);
+
+#if PAGE_SHIFT <= IA32_PAGE_SHIFT
+ return sys_mprotect(start, end - start, prot);
+#else
+ if (OFFSET4K(start))
+ return -EINVAL;
+
+ end = IA32_PAGE_ALIGN(end);
+ if (end < start)
+ return -EINVAL;
+
+ /* may move start/end outwards to kernel page boundaries */
+ retval = ia32_compare_pp(&start, &end);
+
+ if (retval < 0)
+ return retval;
+
+ down(&ia32_mmap_sem);
+ {
+ if (offset_in_page(start)) {
+ /* start address is 4KB aligned but not page aligned. */
+ retval = mprotect_subpage(PAGE_START(start), prot);
+ if (retval < 0)
+ goto out;
+
+ start = PAGE_ALIGN(start);
+ if (start >= end)
+ goto out; /* retval is already zero... */
+ }
+
+ if (offset_in_page(end)) {
+ /* end address is 4KB aligned but not page aligned. */
+ retval = mprotect_subpage(PAGE_START(end), prot);
+ if (retval < 0)
+ goto out;
+
+ end = PAGE_START(end);
+ }
+ /* remaining whole kernel pages */
+ retval = sys_mprotect(start, end - start, prot);
+ }
+ out:
+ up(&ia32_mmap_sem);
+ return retval;
+#endif
+}
+
+/*
+ * ia32 mremap().
+ *
+ * When the kernel page is not larger than the ia32 page this is a plain sys_mremap().
+ * Otherwise @addr (and @new_addr with MREMAP_FIXED) must be 4KB aligned and the new
+ * length non-zero (-EINVAL).  Shrinking is done with sys32_munmap() and returns @addr
+ * unless the mapping must also move.  In the remaining cases the range is widened to
+ * kernel pages, passed to sys_mremap() under ia32_mmap_sem, and on a successful
+ * expansion the added 4KB range is recorded with ia32_set_pp().
+ */
+asmlinkage long
+sys32_mremap (unsigned int addr, unsigned int old_len, unsigned int new_len,
+ unsigned int flags, unsigned int new_addr)
+{
+ long ret;
+
+#if PAGE_SHIFT <= IA32_PAGE_SHIFT
+ ret = sys_mremap(addr, old_len, new_len, flags, new_addr);
+#else
+ unsigned int old_end, new_end;
+
+ if (OFFSET4K(addr))
+ return -EINVAL;
+
+ old_len = IA32_PAGE_ALIGN(old_len);
+ new_len = IA32_PAGE_ALIGN(new_len);
+ old_end = addr + old_len;
+ new_end = addr + new_len;
+
+ if (!new_len)
+ return -EINVAL;
+
+ if ((flags & MREMAP_FIXED) && (OFFSET4K(new_addr)))
+ return -EINVAL;
+
+ if (old_len >= new_len) {
+ /* shrink (or same size): unmap the tail; an error for an empty tail is ignored */
+ ret = sys32_munmap(addr + new_len, old_len - new_len);
+ if (ret && old_len != new_len)
+ return ret;
+ ret = addr;
+ if (!(flags & MREMAP_FIXED) || (new_addr == addr))
+ return ret;
+ /* NOTE(review): old_end is not recomputed after this and is used below; confirm */
+ old_len = new_len;
+ }
+
+ /* widen to whole kernel pages for the native call */
+ addr = PAGE_START(addr);
+ old_len = PAGE_ALIGN(old_end) - addr;
+ new_len = PAGE_ALIGN(new_end) - addr;
+
+ down(&ia32_mmap_sem);
+ {
+ ret = sys_mremap(addr, old_len, new_len, flags, new_addr);
+ }
+ up(&ia32_mmap_sem);
+
+ if ((ret >= 0) && (old_len < new_len)) {
+ /* mremap expanded successfully */
+ /* NOTE(review): flags holds MREMAP_* bits here, but ia32_set_pp() tests MAP_FIXED; confirm */
+ ia32_set_pp(old_end, new_end, flags);
+ }
+#endif
+ return ret;
+}
+
+/*
+ * ia32 pipe(): create a pipe and store its two descriptors at @fd.
+ * Returns the do_pipe() error, -EFAULT if the descriptors cannot be copied out, or 0.
+ */
+asmlinkage long
+sys32_pipe (int __user *fd)
+{
+	int fds[2];
+	int retval = do_pipe(fds);
+
+	if (retval == 0 && copy_to_user(fd, fds, sizeof(fds)))
+		retval = -EFAULT;
+	return retval;
+}
+
+/*
+ * Read the 32-bit timeval at user address @i into the kernel timeval @o.
+ * Returns 0 on success and non-zero if @i is not readable.
+ */
+static inline long
+get_tv32 (struct timeval *o, struct compat_timeval __user *i)
+{
+	if (!access_ok(VERIFY_READ, i, sizeof(*i)))
+		return 1;
+
+	/* bitwise OR: both fields are always fetched */
+	return (__get_user(o->tv_sec, &i->tv_sec) | __get_user(o->tv_usec, &i->tv_usec)) != 0;
+}
+
+/*
+ * Store the kernel timeval @i into the 32-bit timeval at user address @o.
+ * Returns 0 on success and non-zero if @o is not writable.
+ */
+static inline long
+put_tv32 (struct compat_timeval __user *o, struct timeval *i)
+{
+	if (!access_ok(VERIFY_WRITE, o, sizeof(*o)))
+		return 1;
+
+	/* bitwise OR: both fields are always stored */
+	return (__put_user(i->tv_sec, &o->tv_sec) | __put_user(i->tv_usec, &o->tv_usec)) != 0;
+}
+
+/*
+ * ia32 alarm(): arm ITIMER_REAL as a one-shot timer of @seconds seconds and return the
+ * time that was left on the previous timer, rounded up to whole seconds.
+ */
+asmlinkage unsigned long
+sys32_alarm (unsigned int seconds)
+{
+	struct itimerval it_new, it_old;
+	unsigned int remaining;
+
+	it_new.it_interval.tv_usec = 0;
+	it_new.it_interval.tv_sec = 0;
+	it_new.it_value.tv_usec = 0;
+	it_new.it_value.tv_sec = seconds;
+	do_setitimer(ITIMER_REAL, &it_new, &it_old);
+
+	remaining = it_old.it_value.tv_sec;
+	/* ehhh.. We can't return 0 if we have an alarm pending.. */
+	/* And we'd better return too much than too little anyway */
+	if (it_old.it_value.tv_usec != 0)
+		remaining += 1;
+	return remaining;
+}
+
+/* Translations due to time_t size differences. Which affects all
+ sorts of things, like timeval and itimerval. */
+
+extern struct timezone sys_tz;
+
+/*
+ * ia32 gettimeofday(): store the current time at @tv and the system time zone at @tz;
+ * either pointer may be NULL.  Returns 0, or -EFAULT if a store to user space fails.
+ */
+asmlinkage long
+sys32_gettimeofday (struct compat_timeval __user *tv, struct timezone __user *tz)
+{
+	if (tv) {
+		struct timeval now;
+
+		do_gettimeofday(&now);
+		if (put_tv32(tv, &now))
+			return -EFAULT;
+	}
+
+	if (tz && copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * ia32 settimeofday(): read the optional 32-bit timeval @tv (converted to a timespec)
+ * and the optional time zone @tz, then hand whichever was supplied to
+ * do_sys_settimeofday().  Returns -EFAULT if either cannot be read.
+ */
+asmlinkage long
+sys32_settimeofday (struct compat_timeval __user *tv, struct timezone __user *tz)
+{
+	struct timeval ktv;
+	struct timespec kts, *new_time = NULL;
+	struct timezone ktz, *new_zone = NULL;
+
+	if (tv) {
+		if (get_tv32(&ktv, tv))
+			return -EFAULT;
+		kts.tv_sec = ktv.tv_sec;
+		kts.tv_nsec = ktv.tv_usec * 1000;	/* microseconds -> nanoseconds */
+		new_time = &kts;
+	}
+
+	if (tz) {
+		if (copy_from_user(&ktz, tz, sizeof(ktz)))
+			return -EFAULT;
+		new_zone = &ktz;
+	}
+
+	return do_sys_settimeofday(new_time, new_zone);
+}
+
+/* State shared between sys32_getdents() and its filldir32() callback. */
+struct getdents32_callback {
+ struct compat_dirent __user *current_dir; /* where the next record is written */
+ struct compat_dirent __user *previous; /* last record written, NULL if none yet */
+ int count; /* bytes still free in the user buffer */
+ int error; /* error code filldir32() leaves behind */
+};
+
+/* State shared between sys32_readdir() and its fillonedir32() callback. */
+struct readdir32_callback {
+ struct old_linux32_dirent __user * dirent; /* user buffer for the single entry */
+ int count; /* number of entries stored so far (0 or 1) */
+};
+
+/*
+ * vfs_readdir() callback for sys32_getdents(): append one compat_dirent record for
+ * @name to the user buffer described by @__buf (a struct getdents32_callback).
+ *
+ * The d_off of a record is only known when the next entry arrives, so @offset is stored
+ * into the previously written record.  Returns 0 on success, -EINVAL if the record does
+ * not fit into the remaining space and -EFAULT if a store to user space fails; the same
+ * code is left in the callback's error field.
+ */
+static int
+filldir32 (void *__buf, const char *name, int namlen, loff_t offset, ino_t ino,
+	   unsigned int d_type)
+{
+	struct getdents32_callback *cb = __buf;
+	struct compat_dirent __user *ent;
+	int reclen = ROUND_UP(offsetof(struct compat_dirent, d_name) + namlen + 1, 4);
+
+	cb->error = -EINVAL;	/* only used if we fail.. */
+	if (cb->count < reclen)
+		return -EINVAL;
+
+	cb->error = -EFAULT;	/* only used if we fail.. */
+	ent = cb->previous;
+	if (ent && put_user(offset, &ent->d_off))
+		return -EFAULT;
+
+	ent = cb->current_dir;
+	cb->previous = ent;
+	if (put_user(ino, &ent->d_ino))
+		return -EFAULT;
+	if (put_user(reclen, &ent->d_reclen))
+		return -EFAULT;
+	if (copy_to_user(ent->d_name, name, namlen))
+		return -EFAULT;
+	if (put_user(0, ent->d_name + namlen))
+		return -EFAULT;
+
+	/* advance to where the next record will start */
+	cb->current_dir = (struct compat_dirent __user *) ((char __user *) ent + reclen);
+	cb->count -= reclen;
+	return 0;
+}
+
+/*
+ * 32-bit getdents(): fills the user buffer @dirent (of @count bytes)
+ * with compat_dirent records for the directory open on @fd.
+ *
+ * Returns the number of bytes written when at least one record was
+ * stored, otherwise the error staged by filldir32() (0 when nothing was
+ * produced).  Returns -EBADF for a bad descriptor, a negative error
+ * from vfs_readdir(), or -EFAULT when the final d_off store faults.
+ */
+asmlinkage long
+sys32_getdents (unsigned int fd, struct compat_dirent __user *dirent, unsigned int count)
+{
+	struct file * file;
+	struct compat_dirent __user * lastdirent;
+	struct getdents32_callback buf;
+	int error;
+
+	error = -EBADF;
+	file = fget(fd);
+	if (!file)
+		goto out;
+
+	buf.current_dir = dirent;
+	buf.previous = NULL;
+	buf.count = count;
+	buf.error = 0;
+
+	error = vfs_readdir(file, filldir32, &buf);
+	if (error < 0)
+		goto out_putf;
+	error = buf.error;
+	lastdirent = buf.previous;
+	if (lastdirent) {
+		/*
+		 * The last record's d_off is the resulting file position.
+		 * A failed store here is a user-memory fault, so report
+		 * -EFAULT like every other put_user() failure in this path.
+		 */
+		error = -EFAULT;
+		if (put_user(file->f_pos, &lastdirent->d_off))
+			goto out_putf;
+		error = count - buf.count;
+	}
+
+out_putf:
+	fput(file);
+out:
+	return error;
+}
+
+/*
+ * Directory-entry callback used by sys32_readdir(): stores exactly one
+ * old-style entry in the user buffer held by @__buf (a struct
+ * readdir32_callback).  A second invocation is rejected with -EINVAL.
+ * Returns -EFAULT when a user-space store fails, 0 otherwise.
+ */
+static int
+fillonedir32 (void * __buf, const char * name, int namlen, loff_t offset, ino_t ino,
+	      unsigned int d_type)
+{
+	struct readdir32_callback *cb = (struct readdir32_callback *) __buf;
+	struct old_linux32_dirent __user *out;
+
+	if (cb->count != 0)
+		return -EINVAL;
+	cb->count++;
+	out = cb->dirent;
+
+	if (put_user(ino, &out->d_ino))
+		return -EFAULT;
+	if (put_user(offset, &out->d_offset))
+		return -EFAULT;
+	if (put_user(namlen, &out->d_namlen))
+		return -EFAULT;
+	if (copy_to_user(out->d_name, name, namlen))
+		return -EFAULT;
+	/* terminate the name for the 32-bit reader */
+	if (put_user(0, out->d_name + namlen))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * 32-bit old-style readdir(): reads a single directory entry from @fd
+ * into @dirent.  Returns the number of entries stored (0 or 1), -EBADF
+ * for a bad descriptor, or the negative error from vfs_readdir().
+ * @count is not used.
+ */
+asmlinkage long
+sys32_readdir (unsigned int fd, void __user *dirent, unsigned int count)
+{
+	struct readdir32_callback cb;
+	struct file *filp;
+	int status;
+
+	filp = fget(fd);
+	if (filp == NULL)
+		return -EBADF;
+
+	cb.dirent = dirent;
+	cb.count = 0;
+
+	status = vfs_readdir(filp, fillonedir32, &cb);
+	if (status >= 0)
+		status = cb.count;
+	fput(filp);
+	return status;
+}
+
+/*
+ * Argument block that sys32_old_select() reads from user space.  The
+ * inp/outp/exp/tvp members are 32-bit user addresses; the caller turns
+ * them into pointers with compat_ptr().
+ */
+struct sel_arg_struct {
+ unsigned int n;
+ unsigned int inp;
+ unsigned int outp;
+ unsigned int exp;
+ unsigned int tvp;
+};
+
+/*
+ * Old-style select(): all five arguments arrive packed in one user
+ * structure.  Unpacks it and forwards to compat_sys_select().
+ * Returns -EFAULT when the argument block cannot be read.
+ */
+asmlinkage long
+sys32_old_select (struct sel_arg_struct __user *arg)
+{
+	struct sel_arg_struct args;
+	void __user *readfds, *writefds, *exceptfds, *timeout;
+
+	if (copy_from_user(&args, arg, sizeof(args)) != 0)
+		return -EFAULT;
+
+	readfds = compat_ptr(args.inp);
+	writefds = compat_ptr(args.outp);
+	exceptfds = compat_ptr(args.exp);
+	timeout = compat_ptr(args.tvp);
+	return compat_sys_select(args.n, readfds, writefds, exceptfds, timeout);
+}
+
+/*
+ * Sub-call numbers that sys32_ipc() decodes from the low 16 bits of its
+ * "call" argument.
+ */
+#define SEMOP 1
+#define SEMGET 2
+#define SEMCTL 3
+#define SEMTIMEDOP 4
+#define MSGSND 11
+#define MSGRCV 12
+#define MSGGET 13
+#define MSGCTL 14
+#define SHMAT 21
+#define SHMDT 22
+#define SHMGET 23
+#define SHMCTL 24
+
+/*
+ * 32-bit ipc() multiplexer.  The low 16 bits of @call select the IPC
+ * operation; the high 16 bits carry a "version" that is passed on to
+ * the msgrcv and shmat helpers.  The remaining arguments are
+ * interpreted per operation, with @ptr and @fifth treated as 32-bit
+ * user addresses where a pointer is expected.
+ *
+ * Returns the result of the selected operation, or -ENOSYS for an
+ * unknown sub-call.
+ */
+asmlinkage long
+sys32_ipc(u32 call, int first, int second, int third, u32 ptr, u32 fifth)
+{
+	int version;
+
+	version = call >> 16; /* hack for backward compatibility */
+	call &= 0xffff;
+
+	switch (call) {
+	      case SEMTIMEDOP:
+		if (fifth)
+			return compat_sys_semtimedop(first, compat_ptr(ptr),
+						     second, compat_ptr(fifth));
+		/* else fall through for normal semop() */
+	      case SEMOP:
+		/* struct sembuf is the same on 32 and 64bit :)) */
+		return sys_semtimedop(first, compat_ptr(ptr), second,
+				      NULL);
+	      case SEMGET:
+		return sys_semget(first, second, third);
+	      case SEMCTL:
+		return compat_sys_semctl(first, second, third, compat_ptr(ptr));
+
+	      case MSGSND:
+		return compat_sys_msgsnd(first, second, third, compat_ptr(ptr));
+	      case MSGRCV:
+		return compat_sys_msgrcv(first, second, fifth, third, version, compat_ptr(ptr));
+	      case MSGGET:
+		return sys_msgget((key_t) first, second);
+	      case MSGCTL:
+		return compat_sys_msgctl(first, second, compat_ptr(ptr));
+
+	      case SHMAT:
+		return compat_sys_shmat(first, second, third, version, compat_ptr(ptr));
+	      case SHMDT:
+		return sys_shmdt(compat_ptr(ptr));
+	      case SHMGET:
+		return sys_shmget(first, (unsigned)second, third);
+	      case SHMCTL:
+		return compat_sys_shmctl(first, second, compat_ptr(ptr));
+
+	      default:
+		break;
+	}
+	/* only reached for an unrecognised sub-call */
+	return -ENOSYS;
+}
+
+asmlinkage long
+compat_sys_wait4 (compat_pid_t pid, compat_uint_t * stat_addr, int options,
+ struct compat_rusage *ru);
+
+/*
+ * 32-bit waitpid(): compat_sys_wait4() without resource-usage
+ * reporting.
+ */
+asmlinkage long
+sys32_waitpid (int pid, unsigned int *stat_addr, int options)
+{
+	struct compat_rusage *no_rusage = NULL;
+
+	return compat_sys_wait4(pid, stat_addr, options, no_rusage);
+}
+
+/*
+ * Read one 32-bit word at @addr in @child's address space into *@val.
+ * Returns 0 on success or -EIO when fewer than sizeof(*val) bytes could
+ * be transferred.
+ *
+ * The return type is a signed int so that the negative errno survives
+ * widening into the caller's "long" result.
+ */
+static int
+ia32_peek (struct task_struct *child, unsigned long addr, unsigned int *val)
+{
+	size_t copied;
+
+	copied = access_process_vm(child, addr, val, sizeof(*val), 0);
+	return (copied != sizeof(*val)) ? -EIO : 0;
+}
+
+/*
+ * Write the 32-bit word @val at @addr in @child's address space.
+ * Returns 0 on success or -EIO when the full word could not be written.
+ *
+ * The return type is a signed int: sys32_ptrace() stores the result in
+ * a "long", and an unsigned -EIO would widen to a large positive value
+ * instead of a negative errno.
+ */
+static int
+ia32_poke (struct task_struct *child, unsigned long addr, unsigned int val)
+{
+	if (access_process_vm(child, addr, &val, sizeof(val), 1) != sizeof(val))
+		return -EIO;
+	return 0;
+}
+
+/*
+ * The order in which registers are stored in the ptrace regs structure.
+ * getreg() and putreg() take a byte offset and divide it by sizeof(int)
+ * to obtain one of these indices; there are 17 slots (0..16).
+ */
+#define PT_EBX 0
+#define PT_ECX 1
+#define PT_EDX 2
+#define PT_ESI 3
+#define PT_EDI 4
+#define PT_EBP 5
+#define PT_EAX 6
+#define PT_DS 7
+#define PT_ES 8
+#define PT_FS 9
+#define PT_GS 10
+#define PT_ORIG_EAX 11
+#define PT_EIP 12
+#define PT_CS 13
+#define PT_EFL 14
+#define PT_UESP 15
+#define PT_SS 16
+
+/*
+ * Return the value of the ia32 register stored at byte offset @regno of
+ * the ptrace register layout for @child.  General registers come from
+ * the child's saved ia64 pt_regs, eflags from the thread structure, and
+ * the segment registers always read as __USER_DS / __USER_CS.  An
+ * unknown offset is logged and reads as 0.
+ */
+static unsigned int
+getreg (struct task_struct *child, int regno)
+{
+	struct pt_regs *regs = ia64_task_regs(child);
+	unsigned int value = 0;
+
+	switch (regno / sizeof(int)) {
+	      case PT_EBX:
+		value = regs->r11;
+		break;
+	      case PT_ECX:
+		value = regs->r9;
+		break;
+	      case PT_EDX:
+		value = regs->r10;
+		break;
+	      case PT_ESI:
+		value = regs->r14;
+		break;
+	      case PT_EDI:
+		value = regs->r15;
+		break;
+	      case PT_EBP:
+		value = regs->r13;
+		break;
+	      case PT_EAX:
+		value = regs->r8;
+		break;
+	      case PT_ORIG_EAX:
+		value = regs->r1;	/* see dispatch_to_ia32_handler() */
+		break;
+	      case PT_EIP:
+		value = regs->cr_iip;
+		break;
+	      case PT_UESP:
+		value = regs->r12;
+		break;
+	      case PT_EFL:
+		value = child->thread.eflag;
+		break;
+	      case PT_DS:
+	      case PT_ES:
+	      case PT_FS:
+	      case PT_GS:
+	      case PT_SS:
+		value = __USER_DS;
+		break;
+	      case PT_CS:
+		value = __USER_CS;
+		break;
+	      default:
+		printk(KERN_ERR "ia32.getreg(): unknown register %d\n", regno);
+		break;
+	}
+	return value;
+}
+
+/*
+ * Store @value into the ia32 register at byte offset @regno of the
+ * ptrace register layout for @child.  General registers go to the
+ * child's saved ia64 pt_regs and eflags to the thread structure.
+ * Segment registers cannot be changed: a value other than __USER_DS
+ * (or __USER_CS for %cs) is logged and otherwise ignored.  An unknown
+ * offset is logged.
+ */
+static void
+putreg (struct task_struct *child, int regno, unsigned int value)
+{
+	struct pt_regs *child_regs;
+
+	child_regs = ia64_task_regs(child);
+	switch (regno / sizeof(int)) {
+	      case PT_EBX: child_regs->r11 = value; break;
+	      case PT_ECX: child_regs->r9 = value; break;
+	      case PT_EDX: child_regs->r10 = value; break;
+	      case PT_ESI: child_regs->r14 = value; break;
+	      case PT_EDI: child_regs->r15 = value; break;
+	      case PT_EBP: child_regs->r13 = value; break;
+	      case PT_EAX: child_regs->r8 = value; break;
+	      case PT_ORIG_EAX: child_regs->r1 = value; break;
+	      case PT_EIP: child_regs->cr_iip = value; break;
+	      case PT_UESP: child_regs->r12 = value; break;
+	      case PT_EFL: child->thread.eflag = value; break;
+	      case PT_DS: case PT_ES: case PT_FS: case PT_GS: case PT_SS:
+		if (value != __USER_DS)
+			printk(KERN_ERR
+			       "ia32.putreg: attempt to set invalid segment register %d = %x\n",
+			       regno, value);
+		break;
+	      case PT_CS:
+		if (value != __USER_CS)
+			printk(KERN_ERR
+			       "ia32.putreg: attempt to set invalid segment register %d = %x\n",
+			       regno, value);
+		break;
+	      default:
+		printk(KERN_ERR "ia32.putreg: unknown register %d\n", regno);
+		break;
+	}
+}
+
+/*
+ * Copy one x87 stack register out to user space in ia32 format.
+ *
+ * @regno is the stack-relative register number; it is rotated by @tos
+ * (modulo 8) to select the slot.  Slots 0-3 are read from f8-f11 in
+ * @ptp, slots 4-7 from f12 onwards in @swp.  The value is converted
+ * with ia64f2ia32f() into a 16-byte-aligned scratch area and then
+ * copied to @reg.
+ *
+ * NOTE(review): the copy_to_user() result is discarded, so a fault here
+ * is not reported to the caller.
+ */
+static void
+put_fpreg (int regno, struct _fpreg_ia32 __user *reg, struct pt_regs *ptp,
+ struct switch_stack *swp, int tos)
+{
+ struct _fpreg_ia32 *f;
+ char buf[32];
+
+ /* carve a 16-byte-aligned conversion buffer out of buf[] */
+ f = (struct _fpreg_ia32 *)(((unsigned long)buf + 15) & ~15);
+ if ((regno += tos) >= 8)
+ regno -= 8;
+ switch (regno) {
+ case 0:
+ ia64f2ia32f(f, &ptp->f8);
+ break;
+ case 1:
+ ia64f2ia32f(f, &ptp->f9);
+ break;
+ case 2:
+ ia64f2ia32f(f, &ptp->f10);
+ break;
+ case 3:
+ ia64f2ia32f(f, &ptp->f11);
+ break;
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ ia64f2ia32f(f, &swp->f12 + (regno - 4));
+ break;
+ }
+ copy_to_user(reg, f, sizeof(*reg));
+}
+
+/*
+ * Load one x87 stack register from user space.
+ *
+ * @regno is the stack-relative register number; it is rotated by @tos
+ * (modulo 8) to select the slot.  Slots 0-3 live in f8-f11 of @ptp,
+ * slots 4-7 start at f12 in @swp.  sizeof(*reg) bytes are copied from
+ * @reg straight into the chosen slot; no format conversion is done
+ * here.
+ *
+ * NOTE(review): the copy_from_user() results are discarded, so a fault
+ * here is not reported to the caller.
+ */
+static void
+get_fpreg (int regno, struct _fpreg_ia32 __user *reg, struct pt_regs *ptp,
+ struct switch_stack *swp, int tos)
+{
+
+ if ((regno += tos) >= 8)
+ regno -= 8;
+ switch (regno) {
+ case 0:
+ copy_from_user(&ptp->f8, reg, sizeof(*reg));
+ break;
+ case 1:
+ copy_from_user(&ptp->f9, reg, sizeof(*reg));
+ break;
+ case 2:
+ copy_from_user(&ptp->f10, reg, sizeof(*reg));
+ break;
+ case 3:
+ copy_from_user(&ptp->f11, reg, sizeof(*reg));
+ break;
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ copy_from_user(&swp->f12 + (regno - 4), reg, sizeof(*reg));
+ break;
+ }
+ return;
+}
+
+/*
+ * Write @tsk's x87 state to the user buffer @save in the ia32 i387
+ * layout: control, status and tag words plus the instruction and data
+ * pointers are taken from thread.fcr, thread.fsr, thread.fir and
+ * thread.fdr, followed by the eight stack registers.
+ *
+ * Returns -EFAULT when @save fails access_ok(), 0 otherwise.  The
+ * individual __put_user()/put_fpreg() stores are not checked.
+ */
+int
+save_ia32_fpstate (struct task_struct *tsk, struct ia32_user_i387_struct __user *save)
+{
+ struct switch_stack *swp;
+ struct pt_regs *ptp;
+ int i, tos;
+
+ if (!access_ok(VERIFY_WRITE, save, sizeof(*save)))
+ return -EFAULT;
+
+ __put_user(tsk->thread.fcr & 0xffff, &save->cwd);
+ __put_user(tsk->thread.fsr & 0xffff, &save->swd);
+ __put_user((tsk->thread.fsr>>16) & 0xffff, &save->twd);
+ __put_user(tsk->thread.fir, &save->fip);
+ __put_user((tsk->thread.fir>>32) & 0xffff, &save->fcs);
+ __put_user(tsk->thread.fdr, &save->foo);
+ __put_user((tsk->thread.fdr>>32) & 0xffff, &save->fos);
+
+ /*
+ * Stack frames start with 16-bytes of temp space
+ */
+ swp = (struct switch_stack *)(tsk->thread.ksp + 16);
+ ptp = ia64_task_regs(tsk);
+ /* top-of-stack index: 3-bit field at bit 11 of the status word */
+ tos = (tsk->thread.fsr >> 11) & 7;
+ for (i = 0; i < 8; i++)
+ put_fpreg(i, &save->st_space[i], ptp, swp, tos);
+ return 0;
+}
+
+/*
+ * Load @tsk's x87 state from the user buffer @save (ia32 i387 layout).
+ *
+ * Only the low 32 bits of thread.fsr, thread.fir and thread.fdr are
+ * replaced; the upper halves are preserved (save_ia32_fpxstate() reads
+ * SSE status from fsr >> 32, and save_ia32_fpstate() reads the code and
+ * operand selectors from fir >> 32 and fdr >> 32).  Within thread.fcr
+ * only the bits in 0x1f3f are replaced.
+ *
+ * Returns -EFAULT when @save fails access_ok(), 0 otherwise.  The
+ * individual __get_user()/get_fpreg() loads are not checked.
+ */
+static int
+restore_ia32_fpstate (struct task_struct *tsk, struct ia32_user_i387_struct __user *save)
+{
+	struct switch_stack *swp;
+	struct pt_regs *ptp;
+	int i, tos;
+	unsigned int fsrlo, fsrhi, num32;
+
+	if (!access_ok(VERIFY_READ, save, sizeof(*save)))
+		return -EFAULT;
+
+	__get_user(num32, (unsigned int __user *)&save->cwd);
+	tsk->thread.fcr = (tsk->thread.fcr & (~0x1f3f)) | (num32 & 0x1f3f);
+	__get_user(fsrlo, (unsigned int __user *)&save->swd);
+	__get_user(fsrhi, (unsigned int __user *)&save->twd);
+	num32 = (fsrhi << 16) | fsrlo;
+	/*
+	 * The masks must be 64-bit constants: a plain ~0xffffffff is an
+	 * unsigned int equal to 0 and would wipe the upper 32 bits
+	 * instead of preserving them.
+	 */
+	tsk->thread.fsr = (tsk->thread.fsr & (~0xffffffffUL)) | num32;
+	__get_user(num32, (unsigned int __user *)&save->fip);
+	tsk->thread.fir = (tsk->thread.fir & (~0xffffffffUL)) | num32;
+	__get_user(num32, (unsigned int __user *)&save->foo);
+	tsk->thread.fdr = (tsk->thread.fdr & (~0xffffffffUL)) | num32;
+
+	/*
+	 * Stack frames start with 16-bytes of temp space
+	 */
+	swp = (struct switch_stack *)(tsk->thread.ksp + 16);
+	ptp = ia64_task_regs(tsk);
+	/* top-of-stack index: 3-bit field at bit 11 of the status word */
+	tos = (tsk->thread.fsr >> 11) & 7;
+	for (i = 0; i < 8; i++)
+		get_fpreg(i, &save->st_space[i], ptp, swp, tos);
+	return 0;
+}
+
+/*
+ * Write @tsk's x87 and SSE state to the user buffer @save in the ia32
+ * FXSR layout.  The x87 part matches save_ia32_fpstate() except that
+ * each stack register occupies four st_space[] words.  MXCSR is
+ * assembled from the upper halves of thread.fcr and thread.fsr, and
+ * the eight XMM registers are gathered from the switch_stack starting
+ * at f16/f17, two ia64 registers per XMM register.
+ *
+ * Returns -EFAULT when @save fails access_ok(), 0 otherwise.  The
+ * individual user-space stores are not checked.
+ */
+int
+save_ia32_fpxstate (struct task_struct *tsk, struct ia32_user_fxsr_struct __user *save)
+{
+ struct switch_stack *swp;
+ struct pt_regs *ptp;
+ int i, tos;
+ unsigned long mxcsr=0;
+ unsigned long num128[2];
+
+ if (!access_ok(VERIFY_WRITE, save, sizeof(*save)))
+ return -EFAULT;
+
+ __put_user(tsk->thread.fcr & 0xffff, &save->cwd);
+ __put_user(tsk->thread.fsr & 0xffff, &save->swd);
+ __put_user((tsk->thread.fsr>>16) & 0xffff, &save->twd);
+ __put_user(tsk->thread.fir, &save->fip);
+ __put_user((tsk->thread.fir>>32) & 0xffff, &save->fcs);
+ __put_user(tsk->thread.fdr, &save->foo);
+ __put_user((tsk->thread.fdr>>32) & 0xffff, &save->fos);
+
+ /*
+ * Stack frames start with 16-bytes of temp space
+ */
+ swp = (struct switch_stack *)(tsk->thread.ksp + 16);
+ ptp = ia64_task_regs(tsk);
+ tos = (tsk->thread.fsr >> 11) & 7;
+ for (i = 0; i < 8; i++)
+ put_fpreg(i, (struct _fpreg_ia32 __user *)&save->st_space[4*i], ptp, swp, tos);
+
+ /* control bits come from fcr's upper half, status flags from fsr's */
+ mxcsr = ((tsk->thread.fcr>>32) & 0xff80) | ((tsk->thread.fsr>>32) & 0x3f);
+ __put_user(mxcsr & 0xffff, &save->mxcsr);
+ for (i = 0; i < 8; i++) {
+ /* low 64 bits from the even register, high 64 bits from the odd one */
+ memcpy(&(num128[0]), &(swp->f16) + i*2, sizeof(unsigned long));
+ memcpy(&(num128[1]), &(swp->f17) + i*2, sizeof(unsigned long));
+ copy_to_user(&save->xmm_space[0] + 4*i, num128, sizeof(struct _xmmreg_ia32));
+ }
+ return 0;
+}
+
+/*
+ * Load @tsk's x87 and SSE state from the user buffer @save (ia32 FXSR
+ * layout).
+ *
+ * The low 32 bits of thread.fsr, thread.fir and thread.fdr are replaced
+ * from the x87 fields while their upper halves are preserved; the MXCSR
+ * bits are then merged into the upper halves of thread.fcr and
+ * thread.fsr.  The eight XMM registers are scattered into the
+ * switch_stack starting at f16/f17, two ia64 registers per XMM
+ * register.
+ *
+ * Returns -EFAULT when @save fails access_ok(), 0 otherwise.  The
+ * individual user-space loads are not checked.
+ */
+static int
+restore_ia32_fpxstate (struct task_struct *tsk, struct ia32_user_fxsr_struct __user *save)
+{
+	struct switch_stack *swp;
+	struct pt_regs *ptp;
+	int i, tos;
+	unsigned int fsrlo, fsrhi, num32;
+	int mxcsr;
+	unsigned long num64;
+	unsigned long num128[2];
+
+	if (!access_ok(VERIFY_READ, save, sizeof(*save)))
+		return -EFAULT;
+
+	__get_user(num32, (unsigned int __user *)&save->cwd);
+	tsk->thread.fcr = (tsk->thread.fcr & (~0x1f3f)) | (num32 & 0x1f3f);
+	__get_user(fsrlo, (unsigned int __user *)&save->swd);
+	__get_user(fsrhi, (unsigned int __user *)&save->twd);
+	num32 = (fsrhi << 16) | fsrlo;
+	/*
+	 * The masks must be 64-bit constants: a plain ~0xffffffff is an
+	 * unsigned int equal to 0 and would wipe the upper 32 bits
+	 * instead of preserving them.
+	 */
+	tsk->thread.fsr = (tsk->thread.fsr & (~0xffffffffUL)) | num32;
+	__get_user(num32, (unsigned int __user *)&save->fip);
+	tsk->thread.fir = (tsk->thread.fir & (~0xffffffffUL)) | num32;
+	__get_user(num32, (unsigned int __user *)&save->foo);
+	tsk->thread.fdr = (tsk->thread.fdr & (~0xffffffffUL)) | num32;
+
+	/*
+	 * Stack frames start with 16-bytes of temp space
+	 */
+	swp = (struct switch_stack *)(tsk->thread.ksp + 16);
+	ptp = ia64_task_regs(tsk);
+	tos = (tsk->thread.fsr >> 11) & 7;
+	for (i = 0; i < 8; i++)
+		get_fpreg(i, (struct _fpreg_ia32 __user *)&save->st_space[4*i], ptp, swp, tos);
+
+	__get_user(mxcsr, (unsigned int __user *)&save->mxcsr);
+	/*
+	 * NOTE(review): save_ia32_fpxstate() extracts the control bits
+	 * with mask 0xff80, but 0xff10 is used here; left unchanged,
+	 * confirm which mask is intended.
+	 */
+	num64 = mxcsr & 0xff10;
+	tsk->thread.fcr = (tsk->thread.fcr & (~0xff1000000000UL)) | (num64<<32);
+	num64 = mxcsr & 0x3f;
+	tsk->thread.fsr = (tsk->thread.fsr & (~0x3f00000000UL)) | (num64<<32);
+
+	for (i = 0; i < 8; i++) {
+		copy_from_user(num128, &save->xmm_space[0] + 4*i, sizeof(struct _xmmreg_ia32));
+		/* low 64 bits to the even register, high 64 bits to the odd one */
+		memcpy(&(swp->f16) + i*2, &(num128[0]), sizeof(unsigned long));
+		memcpy(&(swp->f17) + i*2, &(num128[1]), sizeof(unsigned long));
+	}
+	return 0;
+}
+
+/*
+ * 32-bit ptrace().
+ *
+ * PTRACE_TRACEME, PTRACE_ATTACH and the run-control requests (SYSCALL,
+ * CONT, KILL, SINGLESTEP, DETACH) are forwarded to sys_ptrace().
+ * Memory peeks/pokes, ia32 register access and ia32 FP state transfers
+ * are handled here; anything else goes to ptrace_request().  @addr and
+ * @data are 32-bit values; @data is converted with compat_ptr() where
+ * it names a user buffer.
+ *
+ * Returns -ESRCH when @pid is not found, -EPERM for pid 1, -EIO for an
+ * invalid register offset or inaccessible buffer, or the result of the
+ * selected operation.  The whole call runs under the big kernel lock.
+ */
+asmlinkage long
+sys32_ptrace (int request, pid_t pid, unsigned int addr, unsigned int data)
+{
+	struct task_struct *child;
+	unsigned int value, tmp;
+	long i, ret;
+
+	lock_kernel();
+	if (request == PTRACE_TRACEME) {
+		ret = sys_ptrace(request, pid, addr, data);
+		goto out;
+	}
+
+	ret = -ESRCH;
+	read_lock(&tasklist_lock);
+	child = find_task_by_pid(pid);
+	if (child)
+		get_task_struct(child);
+	read_unlock(&tasklist_lock);
+	if (!child)
+		goto out;
+	ret = -EPERM;
+	if (pid == 1)		/* no messing around with init! */
+		goto out_tsk;
+
+	if (request == PTRACE_ATTACH) {
+		ret = sys_ptrace(request, pid, addr, data);
+		goto out_tsk;
+	}
+
+	ret = ptrace_check_attach(child, request == PTRACE_KILL);
+	if (ret < 0)
+		goto out_tsk;
+
+	switch (request) {
+	      case PTRACE_PEEKTEXT:
+	      case PTRACE_PEEKDATA:	/* read word at location addr */
+		ret = ia32_peek(child, addr, &value);
+		if (ret == 0)
+			ret = put_user(value, (unsigned int __user *) compat_ptr(data));
+		else
+			ret = -EIO;
+		goto out_tsk;
+
+	      case PTRACE_POKETEXT:
+	      case PTRACE_POKEDATA:	/* write the word at location addr */
+		ret = ia32_poke(child, addr, data);
+		goto out_tsk;
+
+	      case PTRACE_PEEKUSR:	/* read word at addr in USER area */
+		ret = -EIO;
+		/*
+		 * There are 17 register slots, so the last valid offset
+		 * is 16*sizeof(int); 17*sizeof(int) is already out of
+		 * range.
+		 */
+		if ((addr & 3) || addr >= 17*sizeof(int))
+			break;
+
+		tmp = getreg(child, addr);
+		if (!put_user(tmp, (unsigned int __user *) compat_ptr(data)))
+			ret = 0;
+		break;
+
+	      case PTRACE_POKEUSR:	/* write word at addr in USER area */
+		ret = -EIO;
+		/* same bound as PTRACE_PEEKUSR: valid offsets are 0..16*sizeof(int) */
+		if ((addr & 3) || addr >= 17*sizeof(int))
+			break;
+
+		putreg(child, addr, data);
+		ret = 0;
+		break;
+
+	      case IA32_PTRACE_GETREGS:
+		if (!access_ok(VERIFY_WRITE, compat_ptr(data), 17*sizeof(int))) {
+			ret = -EIO;
+			break;
+		}
+		for (i = 0; i < (int) (17*sizeof(int)); i += sizeof(int) ) {
+			put_user(getreg(child, i), (unsigned int __user *) compat_ptr(data));
+			data += sizeof(int);
+		}
+		ret = 0;
+		break;
+
+	      case IA32_PTRACE_SETREGS:
+		if (!access_ok(VERIFY_READ, compat_ptr(data), 17*sizeof(int))) {
+			ret = -EIO;
+			break;
+		}
+		for (i = 0; i < (int) (17*sizeof(int)); i += sizeof(int) ) {
+			get_user(tmp, (unsigned int __user *) compat_ptr(data));
+			putreg(child, i, tmp);
+			data += sizeof(int);
+		}
+		ret = 0;
+		break;
+
+	      case IA32_PTRACE_GETFPREGS:
+		ret = save_ia32_fpstate(child, (struct ia32_user_i387_struct __user *)
+					compat_ptr(data));
+		break;
+
+	      case IA32_PTRACE_GETFPXREGS:
+		ret = save_ia32_fpxstate(child, (struct ia32_user_fxsr_struct __user *)
+					 compat_ptr(data));
+		break;
+
+	      case IA32_PTRACE_SETFPREGS:
+		ret = restore_ia32_fpstate(child, (struct ia32_user_i387_struct __user *)
+					   compat_ptr(data));
+		break;
+
+	      case IA32_PTRACE_SETFPXREGS:
+		ret = restore_ia32_fpxstate(child, (struct ia32_user_fxsr_struct __user *)
+					    compat_ptr(data));
+		break;
+
+	      case PTRACE_GETEVENTMSG:
+		ret = put_user(child->ptrace_message, (unsigned int __user *) compat_ptr(data));
+		break;
+
+	      case PTRACE_SYSCALL:	/* continue, stop after next syscall */
+	      case PTRACE_CONT:		/* restart after signal. */
+	      case PTRACE_KILL:
+	      case PTRACE_SINGLESTEP:	/* execute child for one instruction */
+	      case PTRACE_DETACH:	/* detach a process */
+		ret = sys_ptrace(request, pid, addr, data);
+		break;
+
+	      default:
+		ret = ptrace_request(child, request, addr, data);
+		break;
+
+	}
+  out_tsk:
+	put_task_struct(child);
+  out:
+	unlock_kernel();
+	return ret;
+}
+
+/*
+ * 32-bit layout of the signal-stack descriptor exchanged with user
+ * space by sys32_sigaltstack(); ss_sp holds a 32-bit user address.
+ */
+typedef struct {
+ unsigned int ss_sp;
+ unsigned int ss_flags;
+ unsigned int ss_size;
+} ia32_stack_t;
+
+/*
+ * 32-bit sigaltstack(): converts the ia32 stack descriptor at @uss32
+ * (may be NULL) to the native layout, calls do_sigaltstack() on kernel
+ * copies, and writes the previous setting back to @uoss32 (may be
+ * NULL).  @arg2..@arg7 are unused; @pt supplies the stack pointer
+ * (r12) passed to do_sigaltstack().
+ *
+ * Returns -EFAULT when a user copy fails, -ENOMEM when the requested
+ * size is below MINSIGSTKSZ_IA32 (unless ss_flags is SS_DISABLE), or
+ * the result of do_sigaltstack().
+ */
+asmlinkage long
+sys32_sigaltstack (ia32_stack_t __user *uss32, ia32_stack_t __user *uoss32,
+		   long arg2, long arg3, long arg4, long arg5, long arg6,
+		   long arg7, struct pt_regs pt)
+{
+	stack_t uss, uoss;
+	ia32_stack_t buf32;
+	int ret;
+	mm_segment_t old_fs = get_fs();
+
+	if (uss32) {
+		if (copy_from_user(&buf32, uss32, sizeof(ia32_stack_t)))
+			return -EFAULT;
+		uss.ss_sp = (void __user *) (long) buf32.ss_sp;
+		uss.ss_flags = buf32.ss_flags;
+		/* MINSIGSTKSZ is different for ia32 vs ia64. We lie here to pass the
+		   check and set it to the user requested value later */
+		if ((buf32.ss_flags != SS_DISABLE) && (buf32.ss_size < MINSIGSTKSZ_IA32)) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		uss.ss_size = MINSIGSTKSZ;
+	}
+	set_fs(KERNEL_DS);
+	ret = do_sigaltstack(uss32 ? (stack_t __user *) &uss : NULL,
+			     (stack_t __user *) &uoss, pt.r12);
+	/*
+	 * Record the size the caller really asked for, but only when a
+	 * new stack was supplied and accepted: buf32 is uninitialised
+	 * when uss32 is NULL, and a failed call must not alter the
+	 * task's settings.
+	 */
+	if (uss32 && ret >= 0)
+		current->sas_ss_size = buf32.ss_size;
+	set_fs(old_fs);
+out:
+	if (ret < 0)
+		return(ret);
+	if (uoss32) {
+		buf32.ss_sp = (long __user) uoss.ss_sp;
+		buf32.ss_flags = uoss.ss_flags;
+		buf32.ss_size = uoss.ss_size;
+		if (copy_to_user(uoss32, &buf32, sizeof(ia32_stack_t)))
+			return -EFAULT;
+	}
+	return ret;
+}
+
+/*
+ * 32-bit pause(): marks the calling task interruptible, yields the
+ * CPU, and returns -ERESTARTNOHAND once it runs again.
+ */
+asmlinkage int
+sys32_pause (void)
+{
+	struct task_struct *self = current;
+
+	self->state = TASK_INTERRUPTIBLE;
+	schedule();
+
+	return -ERESTARTNOHAND;
+}
+
+/*
+ * 32-bit msync(): rejects a @start for which OFFSET4K() is non-zero,
+ * then rounds @start down with PAGE_START() and grows @len by the same
+ * amount before calling sys_msync().
+ */
+asmlinkage int
+sys32_msync (unsigned int start, unsigned int len, int flags)
+{
+	unsigned int page_base;
+	unsigned int slack;
+
+	if (OFFSET4K(start) != 0)
+		return -EINVAL;
+
+	page_base = PAGE_START(start);
+	slack = start - page_base;
+	return sys_msync(page_base, len + slack, flags);
+}
+
+/*
+ * 32-bit layout of the sysctl argument block read by sys32_sysctl().
+ * name, oldval, oldlenp and newval are 32-bit user addresses that the
+ * caller converts with compat_ptr().
+ */
+struct sysctl32 {
+ unsigned int name;
+ int nlen;
+ unsigned int oldval;
+ unsigned int oldlenp;
+ unsigned int newval;
+ unsigned int newlen;
+ unsigned int __unused[4];
+};
+
+#ifdef CONFIG_SYSCTL
+/*
+ * 32-bit sysctl(): reads the packed 32-bit argument block at @args,
+ * converts its user addresses, and calls do_sysctl() with the old
+ * length held in a kernel variable (hence the switch to KERNEL_DS).
+ * When an old-value buffer was supplied, the resulting length is
+ * written back to user space as an int.
+ *
+ * Returns -EFAULT when a user access fails, otherwise the result of
+ * do_sysctl().
+ */
+asmlinkage long
+sys32_sysctl (struct sysctl32 __user *args)
+{
+ struct sysctl32 a32;
+ mm_segment_t old_fs = get_fs ();
+ void __user *oldvalp, *newvalp;
+ size_t oldlen;
+ int __user *namep;
+ long ret;
+
+ if (copy_from_user(&a32, args, sizeof(a32)))
+ return -EFAULT;
+
+ /*
+ * We need to pre-validate these because we have to disable address checking
+ * before calling do_sysctl() because of OLDLEN but we can't run the risk of the
+ * user specifying bad addresses here. Well, since we're dealing with 32 bit
+ * addresses, we KNOW that access_ok() will always succeed, so this is an
+ * expensive NOP, but so what...
+ */
+ namep = (int __user *) compat_ptr(a32.name);
+ oldvalp = compat_ptr(a32.oldval);
+ newvalp = compat_ptr(a32.newval);
+
+ /* NOTE(review): oldlen stays uninitialised when oldvalp is NULL, yet &oldlen is still passed below */
+ if ((oldvalp && get_user(oldlen, (int __user *) compat_ptr(a32.oldlenp)))
+ || !access_ok(VERIFY_WRITE, namep, 0)
+ || !access_ok(VERIFY_WRITE, oldvalp, 0)
+ || !access_ok(VERIFY_WRITE, newvalp, 0))
+ return -EFAULT;
+
+ set_fs(KERNEL_DS);
+ lock_kernel();
+ ret = do_sysctl(namep, a32.nlen, oldvalp, (size_t __user *) &oldlen,
+ newvalp, (size_t) a32.newlen);
+ unlock_kernel();
+ set_fs(old_fs);
+
+ /* hand the (possibly updated) length back in its 32-bit form */
+ if (oldvalp && put_user (oldlen, (int __user *) compat_ptr(a32.oldlenp)))
+ return -EFAULT;
+
+ return ret;
+}
+#endif
+
+/*
+ * 32-bit uname(): same as sys_newuname(), but the machine field is
+ * overwritten with "i686" (8 bytes, NUL padded) on success.
+ */
+asmlinkage long
+sys32_newuname (struct new_utsname __user *name)
+{
+	int status = sys_newuname(name);
+
+	if (status)
+		return status;
+	if (copy_to_user(name->machine, "i686\0\0\0", 8))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * 16-bit-uid getresuid(): obtains the real, effective and saved uids
+ * via sys_getresuid() into kernel variables and stores them to the
+ * u16 user locations.
+ *
+ * Returns the error from sys_getresuid() without touching user memory
+ * (the locals would be uninitialised in that case), -EFAULT when a
+ * store fails, or 0.
+ */
+asmlinkage long
+sys32_getresuid16 (u16 __user *ruid, u16 __user *euid, u16 __user *suid)
+{
+	uid_t a, b, c;
+	int ret;
+	mm_segment_t old_fs = get_fs();
+
+	set_fs(KERNEL_DS);
+	ret = sys_getresuid((uid_t __user *) &a, (uid_t __user *) &b, (uid_t __user *) &c);
+	set_fs(old_fs);
+
+	/* same ordering as sys32_getresgid16(): bail out before copying */
+	if (ret)
+		return ret;
+
+	if (put_user(a, ruid) || put_user(b, euid) || put_user(c, suid))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * 16-bit-gid getresgid(): obtains the real, effective and saved gids
+ * via sys_getresgid() into kernel variables and stores them to the
+ * u16 user locations.  All three stores are always attempted; the
+ * result is the bitwise OR of their return values.
+ */
+asmlinkage long
+sys32_getresgid16 (u16 __user *rgid, u16 __user *egid, u16 __user *sgid)
+{
+	mm_segment_t saved_fs = get_fs();
+	gid_t real, effective, saved;
+	int status;
+
+	set_fs(KERNEL_DS);
+	status = sys_getresgid((gid_t __user *) &real, (gid_t __user *) &effective,
+			       (gid_t __user *) &saved);
+	set_fs(saved_fs);
+
+	if (status != 0)
+		return status;
+
+	status = put_user(real, rgid);
+	status |= put_user(effective, egid);
+	status |= put_user(saved, sgid);
+	return status;
+}
+
+/*
+ * 32-bit lseek(): @offset is a signed 32-bit value and must be
+ * sign-extended before it reaches sys_lseek().
+ */
+asmlinkage long
+sys32_lseek (unsigned int fd, int offset, unsigned int whence)
+{
+	off_t wide_offset = offset;	/* the widening conversion sign-extends */
+
+	return sys_lseek(fd, wide_offset, whence);
+}
+
+/*
+ * Copy every gid in @group_info to the user array @grouplist, narrowed
+ * to a short.  Returns 0, or -EFAULT on the first failed store.
+ */
+static int
+groups16_to_user(short __user *grouplist, struct group_info *group_info)
+{
+	int idx = 0;
+
+	while (idx < group_info->ngroups) {
+		short gid16 = (short)GROUP_AT(group_info, idx);
+
+		if (put_user(gid16, grouplist + idx))
+			return -EFAULT;
+		idx++;
+	}
+
+	return 0;
+}
+
+/*
+ * Fill @group_info (group_info->ngroups entries) from the user array of
+ * shorts at @grouplist.  Returns 0, or -EFAULT on the first failed
+ * load.
+ */
+static int
+groups16_from_user(struct group_info *group_info, short __user *grouplist)
+{
+	int idx = 0;
+
+	while (idx < group_info->ngroups) {
+		short gid16;
+
+		if (get_user(gid16, grouplist + idx))
+			return -EFAULT;
+		GROUP_AT(group_info, idx) = (gid_t)gid16;
+		idx++;
+	}
+
+	return 0;
+}
+
+/*
+ * 16-bit-gid getgroups(): returns the number of supplementary groups
+ * of the current task.  When @gidsetsize is non-zero the groups are
+ * also copied to @grouplist.
+ *
+ * Returns -EINVAL when @gidsetsize is negative or too small to hold
+ * all groups, -EFAULT when the copy fails.
+ */
+asmlinkage long
+sys32_getgroups16 (int gidsetsize, short __user *grouplist)
+{
+	int result;
+
+	if (gidsetsize < 0)
+		return -EINVAL;
+
+	/* hold a reference while the group list is inspected */
+	get_group_info(current->group_info);
+	result = current->group_info->ngroups;
+	if (gidsetsize != 0) {
+		if (result > gidsetsize)
+			result = -EINVAL;
+		else if (groups16_to_user(grouplist, current->group_info))
+			result = -EFAULT;
+	}
+	put_group_info(current->group_info);
+	return result;
+}
+
+/*
+ * 16-bit-gid setgroups(): replaces the current task's supplementary
+ * groups with the @gidsetsize shorts at @grouplist.
+ *
+ * Returns -EPERM without CAP_SETGID, -EINVAL when @gidsetsize exceeds
+ * NGROUPS_MAX, -ENOMEM when allocation fails, -EFAULT when the user
+ * array cannot be read, or the result of set_current_groups().
+ */
+asmlinkage long
+sys32_setgroups16 (int gidsetsize, short __user *grouplist)
+{
+	struct group_info *new_groups;
+	int status;
+
+	if (!capable(CAP_SETGID))
+		return -EPERM;
+	if ((unsigned)gidsetsize > NGROUPS_MAX)
+		return -EINVAL;
+
+	new_groups = groups_alloc(gidsetsize);
+	if (new_groups == NULL)
+		return -ENOMEM;
+
+	status = groups16_from_user(new_groups, grouplist);
+	if (status == 0)
+		status = set_current_groups(new_groups);
+
+	/* drop our reference on both the success and the failure path */
+	put_group_info(new_groups);
+	return status;
+}
+
+/*
+ * 32-bit truncate64(): the 64-bit length arrives as two 32-bit halves
+ * and @path as a 32-bit user address.
+ */
+asmlinkage long
+sys32_truncate64 (unsigned int path, unsigned int len_lo, unsigned int len_hi)
+{
+	unsigned long length = (unsigned long) len_hi << 32;
+
+	length |= len_lo;
+	return sys_truncate(compat_ptr(path), length);
+}
+
+/*
+ * 32-bit ftruncate64(): the 64-bit length arrives as two 32-bit halves.
+ */
+asmlinkage long
+sys32_ftruncate64 (int fd, unsigned int len_lo, unsigned int len_hi)
+{
+	unsigned long length = (unsigned long) len_hi << 32;
+
+	length |= len_lo;
+	return sys_ftruncate(fd, length);
+}
+
+/*
+ * Store the contents of @kbuf into the ia32 stat64 structure at @ubuf.
+ * The user structure is cleared first; device numbers are encoded with
+ * huge_encode_dev() and written as two 32-bit halves, and the inode
+ * number and size are likewise split into low and high words.
+ *
+ * Returns -EFAULT when the buffer cannot be cleared, otherwise the
+ * OR-ed results of all the individual stores (0 on success).
+ */
+static int
+putstat64 (struct stat64 __user *ubuf, struct kstat *kbuf)
+{
+	int err;
+	u64 hdev;
+
+	if (clear_user(ubuf, sizeof(*ubuf)))
+		return -EFAULT;
+
+	hdev = huge_encode_dev(kbuf->dev);
+	err  = __put_user(hdev, (u32 __user*)&ubuf->st_dev);
+	err |= __put_user(hdev >> 32, ((u32 __user*)&ubuf->st_dev) + 1);
+	err |= __put_user(kbuf->ino, &ubuf->__st_ino);
+	err |= __put_user(kbuf->ino, &ubuf->st_ino_lo);
+	err |= __put_user(kbuf->ino >> 32, &ubuf->st_ino_hi);
+	err |= __put_user(kbuf->mode, &ubuf->st_mode);
+	err |= __put_user(kbuf->nlink, &ubuf->st_nlink);
+	err |= __put_user(kbuf->uid, &ubuf->st_uid);
+	err |= __put_user(kbuf->gid, &ubuf->st_gid);
+	hdev = huge_encode_dev(kbuf->rdev);
+	/* accumulate with |= so that earlier failures are not discarded */
+	err |= __put_user(hdev, (u32 __user*)&ubuf->st_rdev);
+	err |= __put_user(hdev >> 32, ((u32 __user*)&ubuf->st_rdev) + 1);
+	err |= __put_user(kbuf->size, &ubuf->st_size_lo);
+	err |= __put_user((kbuf->size >> 32), &ubuf->st_size_hi);
+	err |= __put_user(kbuf->atime.tv_sec, &ubuf->st_atime);
+	err |= __put_user(kbuf->atime.tv_nsec, &ubuf->st_atime_nsec);
+	err |= __put_user(kbuf->mtime.tv_sec, &ubuf->st_mtime);
+	err |= __put_user(kbuf->mtime.tv_nsec, &ubuf->st_mtime_nsec);
+	err |= __put_user(kbuf->ctime.tv_sec, &ubuf->st_ctime);
+	err |= __put_user(kbuf->ctime.tv_nsec, &ubuf->st_ctime_nsec);
+	err |= __put_user(kbuf->blksize, &ubuf->st_blksize);
+	err |= __put_user(kbuf->blocks, &ubuf->st_blocks);
+	return err;
+}
+
+/*
+ * 32-bit stat64(): vfs_stat() followed by conversion to the ia32
+ * stat64 layout.
+ */
+asmlinkage long
+sys32_stat64 (char __user *filename, struct stat64 __user *statbuf)
+{
+	struct kstat stat;
+	long err;
+
+	err = vfs_stat(filename, &stat);
+	if (err)
+		return err;
+	return putstat64(statbuf, &stat);
+}
+
+/*
+ * 32-bit lstat64(): vfs_lstat() followed by conversion to the ia32
+ * stat64 layout.
+ */
+asmlinkage long
+sys32_lstat64 (char __user *filename, struct stat64 __user *statbuf)
+{
+	struct kstat stat;
+	long err;
+
+	err = vfs_lstat(filename, &stat);
+	if (err)
+		return err;
+	return putstat64(statbuf, &stat);
+}
+
+/*
+ * 32-bit fstat64(): vfs_fstat() followed by conversion to the ia32
+ * stat64 layout.
+ */
+asmlinkage long
+sys32_fstat64 (unsigned int fd, struct stat64 __user *statbuf)
+{
+	struct kstat stat;
+	long err;
+
+	err = vfs_fstat(fd, &stat);
+	if (err)
+		return err;
+	return putstat64(statbuf, &stat);
+}
+
+/*
+ * 32-bit layout of the structure filled in by sys32_sysinfo().  The
+ * memory counters are 32 bits wide, which is why sys32_sysinfo() may
+ * rescale them.
+ */
+struct sysinfo32 {
+ s32 uptime;
+ u32 loads[3];
+ u32 totalram;
+ u32 freeram;
+ u32 sharedram;
+ u32 bufferram;
+ u32 totalswap;
+ u32 freeswap;
+ u16 procs;
+ u16 pad; /* not written by sys32_sysinfo() */
+ u32 totalhigh;
+ u32 freehigh;
+ u32 mem_unit;
+ char _f[8]; /* not written by sys32_sysinfo() */
+};
+
+/*
+ * 32-bit sysinfo(): runs sys_sysinfo() on a kernel copy and stores the
+ * result into the 32-bit structure at @info.  When totalram or
+ * totalswap does not fit in 32 bits, mem_unit is doubled until it
+ * reaches PAGE_SIZE and every memory counter is shifted right by the
+ * same number of bits.
+ *
+ * Returns -EFAULT when @info cannot be written, otherwise the result
+ * of sys_sysinfo().
+ */
+asmlinkage long
+sys32_sysinfo (struct sysinfo32 __user *info)
+{
+	mm_segment_t saved_fs = get_fs();
+	struct sysinfo si;
+	int shift = 0;
+	long ret, err;
+
+	set_fs(KERNEL_DS);
+	ret = sys_sysinfo((struct sysinfo __user *) &si);
+	set_fs(saved_fs);
+
+	/* Check to see if any memory value is too large for 32-bit and
+	 * scale down if needed.
+	 */
+	if ((si.totalram >> 32) != 0 || (si.totalswap >> 32) != 0) {
+		for (; si.mem_unit < PAGE_SIZE; si.mem_unit <<= 1)
+			shift++;
+		si.totalram >>= shift;
+		si.freeram >>= shift;
+		si.sharedram >>= shift;
+		si.bufferram >>= shift;
+		si.totalswap >>= shift;
+		si.freeswap >>= shift;
+		si.totalhigh >>= shift;
+		si.freehigh >>= shift;
+	}
+
+	if (!access_ok(VERIFY_WRITE, info, sizeof(*info)))
+		return -EFAULT;
+
+	err  = __put_user(si.uptime, &info->uptime);
+	err |= __put_user(si.loads[0], &info->loads[0]);
+	err |= __put_user(si.loads[1], &info->loads[1]);
+	err |= __put_user(si.loads[2], &info->loads[2]);
+	err |= __put_user(si.totalram, &info->totalram);
+	err |= __put_user(si.freeram, &info->freeram);
+	err |= __put_user(si.sharedram, &info->sharedram);
+	err |= __put_user(si.bufferram, &info->bufferram);
+	err |= __put_user(si.totalswap, &info->totalswap);
+	err |= __put_user(si.freeswap, &info->freeswap);
+	err |= __put_user(si.procs, &info->procs);
+	err |= __put_user(si.totalhigh, &info->totalhigh);
+	err |= __put_user(si.freehigh, &info->freehigh);
+	err |= __put_user(si.mem_unit, &info->mem_unit);
+
+	return err ? -EFAULT : ret;
+}
+
+/*
+ * 32-bit sched_rr_get_interval(): obtains the interval into a kernel
+ * timespec and converts it to the compat layout at @interval.
+ *
+ * The user buffer is written only when the underlying call succeeded;
+ * otherwise the uninitialised kernel timespec would be copied out.
+ * Returns -EFAULT when the store fails, otherwise the result of
+ * sys_sched_rr_get_interval().
+ */
+asmlinkage long
+sys32_sched_rr_get_interval (pid_t pid, struct compat_timespec __user *interval)
+{
+	mm_segment_t old_fs = get_fs();
+	struct timespec t;
+	long ret;
+
+	set_fs(KERNEL_DS);
+	ret = sys_sched_rr_get_interval(pid, (struct timespec __user *) &t);
+	set_fs(old_fs);
+	if (!ret && put_compat_timespec(&t, interval))
+		return -EFAULT;
+	return ret;
+}
+
+/*
+ * 32-bit pread(): the 64-bit file position arrives as two 32-bit
+ * halves.
+ */
+asmlinkage long
+sys32_pread (unsigned int fd, void __user *buf, unsigned int count, u32 pos_lo, u32 pos_hi)
+{
+	unsigned long pos = (unsigned long) pos_hi << 32;
+
+	pos |= pos_lo;
+	return sys_pread64(fd, buf, count, pos);
+}
+
+/*
+ * 32-bit pwrite(): the 64-bit file position arrives as two 32-bit
+ * halves.
+ */
+asmlinkage long
+sys32_pwrite (unsigned int fd, void __user *buf, unsigned int count, u32 pos_lo, u32 pos_hi)
+{
+	unsigned long pos = (unsigned long) pos_hi << 32;
+
+	pos |= pos_lo;
+	return sys_pwrite64(fd, buf, count, pos);
+}
+
+/*
+ * 32-bit sendfile(): @offset, when non-NULL, points at a 32-bit file
+ * offset.  It is read into a kernel off_t, passed to sys_sendfile(),
+ * and the updated value is written back afterwards.
+ *
+ * The write-back is done whenever @offset was supplied.  Gating it on
+ * a zero return value, as before, meant the offset was never updated
+ * after data had actually been transferred.  "of" is always
+ * initialised from user space on this path.
+ *
+ * Returns -EFAULT when @offset cannot be read or written, otherwise
+ * the result of sys_sendfile().
+ */
+asmlinkage long
+sys32_sendfile (int out_fd, int in_fd, int __user *offset, unsigned int count)
+{
+	mm_segment_t old_fs = get_fs();
+	long ret;
+	off_t of;
+
+	if (offset && get_user(of, offset))
+		return -EFAULT;
+
+	set_fs(KERNEL_DS);
+	ret = sys_sendfile(out_fd, in_fd, offset ? (off_t __user *) &of : NULL, count);
+	set_fs(old_fs);
+
+	if (offset && put_user(of, offset))
+		return -EFAULT;
+
+	return ret;
+}
+
+/*
+ * 32-bit personality(): a PER_LINUX32 task asking for PER_LINUX keeps
+ * PER_LINUX32, and a PER_LINUX32 result from sys_personality() is
+ * reported as PER_LINUX.
+ */
+asmlinkage long
+sys32_personality (unsigned int personality)
+{
+	long previous;
+
+	if (personality == PER_LINUX && current->personality == PER_LINUX32)
+		personality = PER_LINUX32;
+
+	previous = sys_personality(personality);
+	return (previous == PER_LINUX32) ? PER_LINUX : previous;
+}
+
+/*
+ * 32-bit brk(): calls sys_brk() and, when the break moved below its
+ * previous value, zeroes the user memory from the new break up to the
+ * next page boundary.  Returns the value from sys_brk().
+ */
+asmlinkage unsigned long
+sys32_brk (unsigned int brk)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long old_brk = mm->brk;
+	unsigned long new_brk = sys_brk(brk);
+
+	if (new_brk < old_brk)
+		clear_user(compat_ptr(new_brk), PAGE_ALIGN(new_brk) - new_brk);
+	return new_brk;
+}
+
+/*
+ * Exactly like fs/open.c:sys_open(), except that it doesn't set the O_LARGEFILE flag.
+ *
+ * Returns the new file descriptor, or a negative error from getname(),
+ * get_unused_fd() or filp_open().
+ */
+asmlinkage long
+sys32_open (const char __user * filename, int flags, int mode)
+{
+	struct file *filp;
+	char *kname;
+	int fd;
+
+	kname = getname(filename);
+	if (IS_ERR(kname)) {
+		fd = PTR_ERR(kname);
+		return fd;
+	}
+
+	fd = get_unused_fd();
+	if (fd >= 0) {
+		filp = filp_open(kname, flags, mode);
+		if (IS_ERR(filp)) {
+			/* give the reserved descriptor back and report the error */
+			put_unused_fd(fd);
+			fd = PTR_ERR(filp);
+		} else {
+			fd_install(fd, filp);
+		}
+	}
+	putname(kname);
+	return fd;
+}
+
+/*
+ * Structure for ia32 emulation on ia64.  The 64-bit data member is
+ * carried as two 32-bit words: data[0] is the low half and data[1] the
+ * high half, as decoded by sys32_epoll_ctl() and encoded by
+ * sys32_epoll_wait().
+ */
+struct epoll_event32
+{
+ u32 events;
+ u32 data[2];
+};
+
+/*
+ * 32-bit epoll_ctl(): converts the ia32 event at @event to the native
+ * struct epoll_event and calls sys_epoll_ctl() on the kernel copy.
+ *
+ * Returns -EFAULT when @event fails access_ok(), otherwise the result
+ * of sys_epoll_ctl().  The individual __get_user() loads are not
+ * checked.
+ */
+asmlinkage long
+sys32_epoll_ctl(int epfd, int op, int fd, struct epoll_event32 __user *event)
+{
+ mm_segment_t old_fs = get_fs();
+ struct epoll_event event64;
+ int error;
+ u32 data_halfword;
+
+ if (!access_ok(VERIFY_READ, event, sizeof(struct epoll_event32)))
+ return -EFAULT;
+
+ __get_user(event64.events, &event->events);
+ /* reassemble the 64-bit data value from its low and high 32-bit words */
+ __get_user(data_halfword, &event->data[0]);
+ event64.data = data_halfword;
+ __get_user(data_halfword, &event->data[1]);
+ event64.data |= (u64)data_halfword << 32;
+
+ /* event64 lives in kernel memory, so lift the user-address check for the call */
+ set_fs(KERNEL_DS);
+ error = sys_epoll_ctl(epfd, op, fd, (struct epoll_event __user *) &event64);
+ set_fs(old_fs);
+
+ return error;
+}
+
+/*
+ * 32-bit epoll_wait(): collects up to @maxevents native events into a
+ * temporary kernel buffer via sys_epoll_wait() and converts each one
+ * to the ia32 layout at @events.
+ *
+ * Returns -EINVAL when @maxevents is not positive, -EFAULT when
+ * @events fails access_ok(), -ENOMEM when the temporary buffer cannot
+ * be allocated, otherwise the result of sys_epoll_wait().
+ */
+asmlinkage long
+sys32_epoll_wait(int epfd, struct epoll_event32 __user * events, int maxevents,
+		 int timeout)
+{
+	struct epoll_event *events64 = NULL;
+	mm_segment_t old_fs = get_fs();
+	int numevents;
+	/*
+	 * size_t, not int: maxevents * sizeof(struct epoll_event) can
+	 * exceed 32 bits, and a truncated size would allocate a buffer
+	 * far smaller than the maxevents entries sys_epoll_wait() may
+	 * fill.
+	 */
+	size_t size;
+	int evt_idx;
+	int do_free_pages = 0;
+
+	if (maxevents <= 0) {
+		return -EINVAL;
+	}
+
+	/* Verify that the area passed by the user is writeable */
+	if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event32)))
+		return -EFAULT;
+
+	/*
+	 * Allocate space for the intermediate copy.  If the space needed
+	 * is large enough to cause kmalloc to fail, then try again with
+	 * __get_free_pages.
+	 */
+	size = maxevents * sizeof(struct epoll_event);
+	events64 = kmalloc(size, GFP_KERNEL);
+	if (events64 == NULL) {
+		events64 = (struct epoll_event *)
+				__get_free_pages(GFP_KERNEL, get_order(size));
+		if (events64 == NULL)
+			return -ENOMEM;
+		do_free_pages = 1;
+	}
+
+	/* Do the system call */
+	set_fs(KERNEL_DS); /* copy_to/from_user should work on kernel mem*/
+	numevents = sys_epoll_wait(epfd, (struct epoll_event __user *) events64,
+				   maxevents, timeout);
+	set_fs(old_fs);
+
+	/* Don't modify userspace memory if we're returning an error */
+	if (numevents > 0) {
+		/* Translate the 64-bit structures back into the 32-bit
+		   structures */
+		for (evt_idx = 0; evt_idx < numevents; evt_idx++) {
+			__put_user(events64[evt_idx].events,
+				   &events[evt_idx].events);
+			__put_user((u32)events64[evt_idx].data,
+				   &events[evt_idx].data[0]);
+			__put_user((u32)(events64[evt_idx].data >> 32),
+				   &events[evt_idx].data[1]);
+		}
+	}
+
+	if (do_free_pages)
+		free_pages((unsigned long) events64, get_order(size));
+	else
+		kfree(events64);
+	return numevents;
+}
+
+/*
+ * Find a TLS descriptor slot of the current thread that desc_empty()
+ * reports as unused.  Returns its index (offset by GDT_ENTRY_TLS_MIN),
+ * or -ESRCH when every slot is taken.
+ */
+static int
+get_free_idx (void)
+{
+	struct desc_struct *slot = current->thread.tls_array;
+	int idx;
+
+	for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++, slot++) {
+		if (desc_empty(slot))
+			return idx + GDT_ENTRY_TLS_MIN;
+	}
+	return -ESRCH;
+}
+
+/*
+ * Set a given TLS descriptor from the user description at @u_info.
+ *
+ * An entry_number of -1 asks for a free slot; the chosen index is
+ * written back to @u_info.  Returns -EFAULT when @u_info cannot be
+ * accessed, -ESRCH when no slot is free, -EINVAL when the index lies
+ * outside GDT_ENTRY_TLS_MIN..GDT_ENTRY_TLS_MAX, or 0.
+ */
+asmlinkage int
+sys32_set_thread_area (struct ia32_user_desc __user *u_info)
+{
+	struct thread_struct *thread = &current->thread;
+	struct ia32_user_desc info;
+	struct desc_struct *desc;
+	int cpu, idx;
+
+	if (copy_from_user(&info, u_info, sizeof(info)) != 0)
+		return -EFAULT;
+
+	idx = info.entry_number;
+	if (idx == -1) {
+		/* the caller wants us to find and allocate an empty descriptor */
+		idx = get_free_idx();
+		if (idx < 0)
+			return idx;
+		if (put_user(idx, &u_info->entry_number))
+			return -EFAULT;
+	}
+
+	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+		return -EINVAL;
+
+	desc = &thread->tls_array[idx - GDT_ENTRY_TLS_MIN];
+	cpu = smp_processor_id();
+
+	if (!LDT_empty(&info)) {
+		desc->a = LDT_entry_a(&info);
+		desc->b = LDT_entry_b(&info);
+	} else {
+		desc->a = 0;
+		desc->b = 0;
+	}
+	load_TLS(thread, cpu);
+	return 0;
+}
+
+/*
+ * Get the current Thread-Local Storage area:
+ *
+ * The helper macros below pick individual fields out of the two 32-bit
+ * words (a and b) of a struct desc_struct.  Each takes a pointer to the
+ * descriptor and evaluates it more than once in GET_BASE/GET_LIMIT, so
+ * pass an expression without side effects.
+ */
+
+/* base address: a[31:16] -> bits 15..0, b[7:0] -> bits 23..16, b[31:24] kept in place */
+#define GET_BASE(desc) ( \
+ (((desc)->a >> 16) & 0x0000ffff) | \
+ (((desc)->b << 16) & 0x00ff0000) | \
+ ( (desc)->b & 0xff000000) )
+
+/* limit: a[15:0] -> bits 15..0, b[19:16] -> bits 19..16 */
+#define GET_LIMIT(desc) ( \
+ ((desc)->a & 0x0ffff) | \
+ ((desc)->b & 0xf0000) )
+
+/* single-bit / two-bit attribute fields, all taken from word b */
+#define GET_32BIT(desc) (((desc)->b >> 22) & 1)
+#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3)
+#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1)
+#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1)
+#define GET_PRESENT(desc) (((desc)->b >> 15) & 1)
+#define GET_USEABLE(desc) (((desc)->b >> 20) & 1)
+
+/*
+ * Report one of the calling thread's TLS descriptors to user space.
+ *
+ * The slot is selected by u_info->entry_number, which must lie in
+ * [GDT_ENTRY_TLS_MIN, GDT_ENTRY_TLS_MAX].  The descriptor is decoded into
+ * a struct ia32_user_desc and copied back over *u_info.
+ *
+ * Returns 0 on success, -EFAULT if u_info cannot be read or written, or
+ * -EINVAL for an out-of-range entry number.
+ */
+asmlinkage int
+sys32_get_thread_area (struct ia32_user_desc __user *u_info)
+{
+	struct ia32_user_desc info;
+	struct desc_struct *desc;
+	int idx;
+
+	if (get_user(idx, &u_info->entry_number))
+		return -EFAULT;
+	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+		return -EINVAL;
+
+	desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
+
+	/*
+	 * The whole structure is copied to user space below.  Clear it first
+	 * so that padding and any bits not covered by the assignments cannot
+	 * leak stale kernel stack contents.
+	 */
+	memset(&info, 0, sizeof(info));
+
+	info.entry_number = idx;
+	info.base_addr = GET_BASE(desc);
+	info.limit = GET_LIMIT(desc);
+	info.seg_32bit = GET_32BIT(desc);
+	info.contents = GET_CONTENTS(desc);
+	info.read_exec_only = !GET_WRITABLE(desc);
+	info.limit_in_pages = GET_LIMIT_PAGES(desc);
+	info.seg_not_present = !GET_PRESENT(desc);
+	info.useable = GET_USEABLE(desc);
+
+	if (copy_to_user(u_info, &info, sizeof(info)))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * ia32 wrapper for timer_create.
+ *
+ * With a NULL se32 the call is forwarded unchanged.  Otherwise the compat
+ * sigevent is converted into a native struct sigevent, sys_timer_create()
+ * is invoked on kernel-resident copies (under KERNEL_DS), and the resulting
+ * timer id is stored to *timer_id.
+ *
+ * Returns the sys_timer_create() result, -EFAULT if se32 cannot be
+ * converted or timer_id is not writable, or the __put_user() result when
+ * storing the id fails.
+ */
+asmlinkage long
+sys32_timer_create(u32 clock, struct compat_sigevent __user *se32, timer_t __user *timer_id)
+{
+ struct sigevent se;
+ mm_segment_t oldfs;
+ timer_t t;
+ long err;
+
+ if (se32 == NULL)
+ return sys_timer_create(clock, NULL, timer_id);
+
+ if (get_compat_sigevent(&se, se32))
+ return -EFAULT;
+
+ /* checked up front so the unchecked __put_user() below is safe */
+ if (!access_ok(VERIFY_WRITE,timer_id,sizeof(timer_t)))
+ return -EFAULT;
+
+ /* se and t live on the kernel stack, so lift the user-address limit for the call */
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
+ err = sys_timer_create(clock, (struct sigevent __user *) &se, (timer_t __user *) &t);
+ set_fs(oldfs);
+
+ if (!err)
+ err = __put_user (t, timer_id);
+
+ return err;
+}
+
+/*
+ * ia32 wrapper for fadvise64_64: the 64-bit offset and length arrive as
+ * separate 32-bit halves and are recombined before the native call.
+ */
+long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
+			__u32 len_low, __u32 len_high, int advice)
+{
+	u64 offset = (((u64)offset_high) << 32) | offset_low;
+	u64 len = (((u64)len_high) << 32) | len_low;
+
+	return sys_fadvise64_64(fd, offset, len, advice);
+}
+
+#ifdef NOTYET /* UNTESTED FOR IA64 FROM HERE DOWN */
+
+/*
+ * ia32 setreuid: widen the compat ids, keeping the "-1 means leave
+ * unchanged" sentinel intact across the width change.
+ */
+asmlinkage long sys32_setreuid(compat_uid_t ruid, compat_uid_t euid)
+{
+	uid_t real = (uid_t)ruid;
+	uid_t effective = (uid_t)euid;
+
+	if (ruid == (compat_uid_t)-1)
+		real = (uid_t)-1;
+	if (euid == (compat_uid_t)-1)
+		effective = (uid_t)-1;
+
+	return sys_setreuid(real, effective);
+}
+
+/*
+ * ia32 setresuid: widen the three compat ids, mapping the compat -1
+ * sentinel onto the native -1 sentinel.
+ */
+asmlinkage long
+sys32_setresuid(compat_uid_t ruid, compat_uid_t euid,
+		compat_uid_t suid)
+{
+	uid_t real = (uid_t)ruid;
+	uid_t effective = (uid_t)euid;
+	uid_t saved = (uid_t)suid;
+
+	if (ruid == (compat_uid_t)-1)
+		real = (uid_t)-1;
+	if (euid == (compat_uid_t)-1)
+		effective = (uid_t)-1;
+	if (suid == (compat_uid_t)-1)
+		saved = (uid_t)-1;
+
+	return sys_setresuid(real, effective, saved);
+}
+
+/*
+ * ia32 setregid: widen the compat gids, mapping the compat -1 sentinel
+ * onto the native -1 sentinel.
+ */
+asmlinkage long
+sys32_setregid(compat_gid_t rgid, compat_gid_t egid)
+{
+	gid_t real = (gid_t)rgid;
+	gid_t effective = (gid_t)egid;
+
+	if (rgid == (compat_gid_t)-1)
+		real = (gid_t)-1;
+	if (egid == (compat_gid_t)-1)
+		effective = (gid_t)-1;
+
+	return sys_setregid(real, effective);
+}
+
+/*
+ * ia32 setresgid: widen the three compat gids, mapping the compat -1
+ * sentinel onto the native -1 sentinel.
+ */
+asmlinkage long
+sys32_setresgid(compat_gid_t rgid, compat_gid_t egid,
+		compat_gid_t sgid)
+{
+	gid_t real = (gid_t)rgid;
+	gid_t effective = (gid_t)egid;
+	gid_t saved = (gid_t)sgid;
+
+	if (rgid == (compat_gid_t)-1)
+		real = (gid_t)-1;
+	if (egid == (compat_gid_t)-1)
+		effective = (gid_t)-1;
+	if (sgid == (compat_gid_t)-1)
+		saved = (gid_t)-1;
+
+	return sys_setresgid(real, effective, saved);
+}
+
+/* Handle adjtimex compatibility. */
+
+/*
+ * 32-bit user-space layout read and written by sys32_adjtimex().
+ * The twelve unnamed 32-bit bitfields at the end only reserve space;
+ * presumably they mirror the reserved tail of the native struct timex
+ * (confirm against its definition).
+ */
+struct timex32 {
+ u32 modes;
+ s32 offset, freq, maxerror, esterror;
+ s32 status, constant, precision, tolerance;
+ struct compat_timeval time;
+ s32 tick;
+ s32 ppsfreq, jitter, shift, stabil;
+ s32 jitcnt, calcnt, errcnt, stbcnt;
+ s32 :32; s32 :32; s32 :32; s32 :32;
+ s32 :32; s32 :32; s32 :32; s32 :32;
+ s32 :32; s32 :32; s32 :32; s32 :32;
+};
+
+extern int do_adjtimex(struct timex *);
+
+/*
+ * ia32 adjtimex: copy every named field of the user's struct timex32 into
+ * a zeroed native struct timex, run do_adjtimex() on it, then copy every
+ * field back.
+ *
+ * Returns the do_adjtimex() result, or -EFAULT if any field cannot be read
+ * from or written to user space.  Note that a copy-back failure replaces
+ * the do_adjtimex() result even though the adjustment has already run.
+ */
+asmlinkage long
+sys32_adjtimex(struct timex32 *utp)
+{
+ struct timex txc;
+ int ret;
+
+ memset(&txc, 0, sizeof(struct timex));
+
+ /* only the first access uses the checking get_user(); the rest use the __ variants */
+ if(get_user(txc.modes, &utp->modes) ||
+ __get_user(txc.offset, &utp->offset) ||
+ __get_user(txc.freq, &utp->freq) ||
+ __get_user(txc.maxerror, &utp->maxerror) ||
+ __get_user(txc.esterror, &utp->esterror) ||
+ __get_user(txc.status, &utp->status) ||
+ __get_user(txc.constant, &utp->constant) ||
+ __get_user(txc.precision, &utp->precision) ||
+ __get_user(txc.tolerance, &utp->tolerance) ||
+ __get_user(txc.time.tv_sec, &utp->time.tv_sec) ||
+ __get_user(txc.time.tv_usec, &utp->time.tv_usec) ||
+ __get_user(txc.tick, &utp->tick) ||
+ __get_user(txc.ppsfreq, &utp->ppsfreq) ||
+ __get_user(txc.jitter, &utp->jitter) ||
+ __get_user(txc.shift, &utp->shift) ||
+ __get_user(txc.stabil, &utp->stabil) ||
+ __get_user(txc.jitcnt, &utp->jitcnt) ||
+ __get_user(txc.calcnt, &utp->calcnt) ||
+ __get_user(txc.errcnt, &utp->errcnt) ||
+ __get_user(txc.stbcnt, &utp->stbcnt))
+ return -EFAULT;
+
+ ret = do_adjtimex(&txc);
+
+ /* write the (possibly updated) values back in the same field order */
+ if(put_user(txc.modes, &utp->modes) ||
+ __put_user(txc.offset, &utp->offset) ||
+ __put_user(txc.freq, &utp->freq) ||
+ __put_user(txc.maxerror, &utp->maxerror) ||
+ __put_user(txc.esterror, &utp->esterror) ||
+ __put_user(txc.status, &utp->status) ||
+ __put_user(txc.constant, &utp->constant) ||
+ __put_user(txc.precision, &utp->precision) ||
+ __put_user(txc.tolerance, &utp->tolerance) ||
+ __put_user(txc.time.tv_sec, &utp->time.tv_sec) ||
+ __put_user(txc.time.tv_usec, &utp->time.tv_usec) ||
+ __put_user(txc.tick, &utp->tick) ||
+ __put_user(txc.ppsfreq, &utp->ppsfreq) ||
+ __put_user(txc.jitter, &utp->jitter) ||
+ __put_user(txc.shift, &utp->shift) ||
+ __put_user(txc.stabil, &utp->stabil) ||
+ __put_user(txc.jitcnt, &utp->jitcnt) ||
+ __put_user(txc.calcnt, &utp->calcnt) ||
+ __put_user(txc.errcnt, &utp->errcnt) ||
+ __put_user(txc.stbcnt, &utp->stbcnt))
+ ret = -EFAULT;
+
+ return ret;
+}
+#endif /* NOTYET */
diff --git a/arch/ia64/install.sh b/arch/ia64/install.sh
new file mode 100644
index 00000000000..929e780026d
--- /dev/null
+++ b/arch/ia64/install.sh
@@ -0,0 +1,40 @@
+#!/bin/sh
+#
+# arch/ia64/install.sh
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1995 by Linus Torvalds
+#
+# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
+#
+# "make install" script for ia64 architecture
+#
+# Arguments:
+# $1 - kernel version
+# $2 - kernel image file
+# $3 - kernel map file
+# $4 - default install path (blank if root directory)
+#
+
+# User may have a custom install script
+
+if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi
+if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi
+
+# Default install - same as make zlilo
+#
+# All positional parameters are quoted so that paths containing whitespace
+# or glob characters are passed through intact.  A blank $4 still yields
+# /vmlinuz and /System.map, exactly as before.
+
+# Keep one generation of the previous kernel image and map.
+if [ -f "$4/vmlinuz" ]; then
+	mv "$4/vmlinuz" "$4/vmlinuz.old"
+fi
+
+if [ -f "$4/System.map" ]; then
+	mv "$4/System.map" "$4/System.old"
+fi
+
+cat "$2" > "$4/vmlinuz"
+cp "$3" "$4/System.map"
+
+# Re-run the boot loader installer when it is present.
+test -x /usr/sbin/elilo && /usr/sbin/elilo
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
new file mode 100644
index 00000000000..c1a02bbc252
--- /dev/null
+++ b/arch/ia64/kernel/Makefile
@@ -0,0 +1,52 @@
+#
+# Makefile for the linux kernel.
+#
+
+extra-y := head.o init_task.o vmlinux.lds
+
+obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \
+ irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \
+ salinfo.o semaphore.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
+ unwind.o mca.o mca_asm.o topology.o
+
+obj-$(CONFIG_IA64_BRL_EMU) += brl_emu.o
+obj-$(CONFIG_IA64_GENERIC) += acpi-ext.o
+obj-$(CONFIG_IA64_HP_ZX1) += acpi-ext.o
+obj-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += acpi-ext.o
+obj-$(CONFIG_IA64_PALINFO) += palinfo.o
+obj-$(CONFIG_IOSAPIC) += iosapic.o
+obj-$(CONFIG_MODULES) += module.o
+obj-$(CONFIG_SMP) += smp.o smpboot.o domain.o
+obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o
+obj-$(CONFIG_IA64_CYCLONE) += cyclone.o
+obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o
+mca_recovery-y += mca_drv.o mca_drv_asm.o
+
+# The gate DSO image is built using a special linker script.
+targets += gate.so gate-syms.o
+
+extra-y += gate.so gate-syms.o gate.lds gate.o
+
+# fp_emulate() expects f2-f5,f16-f31 to contain the user-level state.
+CFLAGS_traps.o += -mfixed-range=f2-f5,f16-f31
+
+CPPFLAGS_gate.lds := -P -C -U$(ARCH)
+
+quiet_cmd_gate = GATE $@
+ cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@
+
+GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1
+$(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE
+ $(call if_changed,gate)
+
+$(obj)/built-in.o: $(obj)/gate-syms.o
+$(obj)/built-in.o: ld_flags += -R $(obj)/gate-syms.o
+
+GATECFLAGS_gate-syms.o = -r
+$(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE
+ $(call if_changed,gate)
+
+# gate-data.o contains the gate DSO image as data in section .data.gate.
+# We must build gate.so before we can assemble it.
+# Note: kbuild does not track this dependency due to usage of .incbin
+$(obj)/gate-data.o: $(obj)/gate.so
diff --git a/arch/ia64/kernel/acpi-ext.c b/arch/ia64/kernel/acpi-ext.c
new file mode 100644
index 00000000000..2623df5e263
--- /dev/null
+++ b/arch/ia64/kernel/acpi-ext.c
@@ -0,0 +1,100 @@
+/*
+ * arch/ia64/kernel/acpi-ext.c
+ *
+ * Copyright (C) 2003 Hewlett-Packard
+ * Copyright (C) Alex Williamson
+ * Copyright (C) Bjorn Helgaas
+ *
+ * Vendor specific extensions to ACPI.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/acpi.h>
+#include <linux/efi.h>
+
+#include <asm/acpi-ext.h>
+
+/* Identifies a vendor-defined resource: a sub-type byte followed by a GUID. */
+struct acpi_vendor_descriptor {
+ u8 guid_id;
+ efi_guid_t guid;
+};
+
+/*
+ * Search context handed to acpi_vendor_resource_match(): the descriptor to
+ * look for, plus the allocated copy of the matching payload and its length
+ * (filled in by the callback on a match).
+ */
+struct acpi_vendor_info {
+ struct acpi_vendor_descriptor *descriptor;
+ u8 *data;
+ u32 length;
+};
+
+/*
+ * acpi_walk_resources() callback: look for a vendor resource whose leading
+ * descriptor matches the one in the struct acpi_vendor_info passed as
+ * context.  On a match the bytes following the descriptor are copied into
+ * a fresh allocation stored in info->data / info->length.
+ *
+ * Returns AE_OK to keep walking, AE_CTRL_TERMINATE after a match, or
+ * AE_NO_MEMORY if the copy cannot be allocated.
+ */
+acpi_status
+acpi_vendor_resource_match(struct acpi_resource *resource, void *context)
+{
+	struct acpi_vendor_info *info = (struct acpi_vendor_info *) context;
+	struct acpi_vendor_descriptor *wanted, *found;
+	struct acpi_resource_vendor *vendor;
+	u32 payload_len;
+
+	/* Only vendor-defined resources are of interest. */
+	if (resource->id != ACPI_RSTYPE_VENDOR)
+		return AE_OK;
+
+	vendor = (struct acpi_resource_vendor *) &resource->data;
+	found = (struct acpi_vendor_descriptor *) vendor->reserved;
+	wanted = info->descriptor;
+
+	/* Must carry a payload beyond the descriptor, and the descriptor must match. */
+	if (vendor->length <= sizeof(*wanted))
+		return AE_OK;
+	if (found->guid_id != wanted->guid_id)
+		return AE_OK;
+	if (efi_guidcmp(found->guid, wanted->guid))
+		return AE_OK;
+
+	payload_len = vendor->length - sizeof(struct acpi_vendor_descriptor);
+	info->data = acpi_os_allocate(payload_len);
+	if (!info->data)
+		return AE_NO_MEMORY;
+
+	memcpy(info->data, vendor->reserved + sizeof(struct acpi_vendor_descriptor), payload_len);
+	info->length = payload_len;
+	return AE_CTRL_TERMINATE;
+}
+
+/*
+ * Walk the _CRS resources of obj looking for the vendor resource described
+ * by id.  On success *data points at an allocated copy of the payload
+ * (ownership passes to the caller) and *length holds its size.
+ *
+ * Returns AE_OK on success, AE_NOT_FOUND if no payload was produced.
+ */
+acpi_status
+acpi_find_vendor_resource(acpi_handle obj, struct acpi_vendor_descriptor *id,
+			  u8 **data, u32 *length)
+{
+	struct acpi_vendor_info search = {
+		.descriptor = id,
+		.data = NULL,
+	};
+
+	/* The walk status is not consulted; a non-NULL data pointer signals a hit. */
+	acpi_walk_resources(obj, METHOD_NAME__CRS, acpi_vendor_resource_match, &search);
+
+	if (search.data == NULL)
+		return AE_NOT_FOUND;
+
+	*length = search.length;
+	*data = search.data;
+	return AE_OK;
+}
+
+struct acpi_vendor_descriptor hp_ccsr_descriptor = {
+ .guid_id = 2,
+ .guid = EFI_GUID(0x69e9adf9, 0x924f, 0xab5f, 0xf6, 0x4a, 0x24, 0xd2, 0x01, 0x37, 0x0e, 0xad)
+};
+
+/*
+ * Look up the HP CSR-space vendor resource of obj and report its base
+ * address and length.
+ *
+ * The resource payload must be exactly 16 bytes: a 64-bit base followed by
+ * a 64-bit length.  Returns AE_OK and fills *csr_base / *csr_length on
+ * success, AE_NOT_FOUND otherwise.
+ */
+acpi_status
+hp_acpi_csr_space(acpi_handle obj, u64 *csr_base, u64 *csr_length)
+{
+	acpi_status status;
+	u8 *data;
+	u32 length;
+
+	status = acpi_find_vendor_resource(obj, &hp_ccsr_descriptor, &data, &length);
+	if (ACPI_FAILURE(status))
+		return AE_NOT_FOUND;
+
+	/*
+	 * The lookup succeeded, so we own the payload copy: release it even
+	 * when its size is not what we expect.
+	 */
+	if (length != 16) {
+		acpi_os_free(data);
+		return AE_NOT_FOUND;
+	}
+
+	memcpy(csr_base, data, sizeof(*csr_base));
+	memcpy(csr_length, data + 8, sizeof(*csr_length));
+	acpi_os_free(data);
+
+	return AE_OK;
+}
+
+EXPORT_SYMBOL(hp_acpi_csr_space);
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
new file mode 100644
index 00000000000..a8e99c56a76
--- /dev/null
+++ b/arch/ia64/kernel/acpi.c
@@ -0,0 +1,841 @@
+/*
+ * acpi.c - Architecture-Specific Low-Level ACPI Support
+ *
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 2000, 2002-2003 Hewlett-Packard Co.
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2000 Intel Corp.
+ * Copyright (C) 2000,2001 J.I. Lee <jung-ik.lee@intel.com>
+ * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ * Copyright (C) 2001 Jenna Hall <jenna.s.hall@intel.com>
+ * Copyright (C) 2001 Takayoshi Kochi <t-kochi@bq.jp.nec.com>
+ * Copyright (C) 2002 Erich Focht <efocht@ess.nec.de>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/irq.h>
+#include <linux/acpi.h>
+#include <linux/efi.h>
+#include <linux/mmzone.h>
+#include <linux/nodemask.h>
+#include <asm/io.h>
+#include <asm/iosapic.h>
+#include <asm/machvec.h>
+#include <asm/page.h>
+#include <asm/system.h>
+#include <asm/numa.h>
+#include <asm/sal.h>
+#include <asm/cyclone.h>
+
+/*
+ * True when a typed MADT sub-table pointer cannot be trusted: it is NULL,
+ * the structure would extend past 'end', or the length recorded in its
+ * header differs from sizeof(*entry).  'entry' is expanded several times
+ * and without parentheses, so pass a plain pointer variable.
+ */
+#define BAD_MADT_ENTRY(entry, end) ( \
+ (!entry) || (unsigned long)entry + sizeof(*entry) > end || \
+ ((acpi_table_entry_header *)entry)->length != sizeof(*entry))
+
+#define PREFIX "ACPI: "
+
+void (*pm_idle) (void);
+EXPORT_SYMBOL(pm_idle);
+void (*pm_power_off) (void);
+EXPORT_SYMBOL(pm_power_off);
+
+unsigned char acpi_kbd_controller_present = 1;
+unsigned char acpi_legacy_devices;
+
+#define MAX_SAPICS 256
+u16 ia64_acpiid_to_sapicid[MAX_SAPICS] =
+ { [0 ... MAX_SAPICS - 1] = -1 };
+EXPORT_SYMBOL(ia64_acpiid_to_sapicid);
+
+/*
+ * Return the platform name string ("dig", "hpzx1", "sn2", ...).
+ *
+ * For a generic kernel (CONFIG_IA64_GENERIC) the name is derived from the
+ * OEM id in the XSDT header, falling back to "dig" whenever the RSDP or
+ * XSDT cannot be found or validated.  For a platform-specific kernel the
+ * name is fixed at compile time.
+ */
+const char *
+acpi_get_sysname (void)
+{
+#ifdef CONFIG_IA64_GENERIC
+ unsigned long rsdp_phys;
+ struct acpi20_table_rsdp *rsdp;
+ struct acpi_table_xsdt *xsdt;
+ struct acpi_table_header *hdr;
+
+ rsdp_phys = acpi_find_rsdp();
+ if (!rsdp_phys) {
+ printk(KERN_ERR "ACPI 2.0 RSDP not found, default to \"dig\"\n");
+ return "dig";
+ }
+
+ rsdp = (struct acpi20_table_rsdp *) __va(rsdp_phys);
+ if (strncmp(rsdp->signature, RSDP_SIG, sizeof(RSDP_SIG) - 1)) {
+ printk(KERN_ERR "ACPI 2.0 RSDP signature incorrect, default to \"dig\"\n");
+ return "dig";
+ }
+
+ xsdt = (struct acpi_table_xsdt *) __va(rsdp->xsdt_address);
+ hdr = &xsdt->header;
+ if (strncmp(hdr->signature, XSDT_SIG, sizeof(XSDT_SIG) - 1)) {
+ printk(KERN_ERR "ACPI 2.0 XSDT signature incorrect, default to \"dig\"\n");
+ return "dig";
+ }
+
+ /* NOTE(review): strcmp() relies on oem_id being NUL-terminated/padded; confirm for the firmware in use */
+ if (!strcmp(hdr->oem_id, "HP")) {
+ return "hpzx1";
+ }
+ else if (!strcmp(hdr->oem_id, "SGI")) {
+ return "sn2";
+ }
+
+ return "dig";
+#else
+# if defined (CONFIG_IA64_HP_SIM)
+ return "hpsim";
+# elif defined (CONFIG_IA64_HP_ZX1)
+ return "hpzx1";
+# elif defined (CONFIG_IA64_HP_ZX1_SWIOTLB)
+ return "hpzx1_swiotlb";
+# elif defined (CONFIG_IA64_SGI_SN2)
+ return "sn2";
+# elif defined (CONFIG_IA64_DIG)
+ return "dig";
+# else
+# error Unknown platform. Fix acpi.c.
+# endif
+#endif
+}
+
+#ifdef CONFIG_ACPI_BOOT
+
+#define ACPI_MAX_PLATFORM_INTERRUPTS 256
+
+/* Array to record platform interrupt vectors for generic interrupt routing. */
+int platform_intr_list[ACPI_MAX_PLATFORM_INTERRUPTS] = {
+ [0 ... ACPI_MAX_PLATFORM_INTERRUPTS - 1] = -1
+};
+
+enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_IOSAPIC;
+
+/*
+ * Interrupt routing API for device drivers. Provides interrupt vector for
+ * a generic platform event. Currently only CPEI is implemented.
+ */
+int
+acpi_request_vector (u32 int_type)
+{
+	/* Reject types that fall outside the recorded table. */
+	if (int_type >= ACPI_MAX_PLATFORM_INTERRUPTS) {
+		printk(KERN_ERR "acpi_request_vector(): invalid interrupt type\n");
+		return -1;
+	}
+
+	/* corrected platform error interrupt */
+	return platform_intr_list[int_type];
+}
+
+/*
+ * Map an ACPI table for parsing: simply returns the kernel virtual address
+ * for phys_addr via __va().  The size argument is not used.
+ */
+char *
+__acpi_map_table (unsigned long phys_addr, unsigned long size)
+{
+ return __va(phys_addr);
+}
+
+/* --------------------------------------------------------------------------
+ Boot-time Table Parsing
+ -------------------------------------------------------------------------- */
+
+static int total_cpus __initdata;
+static int available_cpus __initdata;
+struct acpi_table_madt * acpi_madt __initdata;
+static u8 has_8259;
+
+
+/*
+ * MADT handler for a local APIC address override entry: when the entry
+ * carries a non-zero address, the current ipi_base_addr mapping is dropped
+ * and replaced by a mapping of that address.
+ * Returns 0, or -EINVAL for a malformed entry.
+ */
+static int __init
+acpi_parse_lapic_addr_ovr (
+	acpi_table_entry_header *header, const unsigned long end)
+{
+	struct acpi_table_lapic_addr_ovr *ovr =
+		(struct acpi_table_lapic_addr_ovr *) header;
+
+	if (BAD_MADT_ENTRY(ovr, end))
+		return -EINVAL;
+
+	/* Nothing to override. */
+	if (!ovr->address)
+		return 0;
+
+	iounmap(ipi_base_addr);
+	ipi_base_addr = ioremap(ovr->address, 0);
+	return 0;
+}
+
+
+/*
+ * MADT handler for a local SAPIC (processor) entry.  Every valid entry is
+ * counted in total_cpus; enabled ones additionally have their SAPIC id
+ * ((id << 8) | eid) recorded and are counted in available_cpus.
+ * Returns 0, or -EINVAL for a malformed entry.
+ */
+static int __init
+acpi_parse_lsapic (acpi_table_entry_header *header, const unsigned long end)
+{
+	struct acpi_table_lsapic *proc = (struct acpi_table_lsapic *) header;
+
+	if (BAD_MADT_ENTRY(proc, end))
+		return -EINVAL;
+
+	if (proc->flags.enabled) {
+		int sapicid = (proc->id << 8) | proc->eid;
+
+#ifdef CONFIG_SMP
+		smp_boot_data.cpu_phys_id[available_cpus] = sapicid;
+#endif
+		ia64_acpiid_to_sapicid[proc->acpi_id] = sapicid;
+		available_cpus++;
+	}
+
+	++total_cpus;
+	return 0;
+}
+
+
+/*
+ * MADT handler for a local APIC NMI entry.  The entry is only validated;
+ * nothing is done with its contents yet.
+ * Returns 0, or -EINVAL for a malformed entry.
+ */
+static int __init
+acpi_parse_lapic_nmi (acpi_table_entry_header *header, const unsigned long end)
+{
+ struct acpi_table_lapic_nmi *lacpi_nmi;
+
+ lacpi_nmi = (struct acpi_table_lapic_nmi*) header;
+
+ if (BAD_MADT_ENTRY(lacpi_nmi, end))
+ return -EINVAL;
+
+ /* TBD: Support lapic_nmi entries */
+ return 0;
+}
+
+
+/*
+ * MADT handler for an I/O SAPIC entry: hands the controller's address and
+ * global interrupt base to iosapic_init().
+ * Returns 0, or -EINVAL for a malformed entry.
+ */
+static int __init
+acpi_parse_iosapic (acpi_table_entry_header *header, const unsigned long end)
+{
+	struct acpi_table_iosapic *ioapic = (struct acpi_table_iosapic *) header;
+
+	if (BAD_MADT_ENTRY(ioapic, end))
+		return -EINVAL;
+
+	iosapic_init(ioapic->address, ioapic->global_irq_base);
+	return 0;
+}
+
+
+/*
+ * MADT handler for a platform interrupt source entry: registers the
+ * interrupt with the IOSAPIC layer and remembers the resulting vector in
+ * platform_intr_list[], indexed by the entry's type.
+ * Returns 0, or -EINVAL for a malformed entry.
+ */
+static int __init
+acpi_parse_plat_int_src (
+	acpi_table_entry_header *header, const unsigned long end)
+{
+	struct acpi_table_plat_int_src *src =
+		(struct acpi_table_plat_int_src *) header;
+	int polarity, trigger;
+
+	if (BAD_MADT_ENTRY(src, end))
+		return -EINVAL;
+
+	/* Translate the MADT flag encoding into IOSAPIC terms. */
+	polarity = (src->flags.polarity == 1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW;
+	trigger = (src->flags.trigger == 1) ? IOSAPIC_EDGE : IOSAPIC_LEVEL;
+
+	/*
+	 * Get vector assignment for this interrupt, set attributes,
+	 * and program the IOSAPIC routing table.
+	 */
+	platform_intr_list[src->type] =
+		iosapic_register_platform_intr(src->type,
+					       src->global_irq,
+					       src->iosapic_vector,
+					       src->eid,
+					       src->id,
+					       polarity,
+					       trigger);
+	return 0;
+}
+
+
+/*
+ * MADT handler for an interrupt source override entry: tells the IOSAPIC
+ * layer which global interrupt, polarity and trigger mode an ISA IRQ
+ * really uses.
+ * Returns 0, or -EINVAL for a malformed entry.
+ */
+static int __init
+acpi_parse_int_src_ovr (
+	acpi_table_entry_header *header, const unsigned long end)
+{
+	struct acpi_table_int_src_ovr *ovr =
+		(struct acpi_table_int_src_ovr *) header;
+	int polarity, trigger;
+
+	if (BAD_MADT_ENTRY(ovr, end))
+		return -EINVAL;
+
+	polarity = (ovr->flags.polarity == 1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW;
+	trigger = (ovr->flags.trigger == 1) ? IOSAPIC_EDGE : IOSAPIC_LEVEL;
+
+	iosapic_override_isa_irq(ovr->bus_irq, ovr->global_irq, polarity, trigger);
+	return 0;
+}
+
+
+/*
+ * MADT handler for an NMI source entry.  The entry is only validated;
+ * nothing is done with its contents yet.
+ * Returns 0, or -EINVAL for a malformed entry.
+ */
+static int __init
+acpi_parse_nmi_src (acpi_table_entry_header *header, const unsigned long end)
+{
+ struct acpi_table_nmi_src *nmi_src;
+
+ nmi_src = (struct acpi_table_nmi_src*) header;
+
+ if (BAD_MADT_ENTRY(nmi_src, end))
+ return -EINVAL;
+
+ /* TBD: Support nmi_src entries */
+ return 0;
+}
+
+/*
+ * Apply OEM-specific quirks keyed off the MADT header.  Only the
+ * IBM "SERMOW" table is handled.
+ */
+static void __init
+acpi_madt_oem_check (char *oem_id, char *oem_table_id)
+{
+	/* Anything that is not IBM/SERMOW needs no special treatment. */
+	if (strncmp(oem_id, "IBM", 3) || strncmp(oem_table_id, "SERMOW", 6))
+		return;
+
+	/*
+	 * Unfortunately ITC_DRIFT is not yet part of the
+	 * official SAL spec, so the ITC_DRIFT bit is not
+	 * set by the BIOS on this hardware.
+	 */
+	sal_platform_features |= IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT;
+
+	cyclone_setup();
+}
+
+/*
+ * Top-level MADT handler: remembers the table in acpi_madt, determines
+ * whether a legacy 8259 is present, initialises the IOSAPIC layer with
+ * that information, maps the IPI message block if the table supplies an
+ * address, and applies OEM quirks.
+ * Returns 0, or -EINVAL when no table address or size is supplied.
+ */
+static int __init
+acpi_parse_madt (unsigned long phys_addr, unsigned long size)
+{
+ if (!phys_addr || !size)
+ return -EINVAL;
+
+ acpi_madt = (struct acpi_table_madt *) __va(phys_addr);
+
+ /* remember the value for reference after free_initmem() */
+#ifdef CONFIG_ITANIUM
+ has_8259 = 1; /* Firmware on old Itanium systems is broken */
+#else
+ has_8259 = acpi_madt->flags.pcat_compat;
+#endif
+ iosapic_system_init(has_8259);
+
+ /* Get base address of IPI Message Block */
+
+ if (acpi_madt->lapic_address)
+ ipi_base_addr = ioremap(acpi_madt->lapic_address, 0);
+
+ printk(KERN_INFO PREFIX "Local APIC address %p\n", ipi_base_addr);
+
+ acpi_madt_oem_check(acpi_madt->header.oem_id,
+ acpi_madt->header.oem_table_id);
+
+ return 0;
+}
+
+
+#ifdef CONFIG_ACPI_NUMA
+
+#undef SLIT_DEBUG
+
+#define PXM_FLAG_LEN ((MAX_PXM_DOMAINS + 1)/32)
+
+static int __initdata srat_num_cpus; /* number of cpus */
+static u32 __devinitdata pxm_flag[PXM_FLAG_LEN];
+#define pxm_bit_set(bit) (set_bit(bit,(void *)pxm_flag))
+#define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag))
+/* maps to convert between proximity domain and logical node ID */
+int __devinitdata pxm_to_nid_map[MAX_PXM_DOMAINS];
+int __initdata nid_to_pxm_map[MAX_NUMNODES];
+static struct acpi_table_slit __initdata *slit_table;
+
+/*
+ * ACPI 2.0 SLIT (System Locality Information Table)
+ * http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf
+ *
+ * Validates the table length against header + 8 bytes + localities^2
+ * one-byte entries.  A consistent table is remembered in slit_table for
+ * acpi_numa_arch_fixup(); otherwise every byte of numa_slit is set to 10
+ * and the table is ignored.
+ */
+void __init
+acpi_numa_slit_init (struct acpi_table_slit *slit)
+{
+ u32 len;
+
+ len = sizeof(struct acpi_table_header) + 8
+ + slit->localities * slit->localities;
+ if (slit->header.length != len) {
+ printk(KERN_ERR "ACPI 2.0 SLIT: size mismatch: %d expected, %d actual\n",
+ len, slit->header.length);
+ memset(numa_slit, 10, sizeof(numa_slit));
+ return;
+ }
+ slit_table = slit;
+}
+
+/*
+ * SRAT handler for a processor affinity entry: marks the entry's proximity
+ * domain as present and appends the CPU's SAPIC id ((apic_id << 8) |
+ * lsapic_eid) and proximity domain to node_cpuid[].
+ *
+ * Entries beyond the capacity of node_cpuid[] are ignored with a warning
+ * instead of being written past the end of the array.
+ */
+void __init
+acpi_numa_processor_affinity_init (struct acpi_table_processor_affinity *pa)
+{
+	/* Firmware may describe more CPUs than this kernel has room for. */
+	if (srat_num_cpus >= (int) (sizeof(node_cpuid) / sizeof(node_cpuid[0]))) {
+		printk(KERN_WARNING PREFIX
+		       "too many SRAT processor affinity entries; ignoring entry\n");
+		return;
+	}
+
+	/* record this node in proximity bitmap */
+	pxm_bit_set(pa->proximity_domain);
+
+	node_cpuid[srat_num_cpus].phys_id = (pa->apic_id << 8) | (pa->lsapic_eid);
+	/* nid should be overridden as logical node id later */
+	node_cpuid[srat_num_cpus].nid = pa->proximity_domain;
+	srat_num_cpus++;
+}
+
+/*
+ * SRAT handler for a memory affinity entry: marks the entry's proximity
+ * domain as present and inserts the described memory range into
+ * node_memblk[], which is kept sorted by start address.  Disabled entries
+ * are ignored.  The nid stored here is still the proximity domain; it is
+ * translated to a logical node id in acpi_numa_arch_fixup().
+ *
+ * NOTE(review): num_node_memblks is not checked against the capacity of
+ * node_memblk[] here — confirm the array is sized for the worst case.
+ */
+void __init
+acpi_numa_memory_affinity_init (struct acpi_table_memory_affinity *ma)
+{
+ unsigned long paddr, size;
+ u8 pxm;
+ struct node_memblk_s *p, *q, *pend;
+
+ pxm = ma->proximity_domain;
+
+ /* fill node memory chunk structure */
+ paddr = ma->base_addr_hi;
+ paddr = (paddr << 32) | ma->base_addr_lo;
+ size = ma->length_hi;
+ size = (size << 32) | ma->length_lo;
+
+ /* Ignore disabled entries */
+ if (!ma->flags.enabled)
+ return;
+
+ /* record this node in proximity bitmap */
+ pxm_bit_set(pxm);
+
+ /* Insertion sort based on base address */
+ pend = &node_memblk[num_node_memblks];
+ /* find the first existing block that starts above the new one */
+ for (p = &node_memblk[0]; p < pend; p++) {
+ if (paddr < p->start_paddr)
+ break;
+ }
+ /* shift that block and everything after it up by one slot */
+ if (p < pend) {
+ for (q = pend - 1; q >= p; q--)
+ *(q + 1) = *q;
+ }
+ p->start_paddr = paddr;
+ p->size = size;
+ p->nid = pxm;
+ num_node_memblks++;
+}
+
+/*
+ * Finish NUMA setup once all SRAT/SLIT entries have been seen:
+ *  - without SRAT CPU entries, bring node 0 online and record the boot
+ *    CPU's physical id;
+ *  - otherwise assign consecutive logical node ids to the proximity
+ *    domains that were marked present, rewrite the nid fields of
+ *    node_memblk[] and node_cpuid[] accordingly, number the memory banks
+ *    per node, and fill the node distance table from the SLIT if one was
+ *    accepted.
+ */
+void __init
+acpi_numa_arch_fixup (void)
+{
+ int i, j, node_from, node_to;
+
+ /* If there's no SRAT, fix the phys_id and mark node 0 online */
+ if (srat_num_cpus == 0) {
+ node_set_online(0);
+ node_cpuid[0].phys_id = hard_smp_processor_id();
+ return;
+ }
+
+ /*
+ * MCD - This can probably be dropped now. No need for pxm ID to node ID
+ * mapping with sparse node numbering iff MAX_PXM_DOMAINS <= MAX_NUMNODES.
+ */
+ /* calculate total number of nodes in system from PXM bitmap */
+ memset(pxm_to_nid_map, -1, sizeof(pxm_to_nid_map));
+ memset(nid_to_pxm_map, -1, sizeof(nid_to_pxm_map));
+ nodes_clear(node_online_map);
+ /* each present proximity domain gets the next unused logical node id */
+ for (i = 0; i < MAX_PXM_DOMAINS; i++) {
+ if (pxm_bit_test(i)) {
+ int nid = num_online_nodes();
+ pxm_to_nid_map[i] = nid;
+ nid_to_pxm_map[nid] = i;
+ node_set_online(nid);
+ }
+ }
+
+ /* set logical node id in memory chunk structure */
+ for (i = 0; i < num_node_memblks; i++)
+ node_memblk[i].nid = pxm_to_nid_map[node_memblk[i].nid];
+
+ /* assign memory bank numbers for each chunk on each node */
+ for_each_online_node(i) {
+ int bank;
+
+ bank = 0;
+ for (j = 0; j < num_node_memblks; j++)
+ if (node_memblk[j].nid == i)
+ node_memblk[j].bank = bank++;
+ }
+
+ /* set logical node id in cpu structure */
+ for (i = 0; i < srat_num_cpus; i++)
+ node_cpuid[i].nid = pxm_to_nid_map[node_cpuid[i].nid];
+
+ printk(KERN_INFO "Number of logical nodes in system = %d\n", num_online_nodes());
+ printk(KERN_INFO "Number of memory chunks in system = %d\n", num_node_memblks);
+
+ /* no usable SLIT: leave the distance table as it is */
+ if (!slit_table) return;
+ memset(numa_slit, -1, sizeof(numa_slit));
+ /* copy distances for every pair of localities that are both present */
+ for (i=0; i<slit_table->localities; i++) {
+ if (!pxm_bit_test(i))
+ continue;
+ node_from = pxm_to_nid_map[i];
+ for (j=0; j<slit_table->localities; j++) {
+ if (!pxm_bit_test(j))
+ continue;
+ node_to = pxm_to_nid_map[j];
+ node_distance(node_from, node_to) =
+ slit_table->entry[i*slit_table->localities + j];
+ }
+ }
+
+#ifdef SLIT_DEBUG
+ printk("ACPI 2.0 SLIT locality table:\n");
+ for_each_online_node(i) {
+ for_each_online_node(j)
+ printk("%03d ", node_distance(i,j));
+ printk("\n");
+ }
+#endif
+}
+#endif /* CONFIG_ACPI_NUMA */
+
+/*
+ * Register a global system interrupt and return its vector.  The first 16
+ * GSIs are served by the legacy ISA mapping when an 8259 is present;
+ * everything else goes through the IOSAPIC.
+ */
+unsigned int
+acpi_register_gsi (u32 gsi, int edge_level, int active_high_low)
+{
+	int polarity, trigger;
+
+	if (has_8259 && gsi < 16)
+		return isa_irq_to_vector(gsi);
+
+	/* Translate the ACPI encodings into IOSAPIC terms. */
+	polarity = (active_high_low == ACPI_ACTIVE_HIGH) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW;
+	trigger = (edge_level == ACPI_EDGE_SENSITIVE) ? IOSAPIC_EDGE : IOSAPIC_LEVEL;
+
+	return iosapic_register_intr(gsi, polarity, trigger);
+}
+EXPORT_SYMBOL(acpi_register_gsi);
+
+#ifdef CONFIG_ACPI_DEALLOCATE_IRQ
+/* Undo acpi_register_gsi(): forwards the GSI to iosapic_unregister_intr(). */
+void
+acpi_unregister_gsi (u32 gsi)
+{
+ iosapic_unregister_intr(gsi);
+}
+EXPORT_SYMBOL(acpi_unregister_gsi);
+#endif /* CONFIG_ACPI_DEALLOCATE_IRQ */
+
+/*
+ * FADT handler: records whether an 8042 keyboard controller and legacy
+ * devices are present, and registers the SCI interrupt as level-triggered,
+ * active-low.
+ * Returns 0, -EINVAL when no table is supplied, or -ENODEV for a FADT
+ * whose revision is not 3.
+ */
+static int __init
+acpi_parse_fadt (unsigned long phys_addr, unsigned long size)
+{
+	struct acpi_table_header *hdr;
+	struct fadt_descriptor_rev2 *fadt;
+
+	if (!phys_addr || !size)
+		return -EINVAL;
+
+	hdr = (struct acpi_table_header *) __va(phys_addr);
+	if (hdr->revision != 3)
+		return -ENODEV;		/* Only deal with ACPI 2.0 FADT */
+
+	fadt = (struct fadt_descriptor_rev2 *) hdr;
+
+	if (fadt->iapc_boot_arch & BAF_LEGACY_DEVICES)
+		acpi_legacy_devices = 1;
+
+	if (!(fadt->iapc_boot_arch & BAF_8042_KEYBOARD_CONTROLLER))
+		acpi_kbd_controller_present = 0;
+
+	acpi_register_gsi(fadt->sci_int, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW);
+	return 0;
+}
+
+
+/*
+ * Return the physical address of the ACPI 2.0 RSDP published by EFI, or 0
+ * if there is none.  A system offering only the older table gets a warning.
+ */
+unsigned long __init
+acpi_find_rsdp (void)
+{
+	if (efi.acpi20)
+		return __pa(efi.acpi20);
+
+	if (efi.acpi)
+		printk(KERN_WARNING PREFIX "v1.0/r0.71 tables no longer supported\n");
+
+	return 0;
+}
+
+
+/*
+ * Boot-time ACPI table processing: parses the MADT and its sub-tables
+ * (local SAPICs, IOSAPICs, interrupt routing entries), then the FADT, and
+ * finally publishes the discovered CPU set to the SMP boot code.  Parsing
+ * problems are logged but never abort the boot; the function always
+ * returns 0.
+ */
+int __init
+acpi_boot_init (void)
+{
+
+ /*
+ * MADT
+ * ----
+ * Parse the Multiple APIC Description Table (MADT), if exists.
+ * Note that this table provides platform SMP configuration
+ * information -- the successor to MPS tables.
+ */
+
+ if (acpi_table_parse(ACPI_APIC, acpi_parse_madt) < 1) {
+ printk(KERN_ERR PREFIX "Can't find MADT\n");
+ goto skip_madt;
+ }
+
+ /* Local APIC */
+
+ if (acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0) < 0)
+ printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n");
+
+ if (acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_parse_lsapic, NR_CPUS) < 1)
+ printk(KERN_ERR PREFIX "Error parsing MADT - no LAPIC entries\n");
+
+ if (acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0) < 0)
+ printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
+
+ /* I/O APIC */
+
+ if (acpi_table_parse_madt(ACPI_MADT_IOSAPIC, acpi_parse_iosapic, NR_IOSAPICS) < 1)
+ printk(KERN_ERR PREFIX "Error parsing MADT - no IOSAPIC entries\n");
+
+ /* System-Level Interrupt Routing */
+
+ if (acpi_table_parse_madt(ACPI_MADT_PLAT_INT_SRC, acpi_parse_plat_int_src, ACPI_MAX_PLATFORM_INTERRUPTS) < 0)
+ printk(KERN_ERR PREFIX "Error parsing platform interrupt source entry\n");
+
+ if (acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, 0) < 0)
+ printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n");
+
+ if (acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, 0) < 0)
+ printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
+ skip_madt:
+
+ /*
+ * FADT says whether a legacy keyboard controller is present.
+ * The FADT also contains an SCI_INT line, by which the system
+ * gets interrupts such as power and sleep buttons. If it's not
+ * on a Legacy interrupt, it needs to be setup.
+ */
+ if (acpi_table_parse(ACPI_FADT, acpi_parse_fadt) < 1)
+ printk(KERN_ERR PREFIX "Can't find FADT\n");
+
+#ifdef CONFIG_SMP
+ /* no enabled LSAPIC entry was seen: fall back to just the boot CPU */
+ if (available_cpus == 0) {
+ printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n");
+ printk(KERN_INFO "CPU 0 (0x%04x)", hard_smp_processor_id());
+ smp_boot_data.cpu_phys_id[available_cpus] = hard_smp_processor_id();
+ available_cpus = 1; /* We've got at least one of these, no? */
+ }
+ smp_boot_data.cpu_count = available_cpus;
+
+ smp_build_cpu_map();
+# ifdef CONFIG_ACPI_NUMA
+ /*
+ * Without SRAT CPU entries, slot 0 of node_cpuid[] holds the boot CPU
+ * (set in acpi_numa_arch_fixup()); fill the remaining slots with the
+ * other CPUs' physical ids.
+ */
+ if (srat_num_cpus == 0) {
+ int cpu, i = 1;
+ for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++)
+ if (smp_boot_data.cpu_phys_id[cpu] != hard_smp_processor_id())
+ node_cpuid[i++].phys_id = smp_boot_data.cpu_phys_id[cpu];
+ }
+ build_cpu_to_node_map();
+# endif
+#endif
+ /* Make boot-up look pretty */
+ printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus, total_cpus);
+ return 0;
+}
+
+/*
+ * Translate a global system interrupt into the vector used for it.
+ * Stores the result in *irq and returns 0, or returns -1 (leaving *irq
+ * untouched) when the GSI has no vector.
+ */
+int
+acpi_gsi_to_irq (u32 gsi, unsigned int *irq)
+{
+	int vector;
+
+	/* Legacy ISA interrupts have a fixed mapping when an 8259 is present. */
+	if (has_8259 && gsi < 16) {
+		*irq = isa_irq_to_vector(gsi);
+		return 0;
+	}
+
+	vector = gsi_to_vector(gsi);
+	if (vector == -1)
+		return -1;
+
+	*irq = vector;
+	return 0;
+}
+
+/*
+ * ACPI based hotplug CPU support
+ */
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+/*
+ * Record the node and physical id of a hot-added CPU in node_cpuid[].
+ * The node comes from the handle's proximity domain; a negative result
+ * from acpi_get_pxm() maps the CPU to node 0.  Does nothing without
+ * CONFIG_ACPI_NUMA.  Always returns 0.
+ */
+static
+int
+acpi_map_cpu2node(acpi_handle handle, int cpu, long physid)
+{
+#ifdef CONFIG_ACPI_NUMA
+ int pxm_id;
+
+ pxm_id = acpi_get_pxm(handle);
+
+ /*
+ * Assuming that the container driver would have set the proximity
+ * domain and would have initialized pxm_to_nid_map[pxm_id] && pxm_flag
+ */
+ node_cpuid[cpu].nid = (pxm_id < 0) ? 0:
+ pxm_to_nid_map[pxm_id];
+
+ node_cpuid[cpu].phys_id = physid;
+#endif
+ return(0);
+}
+
+
+/*
+ * Hot-add support: evaluate the _MAT method of a processor object, and if
+ * it describes an enabled local SAPIC, bind it to the first CPU number not
+ * yet in cpu_present_map.
+ *
+ * On success the CPU is marked present, its SAPIC id is recorded in
+ * ia64_cpu_to_sapicid[] and ia64_acpiid_to_sapicid[], *pcpu receives the
+ * logical CPU number, and 0 is returned.  Returns -EINVAL if _MAT cannot
+ * be evaluated, does not describe an enabled LSAPIC, or no CPU number is
+ * free.
+ */
+int
+acpi_map_lsapic(acpi_handle handle, int *pcpu)
+{
+	struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
+	union acpi_object *obj;
+	struct acpi_table_lsapic *lsapic;
+	cpumask_t tmp_map;
+	long physid;
+	int acpi_id;
+	int cpu;
+
+	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
+		return -EINVAL;
+
+	if (!buffer.length || !buffer.pointer)
+		return -EINVAL;
+
+	obj = buffer.pointer;
+	if (obj->type != ACPI_TYPE_BUFFER ||
+	    obj->buffer.length < sizeof(*lsapic)) {
+		acpi_os_free(buffer.pointer);
+		return -EINVAL;
+	}
+
+	lsapic = (struct acpi_table_lsapic *)obj->buffer.pointer;
+
+	if ((lsapic->header.type != ACPI_MADT_LSAPIC) ||
+	    (!lsapic->flags.enabled)) {
+		acpi_os_free(buffer.pointer);
+		return -EINVAL;
+	}
+
+	/*
+	 * lsapic points into the buffer returned by _MAT.  Take everything
+	 * we need from it now: once the buffer is released the pointer must
+	 * not be dereferenced again.
+	 */
+	physid = ((lsapic->id <<8) | (lsapic->eid));
+	acpi_id = lsapic->acpi_id;
+
+	acpi_os_free(buffer.pointer);
+	buffer.length = ACPI_ALLOCATE_BUFFER;
+	buffer.pointer = NULL;
+
+	/* pick the lowest CPU number that is not present yet */
+	cpus_complement(tmp_map, cpu_present_map);
+	cpu = first_cpu(tmp_map);
+	if(cpu >= NR_CPUS)
+		return -EINVAL;
+
+	acpi_map_cpu2node(handle, cpu, physid);
+
+	cpu_set(cpu, cpu_present_map);
+	ia64_cpu_to_sapicid[cpu] = physid;
+	ia64_acpiid_to_sapicid[acpi_id] = ia64_cpu_to_sapicid[cpu];
+
+	*pcpu = cpu;
+	return(0);
+}
+EXPORT_SYMBOL(acpi_map_lsapic);
+
+
+/*
+ * Hot-remove counterpart of acpi_map_lsapic(): forget the ACPI-id and
+ * SAPIC-id bindings of 'cpu' and clear it from cpu_present_map.
+ * Always returns 0.
+ */
+int
+acpi_unmap_lsapic(int cpu)
+{
+	int acpi_id = 0;
+
+	/* Locate the first ACPI id bound to this CPU's SAPIC id, if any. */
+	while (acpi_id < MAX_SAPICS &&
+	       ia64_acpiid_to_sapicid[acpi_id] != ia64_cpu_to_sapicid[cpu])
+		acpi_id++;
+
+	if (acpi_id < MAX_SAPICS)
+		ia64_acpiid_to_sapicid[acpi_id] = -1;
+
+	ia64_cpu_to_sapicid[cpu] = -1;
+	cpu_clear(cpu,cpu_present_map);
+
+#ifdef CONFIG_ACPI_NUMA
+	/* NUMA specific cleanup's */
+#endif
+
+	return(0);
+}
+EXPORT_SYMBOL(acpi_unmap_lsapic);
+#endif /* CONFIG_ACPI_HOTPLUG_CPU */
+
+
+#ifdef CONFIG_ACPI_NUMA
+/*
+ * Namespace-walk callback: for an object whose _MAT method returns an
+ * IOSAPIC MADT entry and whose _PXM method names a proximity domain that
+ * maps to an online node with CPUs, record the IOSAPIC's GSI base -> node
+ * association via map_iosapic_to_node().
+ *
+ * Always returns AE_OK so that the walk continues; objects that do not
+ * qualify are silently skipped.  depth, context and ret are unused.
+ */
+acpi_status __init
+acpi_map_iosapic (acpi_handle handle, u32 depth, void *context, void **ret)
+{
+	struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
+	union acpi_object *obj;
+	struct acpi_table_iosapic *iosapic;
+	unsigned int gsi_base;
+	int node;
+
+	/* Only care about objects w/ a method that returns the MADT */
+	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
+		return AE_OK;
+
+	if (!buffer.length || !buffer.pointer)
+		return AE_OK;
+
+	obj = buffer.pointer;
+	if (obj->type != ACPI_TYPE_BUFFER ||
+	    obj->buffer.length < sizeof(*iosapic)) {
+		acpi_os_free(buffer.pointer);
+		return AE_OK;
+	}
+
+	iosapic = (struct acpi_table_iosapic *)obj->buffer.pointer;
+
+	if (iosapic->header.type != ACPI_MADT_IOSAPIC) {
+		acpi_os_free(buffer.pointer);
+		return AE_OK;
+	}
+
+	/* copy what we need before the _MAT buffer is released */
+	gsi_base = iosapic->global_irq_base;
+
+	acpi_os_free(buffer.pointer);
+	buffer.length = ACPI_ALLOCATE_BUFFER;
+	buffer.pointer = NULL;
+
+	/*
+	 * OK, it's an IOSAPIC MADT entry, look for a _PXM method to tell
+	 * us which node to associate this with.
+	 */
+	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_PXM", NULL, &buffer)))
+		return AE_OK;
+
+	if (!buffer.length || !buffer.pointer)
+		return AE_OK;
+
+	obj = buffer.pointer;
+
+	if (obj->type != ACPI_TYPE_INTEGER ||
+	    obj->integer.value >= MAX_PXM_DOMAINS) {
+		acpi_os_free(buffer.pointer);
+		return AE_OK;
+	}
+
+	node = pxm_to_nid_map[obj->integer.value];
+	acpi_os_free(buffer.pointer);
+
+	/*
+	 * pxm_to_nid_map[] holds -1 for proximity domains that were never
+	 * assigned a node; reject those before node is used as an index.
+	 */
+	if (node < 0 || node >= MAX_NUMNODES || !node_online(node) ||
+	    cpus_empty(node_to_cpumask(node)))
+		return AE_OK;
+
+	/* We know a gsi to node mapping! */
+	map_iosapic_to_node(gsi_base, node);
+	return AE_OK;
+}
+#endif /* CONFIG_ACPI_NUMA */
+#endif /* CONFIG_ACPI_BOOT */
diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
new file mode 100644
index 00000000000..7d1ae2982c5
--- /dev/null
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -0,0 +1,239 @@
+/*
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed
+ * to extract and format the required data.
+ */
+
+#include <linux/config.h>
+
+#include <linux/sched.h>
+
+#include <asm-ia64/processor.h>
+#include <asm-ia64/ptrace.h>
+#include <asm-ia64/siginfo.h>
+#include <asm-ia64/sigcontext.h>
+#include <asm-ia64/mca.h>
+
+#include "../kernel/sigframe.h"
+/*
+ * DEFINE(sym, val) drops a marker line of the form "->sym <value> val"
+ * into the compiler's assembly output: #sym and #val are the stringified
+ * macro arguments and %0 is val passed through the "i" (immediate)
+ * constraint, so val has to be a compile-time constant.  These marker
+ * lines are what the post-processing step mentioned in the file header
+ * extracts.
+ *
+ * BLANK() emits a bare "->" marker with no symbol (presumably rendered as
+ * a separator in the generated output -- the post-processor is not part
+ * of this file).
+ */
+#define DEFINE(sym, val) \
+ asm volatile("\n->" #sym " %0 " #val : : "i" (val))
+
+#define BLANK() asm volatile("\n->" : : )
+
+/*
+ * Container for the DEFINE()/BLANK() statements.  Each DEFINE() emits one
+ * marker line into the generated assembly (see the file header); the order
+ * of the statements below is therefore the order of the emitted symbols.
+ * Every symbol is emitted exactly once.
+ */
+void foo(void)
+{
+	/* Structure sizes. */
+	DEFINE(IA64_TASK_SIZE, sizeof (struct task_struct));
+	DEFINE(IA64_THREAD_INFO_SIZE, sizeof (struct thread_info));
+	DEFINE(IA64_PT_REGS_SIZE, sizeof (struct pt_regs));
+	DEFINE(IA64_SWITCH_STACK_SIZE, sizeof (struct switch_stack));
+	DEFINE(IA64_SIGINFO_SIZE, sizeof (struct siginfo));
+	DEFINE(IA64_CPU_SIZE, sizeof (struct cpuinfo_ia64));
+	DEFINE(SIGFRAME_SIZE, sizeof (struct sigframe));
+	DEFINE(UNW_FRAME_INFO_SIZE, sizeof (struct unw_frame_info));
+
+	BLANK();
+
+	/* struct thread_info */
+	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
+	DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
+
+	BLANK();
+
+	/* struct task_struct */
+	DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked));
+	DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
+	DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
+	DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
+	DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid));
+	DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent));
+	DEFINE(IA64_TASK_SIGHAND_OFFSET,offsetof (struct task_struct, sighand));
+	DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal));
+	DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid));
+	DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct task_struct, thread.ksp));
+	DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct task_struct, thread.on_ustack));
+
+	BLANK();
+
+	/* struct sighand_struct */
+	DEFINE(IA64_SIGHAND_SIGLOCK_OFFSET,offsetof (struct sighand_struct, siglock));
+
+	BLANK();
+
+	/* struct signal_struct */
+	DEFINE(IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,offsetof (struct signal_struct,
+							     group_stop_count));
+	DEFINE(IA64_SIGNAL_SHARED_PENDING_OFFSET,offsetof (struct signal_struct, shared_pending));
+
+	BLANK();
+
+	/* struct pt_regs */
+	DEFINE(IA64_PT_REGS_B6_OFFSET, offsetof (struct pt_regs, b6));
+	DEFINE(IA64_PT_REGS_B7_OFFSET, offsetof (struct pt_regs, b7));
+	DEFINE(IA64_PT_REGS_AR_CSD_OFFSET, offsetof (struct pt_regs, ar_csd));
+	DEFINE(IA64_PT_REGS_AR_SSD_OFFSET, offsetof (struct pt_regs, ar_ssd));
+	DEFINE(IA64_PT_REGS_R8_OFFSET, offsetof (struct pt_regs, r8));
+	DEFINE(IA64_PT_REGS_R9_OFFSET, offsetof (struct pt_regs, r9));
+	DEFINE(IA64_PT_REGS_R10_OFFSET, offsetof (struct pt_regs, r10));
+	DEFINE(IA64_PT_REGS_R11_OFFSET, offsetof (struct pt_regs, r11));
+	DEFINE(IA64_PT_REGS_CR_IPSR_OFFSET, offsetof (struct pt_regs, cr_ipsr));
+	DEFINE(IA64_PT_REGS_CR_IIP_OFFSET, offsetof (struct pt_regs, cr_iip));
+	DEFINE(IA64_PT_REGS_CR_IFS_OFFSET, offsetof (struct pt_regs, cr_ifs));
+	DEFINE(IA64_PT_REGS_AR_UNAT_OFFSET, offsetof (struct pt_regs, ar_unat));
+	DEFINE(IA64_PT_REGS_AR_PFS_OFFSET, offsetof (struct pt_regs, ar_pfs));
+	DEFINE(IA64_PT_REGS_AR_RSC_OFFSET, offsetof (struct pt_regs, ar_rsc));
+	DEFINE(IA64_PT_REGS_AR_RNAT_OFFSET, offsetof (struct pt_regs, ar_rnat));
+
+	DEFINE(IA64_PT_REGS_AR_BSPSTORE_OFFSET, offsetof (struct pt_regs, ar_bspstore));
+	DEFINE(IA64_PT_REGS_PR_OFFSET, offsetof (struct pt_regs, pr));
+	DEFINE(IA64_PT_REGS_B0_OFFSET, offsetof (struct pt_regs, b0));
+	DEFINE(IA64_PT_REGS_LOADRS_OFFSET, offsetof (struct pt_regs, loadrs));
+	DEFINE(IA64_PT_REGS_R1_OFFSET, offsetof (struct pt_regs, r1));
+	DEFINE(IA64_PT_REGS_R12_OFFSET, offsetof (struct pt_regs, r12));
+	DEFINE(IA64_PT_REGS_R13_OFFSET, offsetof (struct pt_regs, r13));
+	DEFINE(IA64_PT_REGS_AR_FPSR_OFFSET, offsetof (struct pt_regs, ar_fpsr));
+	DEFINE(IA64_PT_REGS_R15_OFFSET, offsetof (struct pt_regs, r15));
+	DEFINE(IA64_PT_REGS_R14_OFFSET, offsetof (struct pt_regs, r14));
+	DEFINE(IA64_PT_REGS_R2_OFFSET, offsetof (struct pt_regs, r2));
+	DEFINE(IA64_PT_REGS_R3_OFFSET, offsetof (struct pt_regs, r3));
+	DEFINE(IA64_PT_REGS_R16_OFFSET, offsetof (struct pt_regs, r16));
+	DEFINE(IA64_PT_REGS_R17_OFFSET, offsetof (struct pt_regs, r17));
+	DEFINE(IA64_PT_REGS_R18_OFFSET, offsetof (struct pt_regs, r18));
+	DEFINE(IA64_PT_REGS_R19_OFFSET, offsetof (struct pt_regs, r19));
+	DEFINE(IA64_PT_REGS_R20_OFFSET, offsetof (struct pt_regs, r20));
+	DEFINE(IA64_PT_REGS_R21_OFFSET, offsetof (struct pt_regs, r21));
+	DEFINE(IA64_PT_REGS_R22_OFFSET, offsetof (struct pt_regs, r22));
+	DEFINE(IA64_PT_REGS_R23_OFFSET, offsetof (struct pt_regs, r23));
+	DEFINE(IA64_PT_REGS_R24_OFFSET, offsetof (struct pt_regs, r24));
+	DEFINE(IA64_PT_REGS_R25_OFFSET, offsetof (struct pt_regs, r25));
+	DEFINE(IA64_PT_REGS_R26_OFFSET, offsetof (struct pt_regs, r26));
+	DEFINE(IA64_PT_REGS_R27_OFFSET, offsetof (struct pt_regs, r27));
+	DEFINE(IA64_PT_REGS_R28_OFFSET, offsetof (struct pt_regs, r28));
+	DEFINE(IA64_PT_REGS_R29_OFFSET, offsetof (struct pt_regs, r29));
+	DEFINE(IA64_PT_REGS_R30_OFFSET, offsetof (struct pt_regs, r30));
+	DEFINE(IA64_PT_REGS_R31_OFFSET, offsetof (struct pt_regs, r31));
+	DEFINE(IA64_PT_REGS_AR_CCV_OFFSET, offsetof (struct pt_regs, ar_ccv));
+	DEFINE(IA64_PT_REGS_F6_OFFSET, offsetof (struct pt_regs, f6));
+	DEFINE(IA64_PT_REGS_F7_OFFSET, offsetof (struct pt_regs, f7));
+	DEFINE(IA64_PT_REGS_F8_OFFSET, offsetof (struct pt_regs, f8));
+	DEFINE(IA64_PT_REGS_F9_OFFSET, offsetof (struct pt_regs, f9));
+	DEFINE(IA64_PT_REGS_F10_OFFSET, offsetof (struct pt_regs, f10));
+	DEFINE(IA64_PT_REGS_F11_OFFSET, offsetof (struct pt_regs, f11));
+
+	BLANK();
+
+	/* struct switch_stack */
+	DEFINE(IA64_SWITCH_STACK_CALLER_UNAT_OFFSET, offsetof (struct switch_stack, caller_unat));
+	DEFINE(IA64_SWITCH_STACK_AR_FPSR_OFFSET, offsetof (struct switch_stack, ar_fpsr));
+	DEFINE(IA64_SWITCH_STACK_F2_OFFSET, offsetof (struct switch_stack, f2));
+	DEFINE(IA64_SWITCH_STACK_F3_OFFSET, offsetof (struct switch_stack, f3));
+	DEFINE(IA64_SWITCH_STACK_F4_OFFSET, offsetof (struct switch_stack, f4));
+	DEFINE(IA64_SWITCH_STACK_F5_OFFSET, offsetof (struct switch_stack, f5));
+	DEFINE(IA64_SWITCH_STACK_F12_OFFSET, offsetof (struct switch_stack, f12));
+	DEFINE(IA64_SWITCH_STACK_F13_OFFSET, offsetof (struct switch_stack, f13));
+	DEFINE(IA64_SWITCH_STACK_F14_OFFSET, offsetof (struct switch_stack, f14));
+	DEFINE(IA64_SWITCH_STACK_F15_OFFSET, offsetof (struct switch_stack, f15));
+	DEFINE(IA64_SWITCH_STACK_F16_OFFSET, offsetof (struct switch_stack, f16));
+	DEFINE(IA64_SWITCH_STACK_F17_OFFSET, offsetof (struct switch_stack, f17));
+	DEFINE(IA64_SWITCH_STACK_F18_OFFSET, offsetof (struct switch_stack, f18));
+	DEFINE(IA64_SWITCH_STACK_F19_OFFSET, offsetof (struct switch_stack, f19));
+	DEFINE(IA64_SWITCH_STACK_F20_OFFSET, offsetof (struct switch_stack, f20));
+	DEFINE(IA64_SWITCH_STACK_F21_OFFSET, offsetof (struct switch_stack, f21));
+	DEFINE(IA64_SWITCH_STACK_F22_OFFSET, offsetof (struct switch_stack, f22));
+	DEFINE(IA64_SWITCH_STACK_F23_OFFSET, offsetof (struct switch_stack, f23));
+	DEFINE(IA64_SWITCH_STACK_F24_OFFSET, offsetof (struct switch_stack, f24));
+	DEFINE(IA64_SWITCH_STACK_F25_OFFSET, offsetof (struct switch_stack, f25));
+	DEFINE(IA64_SWITCH_STACK_F26_OFFSET, offsetof (struct switch_stack, f26));
+	DEFINE(IA64_SWITCH_STACK_F27_OFFSET, offsetof (struct switch_stack, f27));
+	DEFINE(IA64_SWITCH_STACK_F28_OFFSET, offsetof (struct switch_stack, f28));
+	DEFINE(IA64_SWITCH_STACK_F29_OFFSET, offsetof (struct switch_stack, f29));
+	DEFINE(IA64_SWITCH_STACK_F30_OFFSET, offsetof (struct switch_stack, f30));
+	DEFINE(IA64_SWITCH_STACK_F31_OFFSET, offsetof (struct switch_stack, f31));
+	DEFINE(IA64_SWITCH_STACK_R4_OFFSET, offsetof (struct switch_stack, r4));
+	DEFINE(IA64_SWITCH_STACK_R5_OFFSET, offsetof (struct switch_stack, r5));
+	DEFINE(IA64_SWITCH_STACK_R6_OFFSET, offsetof (struct switch_stack, r6));
+	DEFINE(IA64_SWITCH_STACK_R7_OFFSET, offsetof (struct switch_stack, r7));
+	DEFINE(IA64_SWITCH_STACK_B0_OFFSET, offsetof (struct switch_stack, b0));
+	DEFINE(IA64_SWITCH_STACK_B1_OFFSET, offsetof (struct switch_stack, b1));
+	DEFINE(IA64_SWITCH_STACK_B2_OFFSET, offsetof (struct switch_stack, b2));
+	DEFINE(IA64_SWITCH_STACK_B3_OFFSET, offsetof (struct switch_stack, b3));
+	DEFINE(IA64_SWITCH_STACK_B4_OFFSET, offsetof (struct switch_stack, b4));
+	DEFINE(IA64_SWITCH_STACK_B5_OFFSET, offsetof (struct switch_stack, b5));
+	DEFINE(IA64_SWITCH_STACK_AR_PFS_OFFSET, offsetof (struct switch_stack, ar_pfs));
+	DEFINE(IA64_SWITCH_STACK_AR_LC_OFFSET, offsetof (struct switch_stack, ar_lc));
+	DEFINE(IA64_SWITCH_STACK_AR_UNAT_OFFSET, offsetof (struct switch_stack, ar_unat));
+	DEFINE(IA64_SWITCH_STACK_AR_RNAT_OFFSET, offsetof (struct switch_stack, ar_rnat));
+	DEFINE(IA64_SWITCH_STACK_AR_BSPSTORE_OFFSET, offsetof (struct switch_stack, ar_bspstore));
+	DEFINE(IA64_SWITCH_STACK_PR_OFFSET, offsetof (struct switch_stack, pr));
+
+	BLANK();
+
+	/* struct sigcontext */
+	DEFINE(IA64_SIGCONTEXT_IP_OFFSET, offsetof (struct sigcontext, sc_ip));
+	DEFINE(IA64_SIGCONTEXT_AR_BSP_OFFSET, offsetof (struct sigcontext, sc_ar_bsp));
+	DEFINE(IA64_SIGCONTEXT_AR_FPSR_OFFSET, offsetof (struct sigcontext, sc_ar_fpsr));
+	DEFINE(IA64_SIGCONTEXT_AR_RNAT_OFFSET, offsetof (struct sigcontext, sc_ar_rnat));
+	DEFINE(IA64_SIGCONTEXT_AR_UNAT_OFFSET, offsetof (struct sigcontext, sc_ar_unat));
+	DEFINE(IA64_SIGCONTEXT_B0_OFFSET, offsetof (struct sigcontext, sc_br[0]));
+	DEFINE(IA64_SIGCONTEXT_CFM_OFFSET, offsetof (struct sigcontext, sc_cfm));
+	DEFINE(IA64_SIGCONTEXT_FLAGS_OFFSET, offsetof (struct sigcontext, sc_flags));
+	DEFINE(IA64_SIGCONTEXT_FR6_OFFSET, offsetof (struct sigcontext, sc_fr[6]));
+	DEFINE(IA64_SIGCONTEXT_PR_OFFSET, offsetof (struct sigcontext, sc_pr));
+	DEFINE(IA64_SIGCONTEXT_R12_OFFSET, offsetof (struct sigcontext, sc_gr[12]));
+	DEFINE(IA64_SIGCONTEXT_RBS_BASE_OFFSET,offsetof (struct sigcontext, sc_rbs_base));
+	DEFINE(IA64_SIGCONTEXT_LOADRS_OFFSET, offsetof (struct sigcontext, sc_loadrs));
+
+	BLANK();
+
+	/* struct sigpending */
+	DEFINE(IA64_SIGPENDING_SIGNAL_OFFSET, offsetof (struct sigpending, signal));
+
+	BLANK();
+
+	/* struct sigframe */
+	DEFINE(IA64_SIGFRAME_ARG0_OFFSET, offsetof (struct sigframe, arg0));
+	DEFINE(IA64_SIGFRAME_ARG1_OFFSET, offsetof (struct sigframe, arg1));
+	DEFINE(IA64_SIGFRAME_ARG2_OFFSET, offsetof (struct sigframe, arg2));
+	DEFINE(IA64_SIGFRAME_HANDLER_OFFSET, offsetof (struct sigframe, handler));
+	DEFINE(IA64_SIGFRAME_SIGCONTEXT_OFFSET, offsetof (struct sigframe, sc));
+	BLANK();
+	/* for assembly files which can't include sched.h: */
+	DEFINE(IA64_CLONE_VFORK, CLONE_VFORK);
+	DEFINE(IA64_CLONE_VM, CLONE_VM);
+
+	BLANK();
+	/* struct cpuinfo_ia64 */
+	DEFINE(IA64_CPUINFO_NSEC_PER_CYC_OFFSET,
+	       offsetof (struct cpuinfo_ia64, nsec_per_cyc));
+	DEFINE(IA64_CPUINFO_PTCE_BASE_OFFSET,
+	       offsetof (struct cpuinfo_ia64, ptce_base));
+	DEFINE(IA64_CPUINFO_PTCE_COUNT_OFFSET,
+	       offsetof (struct cpuinfo_ia64, ptce_count));
+	DEFINE(IA64_CPUINFO_PTCE_STRIDE_OFFSET,
+	       offsetof (struct cpuinfo_ia64, ptce_stride));
+	BLANK();
+	/* struct timespec */
+	DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET,
+	       offsetof (struct timespec, tv_nsec));
+
+	/*
+	 * The bit number is spelled out as a literal; the #if below breaks
+	 * the build if it ever disagrees with CLONE_SETTLS.
+	 */
+	DEFINE(CLONE_SETTLS_BIT, 19);
+#if CLONE_SETTLS != (1<<19)
+# error "CLONE_SETTLS_BIT incorrect, please fix"
+#endif
+
+	BLANK();
+	/* struct ia64_mca_cpu */
+	DEFINE(IA64_MCA_CPU_PROC_STATE_DUMP_OFFSET,
+	       offsetof (struct ia64_mca_cpu, proc_state_dump));
+	DEFINE(IA64_MCA_CPU_STACK_OFFSET,
+	       offsetof (struct ia64_mca_cpu, stack));
+	DEFINE(IA64_MCA_CPU_STACKFRAME_OFFSET,
+	       offsetof (struct ia64_mca_cpu, stackframe));
+	DEFINE(IA64_MCA_CPU_RBSTORE_OFFSET,
+	       offsetof (struct ia64_mca_cpu, rbstore));
+	DEFINE(IA64_MCA_CPU_INIT_STACK_OFFSET,
+	       offsetof (struct ia64_mca_cpu, init_stack));
+	BLANK();
+	/* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */
+	DEFINE(IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET, offsetof (struct time_interpolator, addr));
+	DEFINE(IA64_TIME_INTERPOLATOR_SOURCE_OFFSET, offsetof (struct time_interpolator, source));
+	DEFINE(IA64_TIME_INTERPOLATOR_SHIFT_OFFSET, offsetof (struct time_interpolator, shift));
+	DEFINE(IA64_TIME_INTERPOLATOR_NSEC_OFFSET, offsetof (struct time_interpolator, nsec_per_cyc));
+	DEFINE(IA64_TIME_INTERPOLATOR_OFFSET_OFFSET, offsetof (struct time_interpolator, offset));
+	DEFINE(IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET, offsetof (struct time_interpolator, last_cycle));
+	DEFINE(IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET, offsetof (struct time_interpolator, last_counter));
+	DEFINE(IA64_TIME_INTERPOLATOR_JITTER_OFFSET, offsetof (struct time_interpolator, jitter));
+	DEFINE(IA64_TIME_INTERPOLATOR_MASK_OFFSET, offsetof (struct time_interpolator, mask));
+	DEFINE(IA64_TIME_SOURCE_CPU, TIME_SOURCE_CPU);
+	DEFINE(IA64_TIME_SOURCE_MMIO64, TIME_SOURCE_MMIO64);
+	DEFINE(IA64_TIME_SOURCE_MMIO32, TIME_SOURCE_MMIO32);
+	/* IA64_TIMESPEC_TV_NSEC_OFFSET is already emitted in the timespec section. */
+}
diff --git a/arch/ia64/kernel/brl_emu.c b/arch/ia64/kernel/brl_emu.c
new file mode 100644
index 00000000000..0b286ca164f
--- /dev/null
+++ b/arch/ia64/kernel/brl_emu.c
@@ -0,0 +1,234 @@
+/*
+ * Emulation of the "brl" instruction for IA64 processors that
+ * don't support it in hardware.
+ * Author: Stephan Zeisset, Intel Corp. <Stephan.Zeisset@intel.com>
+ *
+ * 02/22/02 D. Mosberger Clear si_flags, si_isr, and si_imm to avoid
+ * leaking kernel bits.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+
+/*
+ * Fixup entry points, one per branch register b1..b5.  Only their
+ * addresses are used here (handed back through illegal_op_return.fkt).
+ */
+extern char ia64_set_b1;
+extern char ia64_set_b2;
+extern char ia64_set_b3;
+extern char ia64_set_b4;
+extern char ia64_set_b5;
+
+/* Result of ia64_emulate_brl(). */
+struct illegal_op_return {
+	unsigned long fkt;	/* -1: not emulated, 0: done, else fixup address */
+	unsigned long arg1;	/* set to the return address for a long call */
+	unsigned long arg2;	/* not written by ia64_emulate_brl() */
+	unsigned long arg3;	/* not written by ia64_emulate_brl() */
+};
+
+/*
+ * The unimplemented bits of a virtual address must be set
+ * to the value of the most significant implemented bit.
+ * unimpl_va_mask includes all unimplemented bits and
+ * the most significant implemented bit, so the result
+ * of an and operation with the mask must be all 0's
+ * or all 1's for the address to be valid.
+ */
+/*
+ * Returns non-zero when @va is not a valid (implemented) virtual address:
+ * the bits selected by unimpl_va_mask are neither all clear nor all set.
+ * Written as an inline function so the argument is evaluated only once
+ * and is type-checked.
+ */
+static inline int unimplemented_virtual_address(unsigned long va)
+{
+	unsigned long masked = va & local_cpu_data->unimpl_va_mask;
+
+	return masked != 0 && masked != local_cpu_data->unimpl_va_mask;
+}
+
+/*
+ * The unimplemented bits of a physical address must be 0.
+ * unimpl_pa_mask includes all unimplemented bits, so the result
+ * of an and operation with the mask must be all 0's for the
+ * address to be valid.
+ */
+/*
+ * Returns non-zero when @pa has any bit set that is selected by
+ * unimpl_pa_mask, i.e. when it is not a valid physical address.
+ * Written as an inline function so the argument is type-checked.
+ */
+static inline int unimplemented_physical_address(unsigned long pa)
+{
+	return (pa & local_cpu_data->unimpl_pa_mask) != 0;
+}
+
+/*
+ * Handle an illegal operation fault that was caused by an
+ * unimplemented "brl" instruction.
+ * If we are not successful (e.g. because the illegal operation
+ * wasn't caused by a "brl" after all), we return -1.
+ * If we are successful, we return either 0 or the address
+ * of a "fixup" function for manipulating preserved register
+ * state.
+ */
+
+struct illegal_op_return
+ia64_emulate_brl (struct pt_regs *regs, unsigned long ar_ec)
+{
+	/*
+	 * @regs:  saved state of the faulting context; cr_iip, cr_ifs, ar_pfs,
+	 *         b0/b6/b7 and psr.ri are updated in place on success.
+	 * @ar_ec: value placed into the pec field of AR[PFS] for a long call.
+	 *
+	 * rv.fkt is -1 on failure, 0 on plain success, or the address of one
+	 * of the ia64_set_bN fixups; rv.arg1 then carries the return address.
+	 */
+	unsigned long bundle[2];
+	unsigned long opcode, btype, qp, offset, cpl;
+	unsigned long next_ip;
+	struct siginfo siginfo;
+	struct illegal_op_return rv;
+	long tmp_taken, unimplemented_address;
+
+	rv.fkt = (unsigned long) -1;
+
+	/*
+	 * Decode the instruction bundle.
+	 */
+
+	if (copy_from_user(bundle, (void *) (regs->cr_iip), sizeof(bundle)))
+		return rv;
+
+	next_ip = (unsigned long) regs->cr_iip + 16;
+
+	/* "brl" must be in slot 2. */
+	if (ia64_psr(regs)->ri != 1) return rv;
+
+	/* Must be "mlx" template */
+	if ((bundle[0] & 0x1e) != 0x4) return rv;
+
+	opcode = (bundle[1] >> 60);
+	btype = ((bundle[1] >> 29) & 0x7);
+	qp = ((bundle[1] >> 23) & 0x3f);
+	/* Reassemble the branch displacement from its pieces in both words. */
+	offset = ((bundle[1] & 0x0800000000000000L) << 4)
+		| ((bundle[1] & 0x00fffff000000000L) >> 32)
+		| ((bundle[1] & 0x00000000007fffffL) << 40)
+		| ((bundle[0] & 0xffff000000000000L) >> 24);
+
+	/* Non-zero iff the qualifying predicate register is set. */
+	tmp_taken = regs->pr & (1L << qp);
+
+	switch(opcode) {
+
+	      case 0xC:
+		/*
+		 * Long Branch.
+		 */
+		if (btype != 0) return rv;
+		rv.fkt = 0;
+		if (!(tmp_taken)) {
+			/*
+			 * Qualifying predicate is 0.
+			 * Skip instruction.
+			 */
+			regs->cr_iip = next_ip;
+			ia64_psr(regs)->ri = 0;
+			return rv;
+		}
+		break;
+
+	      case 0xD:
+		/*
+		 * Long Call.
+		 */
+		rv.fkt = 0;
+		if (!(tmp_taken)) {
+			/*
+			 * Qualifying predicate is 0.
+			 * Skip instruction.
+			 */
+			regs->cr_iip = next_ip;
+			ia64_psr(regs)->ri = 0;
+			return rv;
+		}
+
+		/*
+		 * BR[btype] = IP+16
+		 *
+		 * b0, b6 and b7 live in pt_regs and are written directly;
+		 * for b1..b5 the address of a fixup is handed back instead.
+		 */
+		switch(btype) {
+		      case 0:
+			regs->b0 = next_ip;
+			break;
+		      case 1:
+			rv.fkt = (unsigned long) &ia64_set_b1;
+			break;
+		      case 2:
+			rv.fkt = (unsigned long) &ia64_set_b2;
+			break;
+		      case 3:
+			rv.fkt = (unsigned long) &ia64_set_b3;
+			break;
+		      case 4:
+			rv.fkt = (unsigned long) &ia64_set_b4;
+			break;
+		      case 5:
+			rv.fkt = (unsigned long) &ia64_set_b5;
+			break;
+		      case 6:
+			regs->b6 = next_ip;
+			break;
+		      case 7:
+			regs->b7 = next_ip;
+			break;
+		}
+		rv.arg1 = next_ip;
+
+		/*
+		 * AR[PFS].pfm = CFM
+		 * AR[PFS].pec = AR[EC]
+		 * AR[PFS].ppl = PSR.cpl
+		 */
+		cpl = ia64_psr(regs)->cpl;
+		regs->ar_pfs = ((regs->cr_ifs & 0x3fffffffff)
+				| (ar_ec << 52) | (cpl << 62));
+
+		/*
+		 * CFM.sof -= CFM.sol
+		 * CFM.sol = 0
+		 * CFM.sor = 0
+		 * CFM.rrb.gr = 0
+		 * CFM.rrb.fr = 0
+		 * CFM.rrb.pr = 0
+		 */
+		regs->cr_ifs = ((regs->cr_ifs & 0xffffffc00000007f)
+				- ((regs->cr_ifs >> 7) & 0x7f));
+
+		break;
+
+	      default:
+		/*
+		 * Unknown opcode.
+		 */
+		return rv;
+
+	}
+
+	/* Branch taken: redirect execution to the target bundle. */
+	regs->cr_iip += offset;
+	ia64_psr(regs)->ri = 0;
+
+	if (ia64_psr(regs)->it == 0)
+		unimplemented_address = unimplemented_physical_address(regs->cr_iip);
+	else
+		unimplemented_address = unimplemented_virtual_address(regs->cr_iip);
+
+	if (unimplemented_address) {
+		/*
+		 * The target address contains unimplemented bits.
+		 */
+		printk(KERN_DEBUG "Woah! Unimplemented Instruction Address Trap!\n");
+		siginfo.si_signo = SIGILL;
+		siginfo.si_errno = 0;
+		siginfo.si_flags = 0;
+		siginfo.si_isr = 0;
+		/*
+		 * siginfo lives on the kernel stack: every field that is
+		 * handed to user space must be written, including si_addr,
+		 * or stale stack contents leak out with the signal.
+		 */
+		siginfo.si_addr = NULL;
+		siginfo.si_imm = 0;
+		siginfo.si_code = ILL_BADIADDR;
+		force_sig_info(SIGILL, &siginfo, current);
+	} else if (ia64_psr(regs)->tb) {
+		/*
+		 * Branch Tracing is enabled.
+		 * Force a taken branch signal.
+		 */
+		siginfo.si_signo = SIGTRAP;
+		siginfo.si_errno = 0;
+		siginfo.si_code = TRAP_BRANCH;
+		siginfo.si_flags = 0;
+		siginfo.si_isr = 0;
+		siginfo.si_addr = NULL;
+		siginfo.si_imm = 0;
+		force_sig_info(SIGTRAP, &siginfo, current);
+	} else if (ia64_psr(regs)->ss) {
+		/*
+		 * Single Step is enabled.
+		 * Force a trace signal.
+		 */
+		siginfo.si_signo = SIGTRAP;
+		siginfo.si_errno = 0;
+		siginfo.si_code = TRAP_TRACE;
+		siginfo.si_flags = 0;
+		siginfo.si_isr = 0;
+		siginfo.si_addr = NULL;
+		siginfo.si_imm = 0;
+		force_sig_info(SIGTRAP, &siginfo, current);
+	}
+	return rv;
+}
diff --git a/arch/ia64/kernel/cyclone.c b/arch/ia64/kernel/cyclone.c
new file mode 100644
index 00000000000..768c7e46957
--- /dev/null
+++ b/arch/ia64/kernel/cyclone.c
@@ -0,0 +1,109 @@
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/time.h>
+#include <linux/errno.h>
+#include <asm/io.h>
+
+/* IBM Summit (EXA) Cyclone counter code*/
+#define CYCLONE_CBAR_ADDR 0xFEB00CD0
+#define CYCLONE_PMCC_OFFSET 0x51A0
+#define CYCLONE_MPMC_OFFSET 0x51D0
+#define CYCLONE_MPCS_OFFSET 0x51A8
+#define CYCLONE_TIMER_FREQ 100000000
+
+/* Non-zero while the Cyclone counter is requested / believed usable. */
+int use_cyclone;
+
+/* Request the Cyclone counter; init_cyclone_clock() acts on the flag. */
+void __init
+cyclone_setup(void)
+{
+	use_cyclone = 1;
+}
+
+
+/*
+ * Time interpolator description for the Cyclone counter.  The .addr
+ * member is filled in by init_cyclone_clock() once the register is mapped.
+ */
+struct time_interpolator cyclone_interpolator = {
+	.source		= TIME_SOURCE_MMIO64,
+	.shift		= 16,
+	.frequency	= CYCLONE_TIMER_FREQ,
+	.drift		= -100,
+	.mask		= (1LL << 40) - 1,
+};
+
+/*
+ * Locate, enable and sanity-check the Cyclone counter, then register it
+ * as a time interpolator.
+ *
+ * Returns 0 on success.  Returns -ENODEV (and, on any failure after the
+ * initial check, clears use_cyclone) when the counter was not requested,
+ * a register could not be mapped, CBAR reads back as 0, or the counter
+ * does not advance.  Temporary register mappings are released on every
+ * path; only the counter mapping handed to the interpolator is kept.
+ */
+int __init init_cyclone_clock(void)
+{
+	u64* reg;
+	u64 base;	/* saved cyclone base address */
+	u64 offset;	/* offset from pageaddr to cyclone_timer register */
+	int i;
+	u32* volatile cyclone_timer;	/* Cyclone MPMC0 register */
+
+	if (!use_cyclone)
+		return -ENODEV;
+
+	printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n");
+
+	/* find base address */
+	offset = (CYCLONE_CBAR_ADDR);
+	reg = (u64*)ioremap_nocache(offset, sizeof(u64));
+	if(!reg){
+		printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n");
+		use_cyclone = 0;
+		return -ENODEV;
+	}
+	base = readq(reg);
+	/* Unmap before validating so the error path cannot leak the mapping. */
+	iounmap(reg);
+	if(!base){
+		printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n");
+		use_cyclone = 0;
+		return -ENODEV;
+	}
+
+	/* setup PMCC */
+	offset = (base + CYCLONE_PMCC_OFFSET);
+	reg = (u64*)ioremap_nocache(offset, sizeof(u64));
+	if(!reg){
+		printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n");
+		use_cyclone = 0;
+		return -ENODEV;
+	}
+	writel(0x00000001,reg);
+	iounmap(reg);
+
+	/* setup MPCS */
+	offset = (base + CYCLONE_MPCS_OFFSET);
+	reg = (u64*)ioremap_nocache(offset, sizeof(u64));
+	if(!reg){
+		printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n");
+		use_cyclone = 0;
+		return -ENODEV;
+	}
+	writel(0x00000001,reg);
+	iounmap(reg);
+
+	/* map in cyclone_timer */
+	offset = (base + CYCLONE_MPMC_OFFSET);
+	cyclone_timer = (u32*)ioremap_nocache(offset, sizeof(u32));
+	if(!cyclone_timer){
+		printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n");
+		use_cyclone = 0;
+		return -ENODEV;
+	}
+
+	/* quick test to make sure it's ticking */
+	for(i=0; i<3; i++){
+		u32 old = readl(cyclone_timer);
+		int stall = 100;
+		while(stall--) barrier();
+		if(readl(cyclone_timer) == old){
+			printk(KERN_ERR "Summit chipset: Counter not counting! DISABLED\n");
+			iounmap(cyclone_timer);
+			cyclone_timer = NULL;
+			use_cyclone = 0;
+			return -ENODEV;
+		}
+	}
+	/* hand the mapped counter to the interpolator and register it */
+	cyclone_interpolator.addr = cyclone_timer;
+	register_time_interpolator(&cyclone_interpolator);
+
+	return 0;
+}
+
+__initcall(init_cyclone_clock);
diff --git a/arch/ia64/kernel/domain.c b/arch/ia64/kernel/domain.c
new file mode 100644
index 00000000000..fe532c97043
--- /dev/null
+++ b/arch/ia64/kernel/domain.c
@@ -0,0 +1,382 @@
+/*
+ * arch/ia64/kernel/domain.c
+ * Architecture specific sched-domains builder.
+ *
+ * Copyright (C) 2004 Jesse Barnes
+ * Copyright (C) 2004 Silicon Graphics, Inc.
+ */
+
+#include <linux/sched.h>
+#include <linux/percpu.h>
+#include <linux/slab.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/topology.h>
+#include <linux/nodemask.h>
+
+#define SD_NODES_PER_DOMAIN 6
+
+#ifdef CONFIG_NUMA
+/**
+ * find_next_best_node - find the next node to include in a sched_domain
+ * @node: node whose sched_domain we're building
+ * @used_nodes: nodes already in the sched_domain
+ *
+ * Find the next node to include in a given scheduling domain. Simply
+ * finds the closest node not already in the @used_nodes map.
+ *
+ * Should use nodemask_t.
+ */
+static int __devinit find_next_best_node(int node, unsigned long *used_nodes)
+{
+	int step;
+	int closest = 0;		/* result if no candidate qualifies */
+	int closest_dist = INT_MAX;
+
+	/* Visit every node id once, beginning with @node and wrapping. */
+	for (step = 0; step < MAX_NUMNODES; step++) {
+		int cand = (node + step) % MAX_NUMNODES;
+		int dist;
+
+		/* Ignore nodes without CPUs and nodes already taken. */
+		if (!nr_cpus_node(cand) || test_bit(cand, used_nodes))
+			continue;
+
+		/* Keep the first candidate with the strictly smallest distance. */
+		dist = node_distance(node, cand);
+		if (dist >= closest_dist)
+			continue;
+
+		closest_dist = dist;
+		closest = cand;
+	}
+
+	/* Mark the pick as used so the next call moves on to another node. */
+	set_bit(closest, used_nodes);
+	return closest;
+}
+
+/**
+ * sched_domain_node_span - get a cpumask for a node's sched_domain
+ * @node: node whose cpumask we're constructing
+ *
+ * Builds the span from the CPUs of @node plus those of the next
+ * SD_NODES_PER_DOMAIN - 1 nodes chosen by find_next_best_node().  The
+ * intent is a span that prevents unnecessary balancing, but also spreads
+ * tasks out optimally.
+ */
+static cpumask_t __devinit sched_domain_node_span(int node)
+{
+	DECLARE_BITMAP(used_nodes, MAX_NUMNODES);
+	cpumask_t span, nodemask;
+	int extra = SD_NODES_PER_DOMAIN - 1;
+
+	bitmap_zero(used_nodes, MAX_NUMNODES);
+	cpus_clear(span);
+
+	/* Seed the span with the node itself. */
+	nodemask = node_to_cpumask(node);
+	cpus_or(span, span, nodemask);
+	set_bit(node, used_nodes);
+
+	/* Then pull in the remaining nodes, closest first. */
+	while (extra-- > 0) {
+		int next_node = find_next_best_node(node, used_nodes);
+
+		nodemask = node_to_cpumask(next_node);
+		cpus_or(span, span, nodemask);
+	}
+
+	return span;
+}
+#endif
+
+/*
+ * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we
+ * can switch it on easily if needed.
+ */
+#ifdef CONFIG_SCHED_SMT
+/* Sibling level: one domain per CPU and one group slot per CPU. */
+static struct sched_group sched_group_cpus[NR_CPUS];
+static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
+
+/* At the sibling level every CPU forms its own group. */
+static int __devinit cpu_to_cpu_group(int cpu)
+{
+	int group = cpu;
+
+	return group;
+}
+#endif
+
+/* Physical level: one domain per CPU and one group slot per CPU. */
+static struct sched_group sched_group_phys[NR_CPUS];
+static DEFINE_PER_CPU(struct sched_domain, phys_domains);
+
+/*
+ * Group index of @cpu at the physical level: the first CPU of its sibling
+ * map when SMT is configured, otherwise the CPU itself.
+ */
+static int __devinit cpu_to_phys_group(int cpu)
+{
+	int group = cpu;
+
+#ifdef CONFIG_SCHED_SMT
+	group = first_cpu(cpu_sibling_map[cpu]);
+#endif
+	return group;
+}
+
+#ifdef CONFIG_NUMA
+/*
+ * init_sched_build_groups() cannot express what is needed for the node
+ * groups, so they are built by hand: each node gets its own dynamically
+ * allocated list of groups, anchored in sched_group_nodes[].
+ */
+static struct sched_group *sched_group_nodes[MAX_NUMNODES];
+static DEFINE_PER_CPU(struct sched_domain, node_domains);
+
+/* The all-nodes level uses a static array with one group per node. */
+static struct sched_group sched_group_allnodes[MAX_NUMNODES];
+static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
+
+/* At the all-nodes level a CPU's group index is its node number. */
+static int __devinit cpu_to_allnodes_group(int cpu)
+{
+	int group = cpu_to_node(cpu);
+
+	return group;
+}
+#endif
+
+/*
+ * Set up scheduler domains and groups. Callers must hold the hotplug lock.
+ */
+void __devinit arch_init_sched_domains(void)
+{
+	int i;
+	cpumask_t cpu_default_map;
+
+	/*
+	 * Setup mask for cpus without special case scheduling requirements.
+	 * For now this just excludes isolated cpus, but could be used to
+	 * exclude other special cases in the future.
+	 */
+	cpus_complement(cpu_default_map, cpu_isolated_map);
+	cpus_and(cpu_default_map, cpu_default_map, cpu_online_map);
+
+	/*
+	 * Set up domains. Isolated domains just stay on the dummy domain.
+	 * The hierarchy is built top-down per CPU; "p" always holds the
+	 * parent of the domain being filled in.
+	 */
+	for_each_cpu_mask(i, cpu_default_map) {
+		int group;
+		struct sched_domain *sd = NULL, *p;
+		cpumask_t nodemask = node_to_cpumask(cpu_to_node(i));
+
+		cpus_and(nodemask, nodemask, cpu_default_map);
+
+#ifdef CONFIG_NUMA
+		/* Only add the all-nodes level when the machine is large enough. */
+		if (num_online_cpus()
+				> SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
+			sd = &per_cpu(allnodes_domains, i);
+			*sd = SD_ALLNODES_INIT;
+			sd->span = cpu_default_map;
+			group = cpu_to_allnodes_group(i);
+			sd->groups = &sched_group_allnodes[group];
+			p = sd;
+		} else
+			p = NULL;
+
+		/* Node level; its groups are attached further down. */
+		sd = &per_cpu(node_domains, i);
+		*sd = SD_NODE_INIT;
+		sd->span = sched_domain_node_span(cpu_to_node(i));
+		sd->parent = p;
+		cpus_and(sd->span, sd->span, cpu_default_map);
+#endif
+
+		p = sd;
+		sd = &per_cpu(phys_domains, i);
+		group = cpu_to_phys_group(i);
+		*sd = SD_CPU_INIT;
+		sd->span = nodemask;
+		sd->parent = p;
+		sd->groups = &sched_group_phys[group];
+
+#ifdef CONFIG_SCHED_SMT
+		p = sd;
+		sd = &per_cpu(cpu_domains, i);
+		group = cpu_to_cpu_group(i);
+		*sd = SD_SIBLING_INIT;
+		sd->span = cpu_sibling_map[i];
+		cpus_and(sd->span, sd->span, cpu_default_map);
+		sd->parent = p;
+		sd->groups = &sched_group_cpus[group];
+#endif
+	}
+
+#ifdef CONFIG_SCHED_SMT
+	/* Set up CPU (sibling) groups */
+	for_each_cpu_mask(i, cpu_default_map) {
+		cpumask_t this_sibling_map = cpu_sibling_map[i];
+		cpus_and(this_sibling_map, this_sibling_map, cpu_default_map);
+		/* Build each sibling set once, from its first CPU. */
+		if (i != first_cpu(this_sibling_map))
+			continue;
+
+		init_sched_build_groups(sched_group_cpus, this_sibling_map,
+						&cpu_to_cpu_group);
+	}
+#endif
+
+	/* Set up physical groups */
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		cpumask_t nodemask = node_to_cpumask(i);
+
+		cpus_and(nodemask, nodemask, cpu_default_map);
+		if (cpus_empty(nodemask))
+			continue;
+
+		init_sched_build_groups(sched_group_phys, nodemask,
+						&cpu_to_phys_group);
+	}
+
+#ifdef CONFIG_NUMA
+	init_sched_build_groups(sched_group_allnodes, cpu_default_map,
+				&cpu_to_allnodes_group);
+
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		/* Set up node groups */
+		struct sched_group *sg, *prev;
+		cpumask_t nodemask = node_to_cpumask(i);
+		cpumask_t domainspan;
+		cpumask_t covered = CPU_MASK_NONE;
+		int j;
+
+		cpus_and(nodemask, nodemask, cpu_default_map);
+		if (cpus_empty(nodemask))
+			continue;
+
+		domainspan = sched_domain_node_span(i);
+		cpus_and(domainspan, domainspan, cpu_default_map);
+
+		/*
+		 * The first group covers node i itself and becomes the head
+		 * of a circular list.  It is published (even if NULL) before
+		 * the allocation result is checked so that the node domains
+		 * of this node get balancing switched off on failure.
+		 */
+		sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
+		sched_group_nodes[i] = sg;
+		for_each_cpu_mask(j, nodemask) {
+			struct sched_domain *sd;
+			sd = &per_cpu(node_domains, j);
+			sd->groups = sg;
+			if (sd->groups == NULL) {
+				/* Turn off balancing if we have no groups */
+				sd->flags = 0;
+			}
+		}
+		if (!sg) {
+			printk(KERN_WARNING
+			"Can not alloc domain group for node %d\n", i);
+			continue;
+		}
+		sg->cpu_power = 0;
+		sg->cpumask = nodemask;
+		cpus_or(covered, covered, nodemask);
+		prev = sg;
+
+		/* Append one group per further node until the span is covered. */
+		for (j = 0; j < MAX_NUMNODES; j++) {
+			cpumask_t tmp, notcovered;
+			int n = (i + j) % MAX_NUMNODES;
+
+			cpus_complement(notcovered, covered);
+			cpus_and(tmp, notcovered, cpu_default_map);
+			cpus_and(tmp, tmp, domainspan);
+			if (cpus_empty(tmp))
+				break;
+
+			nodemask = node_to_cpumask(n);
+			cpus_and(tmp, tmp, nodemask);
+			if (cpus_empty(tmp))
+				continue;
+
+			sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
+			if (!sg) {
+				/* Report the node (n), not the loop counter. */
+				printk(KERN_WARNING
+				"Can not alloc domain group for node %d\n", n);
+				break;
+			}
+			sg->cpu_power = 0;
+			sg->cpumask = tmp;
+			cpus_or(covered, covered, tmp);
+			prev->next = sg;
+			prev = sg;
+		}
+		/* Close the ring. */
+		prev->next = sched_group_nodes[i];
+	}
+#endif
+
+	/* Calculate CPU power for physical packages and nodes */
+	for_each_cpu_mask(i, cpu_default_map) {
+		int power;
+		struct sched_domain *sd;
+#ifdef CONFIG_SCHED_SMT
+		sd = &per_cpu(cpu_domains, i);
+		power = SCHED_LOAD_SCALE;
+		sd->groups->cpu_power = power;
+#endif
+
+		sd = &per_cpu(phys_domains, i);
+		power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
+				(cpus_weight(sd->groups->cpumask)-1) / 10;
+		sd->groups->cpu_power = power;
+
+#ifdef CONFIG_NUMA
+		/* groups is only set when the all-nodes level was created. */
+		sd = &per_cpu(allnodes_domains, i);
+		if (sd->groups) {
+			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
+				(cpus_weight(sd->groups->cpumask)-1) / 10;
+			sd->groups->cpu_power = power;
+		}
+#endif
+	}
+
+#ifdef CONFIG_NUMA
+	/* Sum the physical-package power into every node group of each ring. */
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		struct sched_group *sg = sched_group_nodes[i];
+		int j;
+
+		if (sg == NULL)
+			continue;
+next_sg:
+		for_each_cpu_mask(j, sg->cpumask) {
+			struct sched_domain *sd;
+			int power;
+
+			sd = &per_cpu(phys_domains, j);
+			if (j != first_cpu(sd->groups->cpumask)) {
+				/*
+				 * Only add "power" once for each
+				 * physical package.
+				 */
+				continue;
+			}
+			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
+				(cpus_weight(sd->groups->cpumask)-1) / 10;
+
+			sg->cpu_power += power;
+		}
+		sg = sg->next;
+		if (sg != sched_group_nodes[i])
+			goto next_sg;
+	}
+#endif
+
+	/* Attach the domains */
+	for_each_online_cpu(i) {
+		struct sched_domain *sd;
+#ifdef CONFIG_SCHED_SMT
+		sd = &per_cpu(cpu_domains, i);
+#else
+		sd = &per_cpu(phys_domains, i);
+#endif
+		cpu_attach_domain(sd, i);
+	}
+}
+
+/*
+ * Free the dynamically allocated node groups (NUMA only) and clear their
+ * anchors in sched_group_nodes[].  Without CONFIG_NUMA this is a no-op.
+ */
+void __devinit arch_destroy_sched_domains(void)
+{
+#ifdef CONFIG_NUMA
+	int node;
+
+	for (node = 0; node < MAX_NUMNODES; node++) {
+		struct sched_group *head = sched_group_nodes[node];
+		struct sched_group *cur, *victim;
+
+		if (!head)
+			continue;
+
+		/*
+		 * The groups form a ring.  Start with the element after the
+		 * head so that the head itself is the last one released.
+		 */
+		cur = head->next;
+		do {
+			victim = cur;
+			cur = cur->next;
+			kfree(victim);
+		} while (victim != head);
+
+		sched_group_nodes[node] = NULL;
+	}
+#endif
+}
+
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
new file mode 100644
index 00000000000..4a3b1aac43e
--- /dev/null
+++ b/arch/ia64/kernel/efi.c
@@ -0,0 +1,832 @@
+/*
+ * Extensible Firmware Interface
+ *
+ * Based on Extensible Firmware Interface Specification version 0.9 April 30, 1999
+ *
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999-2003 Hewlett-Packard Co.
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ * Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * All EFI Runtime Services are not implemented yet as EFI only
+ * supports physical mode addressing on SoftSDV. This is to be fixed
+ * in a future version. --drummond 1999-07-20
+ *
+ * Implemented EFI runtime services and virtual mode calls. --davidm
+ *
+ * Goutham Rao: <goutham.rao@intel.com>
+ * Skip non-WB memory and ignore empty memory ranges.
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/time.h>
+#include <linux/efi.h>
+
+#include <asm/io.h>
+#include <asm/kregs.h>
+#include <asm/meminit.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/mca.h>
+
+#define EFI_DEBUG 0
+
+extern efi_status_t efi_call_phys (void *, ...);
+
+struct efi efi;
+EXPORT_SYMBOL(efi);
+static efi_runtime_services_t *runtime;
+static unsigned long mem_limit = ~0UL, max_addr = ~0UL;
+
+#define efi_call_virt(f, args...) (*(f))(args)
+
+#define STUB_GET_TIME(prefix, adjust_arg) \
+static efi_status_t \
+prefix##_get_time (efi_time_t *tm, efi_time_cap_t *tc) \
+{ \
+ struct ia64_fpreg fr[6]; \
+ efi_time_cap_t *atc = NULL; \
+ efi_status_t ret; \
+ \
+ if (tc) \
+ atc = adjust_arg(tc); \
+ ia64_save_scratch_fpregs(fr); \
+ ret = efi_call_##prefix((efi_get_time_t *) __va(runtime->get_time), adjust_arg(tm), atc); \
+ ia64_load_scratch_fpregs(fr); \
+ return ret; \
+}
+
+#define STUB_SET_TIME(prefix, adjust_arg) \
+static efi_status_t \
+prefix##_set_time (efi_time_t *tm) \
+{ \
+ struct ia64_fpreg fr[6]; \
+ efi_status_t ret; \
+ \
+ ia64_save_scratch_fpregs(fr); \
+ ret = efi_call_##prefix((efi_set_time_t *) __va(runtime->set_time), adjust_arg(tm)); \
+ ia64_load_scratch_fpregs(fr); \
+ return ret; \
+}
+
+#define STUB_GET_WAKEUP_TIME(prefix, adjust_arg) \
+static efi_status_t \
+prefix##_get_wakeup_time (efi_bool_t *enabled, efi_bool_t *pending, efi_time_t *tm) \
+{ \
+ struct ia64_fpreg fr[6]; \
+ efi_status_t ret; \
+ \
+ ia64_save_scratch_fpregs(fr); \
+ ret = efi_call_##prefix((efi_get_wakeup_time_t *) __va(runtime->get_wakeup_time), \
+ adjust_arg(enabled), adjust_arg(pending), adjust_arg(tm)); \
+ ia64_load_scratch_fpregs(fr); \
+ return ret; \
+}
+
+#define STUB_SET_WAKEUP_TIME(prefix, adjust_arg) \
+static efi_status_t \
+prefix##_set_wakeup_time (efi_bool_t enabled, efi_time_t *tm) \
+{ \
+ struct ia64_fpreg fr[6]; \
+ efi_time_t *atm = NULL; \
+ efi_status_t ret; \
+ \
+ if (tm) \
+ atm = adjust_arg(tm); \
+ ia64_save_scratch_fpregs(fr); \
+ ret = efi_call_##prefix((efi_set_wakeup_time_t *) __va(runtime->set_wakeup_time), \
+ enabled, atm); \
+ ia64_load_scratch_fpregs(fr); \
+ return ret; \
+}
+
+#define STUB_GET_VARIABLE(prefix, adjust_arg) \
+static efi_status_t \
+prefix##_get_variable (efi_char16_t *name, efi_guid_t *vendor, u32 *attr, \
+ unsigned long *data_size, void *data) \
+{ \
+ struct ia64_fpreg fr[6]; \
+ u32 *aattr = NULL; \
+ efi_status_t ret; \
+ \
+ if (attr) \
+ aattr = adjust_arg(attr); \
+ ia64_save_scratch_fpregs(fr); \
+ ret = efi_call_##prefix((efi_get_variable_t *) __va(runtime->get_variable), \
+ adjust_arg(name), adjust_arg(vendor), aattr, \
+ adjust_arg(data_size), adjust_arg(data)); \
+ ia64_load_scratch_fpregs(fr); \
+ return ret; \
+}
+
+#define STUB_GET_NEXT_VARIABLE(prefix, adjust_arg) \
+static efi_status_t \
+prefix##_get_next_variable (unsigned long *name_size, efi_char16_t *name, efi_guid_t *vendor) \
+{ \
+ struct ia64_fpreg fr[6]; \
+ efi_status_t ret; \
+ \
+ ia64_save_scratch_fpregs(fr); \
+ ret = efi_call_##prefix((efi_get_next_variable_t *) __va(runtime->get_next_variable), \
+ adjust_arg(name_size), adjust_arg(name), adjust_arg(vendor)); \
+ ia64_load_scratch_fpregs(fr); \
+ return ret; \
+}
+
+#define STUB_SET_VARIABLE(prefix, adjust_arg) \
+static efi_status_t \
+prefix##_set_variable (efi_char16_t *name, efi_guid_t *vendor, unsigned long attr, \
+ unsigned long data_size, void *data) \
+{ \
+ struct ia64_fpreg fr[6]; \
+ efi_status_t ret; \
+ \
+ ia64_save_scratch_fpregs(fr); \
+ ret = efi_call_##prefix((efi_set_variable_t *) __va(runtime->set_variable), \
+ adjust_arg(name), adjust_arg(vendor), attr, data_size, \
+ adjust_arg(data)); \
+ ia64_load_scratch_fpregs(fr); \
+ return ret; \
+}
+
+#define STUB_GET_NEXT_HIGH_MONO_COUNT(prefix, adjust_arg) \
+static efi_status_t \
+prefix##_get_next_high_mono_count (u32 *count) \
+{ \
+ struct ia64_fpreg fr[6]; \
+ efi_status_t ret; \
+ \
+ ia64_save_scratch_fpregs(fr); \
+ ret = efi_call_##prefix((efi_get_next_high_mono_count_t *) \
+ __va(runtime->get_next_high_mono_count), adjust_arg(count)); \
+ ia64_load_scratch_fpregs(fr); \
+ return ret; \
+}
+
+#define STUB_RESET_SYSTEM(prefix, adjust_arg) \
+static void \
+prefix##_reset_system (int reset_type, efi_status_t status, \
+ unsigned long data_size, efi_char16_t *data) \
+{ \
+ struct ia64_fpreg fr[6]; \
+ efi_char16_t *adata = NULL; \
+ \
+ if (data) \
+ adata = adjust_arg(data); \
+ \
+ ia64_save_scratch_fpregs(fr); \
+ efi_call_##prefix((efi_reset_system_t *) __va(runtime->reset_system), \
+ reset_type, status, data_size, adata); \
+ /* should not return, but just in case... */ \
+ ia64_load_scratch_fpregs(fr); \
+}
+
+#define phys_ptr(arg) ((__typeof__(arg)) ia64_tpa(arg))
+
+STUB_GET_TIME(phys, phys_ptr)
+STUB_SET_TIME(phys, phys_ptr)
+STUB_GET_WAKEUP_TIME(phys, phys_ptr)
+STUB_SET_WAKEUP_TIME(phys, phys_ptr)
+STUB_GET_VARIABLE(phys, phys_ptr)
+STUB_GET_NEXT_VARIABLE(phys, phys_ptr)
+STUB_SET_VARIABLE(phys, phys_ptr)
+STUB_GET_NEXT_HIGH_MONO_COUNT(phys, phys_ptr)
+STUB_RESET_SYSTEM(phys, phys_ptr)
+
+#define id(arg) arg
+
+STUB_GET_TIME(virt, id)
+STUB_SET_TIME(virt, id)
+STUB_GET_WAKEUP_TIME(virt, id)
+STUB_SET_WAKEUP_TIME(virt, id)
+STUB_GET_VARIABLE(virt, id)
+STUB_GET_NEXT_VARIABLE(virt, id)
+STUB_SET_VARIABLE(virt, id)
+STUB_GET_NEXT_HIGH_MONO_COUNT(virt, id)
+STUB_RESET_SYSTEM(virt, id)
+
+void
+efi_gettimeofday (struct timespec *ts) /* fill *ts from the EFI get_time service */
+{
+ efi_time_t tm;
+
+ memset(ts, 0, sizeof(*ts)); /* clear the whole timespec; sizeof(ts) would only cover pointer-size bytes */
+ if ((*efi.get_time)(&tm, NULL) != EFI_SUCCESS)
+ return; /* on failure the caller sees an all-zero *ts */
+
+ ts->tv_sec = mktime(tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second);
+ ts->tv_nsec = tm.nanosecond;
+}
+
+static int
+is_available_memory (efi_memory_desc_t *md)
+{
+ if (!(md->attribute & EFI_MEMORY_WB))
+ return 0;
+
+ switch (md->type) {
+ case EFI_LOADER_CODE:
+ case EFI_LOADER_DATA:
+ case EFI_BOOT_SERVICES_CODE:
+ case EFI_BOOT_SERVICES_DATA:
+ case EFI_CONVENTIONAL_MEMORY:
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Trim descriptor MD so it starts at address START_ADDR. If the descriptor covers
+ * memory that is normally available to the kernel, issue a warning that some memory
+ * is being ignored.
+ */
+static void
+trim_bottom (efi_memory_desc_t *md, u64 start_addr)
+{
+ u64 num_skipped_pages;
+
+ if (md->phys_addr >= start_addr || !md->num_pages)
+ return;
+
+ num_skipped_pages = (start_addr - md->phys_addr) >> EFI_PAGE_SHIFT;
+ if (num_skipped_pages > md->num_pages)
+ num_skipped_pages = md->num_pages;
+
+ if (is_available_memory(md))
+ printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole "
+ "at 0x%lx\n", __FUNCTION__,
+ (num_skipped_pages << EFI_PAGE_SHIFT) >> 10,
+ md->phys_addr, start_addr - IA64_GRANULE_SIZE);
+ /*
+ * NOTE: Don't set md->phys_addr to START_ADDR because that could cause the memory
+ * descriptor list to become unsorted. In such a case, md->num_pages will be
+ * zero, so the Right Thing will happen.
+ */
+ md->phys_addr += num_skipped_pages << EFI_PAGE_SHIFT;
+ md->num_pages -= num_skipped_pages;
+}
+
+static void
+trim_top (efi_memory_desc_t *md, u64 end_addr)
+{
+ u64 num_dropped_pages, md_end_addr;
+
+ md_end_addr = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
+
+ if (md_end_addr <= end_addr || !md->num_pages)
+ return;
+
+ num_dropped_pages = (md_end_addr - end_addr) >> EFI_PAGE_SHIFT;
+ if (num_dropped_pages > md->num_pages)
+ num_dropped_pages = md->num_pages;
+
+ if (is_available_memory(md))
+ printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole "
+ "at 0x%lx\n", __FUNCTION__,
+ (num_dropped_pages << EFI_PAGE_SHIFT) >> 10,
+ md->phys_addr, end_addr);
+ md->num_pages -= num_dropped_pages;
+}
+
+/*
+ * Walks the EFI memory map and calls CALLBACK once for each EFI memory descriptor that
+ * has memory that is available for OS use.
+ */
+void
+efi_memmap_walk (efi_freemem_callback_t callback, void *arg)
+{
+ int prev_valid = 0;
+ struct range {
+ u64 start;
+ u64 end;
+ } prev, curr;
+ void *efi_map_start, *efi_map_end, *p, *q;
+ efi_memory_desc_t *md, *check_md;
+ u64 efi_desc_size, start, end, granule_addr, last_granule_addr, first_non_wb_addr = 0;
+ unsigned long total_mem = 0;
+
+ efi_map_start = __va(ia64_boot_param->efi_memmap);
+ efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
+ efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+ for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+ md = p;
+
+ /* skip over non-WB memory descriptors; that's all we're interested in... */
+ if (!(md->attribute & EFI_MEMORY_WB))
+ continue;
+
+ /*
+ * granule_addr is the base of md's first granule.
+ * [granule_addr - first_non_wb_addr) is guaranteed to
+ * be contiguous WB memory.
+ */
+ granule_addr = GRANULEROUNDDOWN(md->phys_addr);
+ first_non_wb_addr = max(first_non_wb_addr, granule_addr);
+
+ if (first_non_wb_addr < md->phys_addr) {
+ trim_bottom(md, granule_addr + IA64_GRANULE_SIZE);
+ granule_addr = GRANULEROUNDDOWN(md->phys_addr);
+ first_non_wb_addr = max(first_non_wb_addr, granule_addr);
+ }
+
+ for (q = p; q < efi_map_end; q += efi_desc_size) {
+ check_md = q;
+
+ if ((check_md->attribute & EFI_MEMORY_WB) &&
+ (check_md->phys_addr == first_non_wb_addr))
+ first_non_wb_addr += check_md->num_pages << EFI_PAGE_SHIFT;
+ else
+ break; /* non-WB or hole */
+ }
+
+ last_granule_addr = GRANULEROUNDDOWN(first_non_wb_addr);
+ if (last_granule_addr < md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT))
+ trim_top(md, last_granule_addr);
+
+ if (is_available_memory(md)) {
+ if (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) >= max_addr) {
+ if (md->phys_addr >= max_addr)
+ continue;
+ md->num_pages = (max_addr - md->phys_addr) >> EFI_PAGE_SHIFT;
+ first_non_wb_addr = max_addr;
+ }
+
+ if (total_mem >= mem_limit)
+ continue;
+
+ if (total_mem + (md->num_pages << EFI_PAGE_SHIFT) > mem_limit) {
+ unsigned long limit_addr = md->phys_addr;
+
+ limit_addr += mem_limit - total_mem;
+ limit_addr = GRANULEROUNDDOWN(limit_addr);
+
+ if (md->phys_addr > limit_addr)
+ continue;
+
+ md->num_pages = (limit_addr - md->phys_addr) >>
+ EFI_PAGE_SHIFT;
+ first_non_wb_addr = max_addr = md->phys_addr +
+ (md->num_pages << EFI_PAGE_SHIFT);
+ }
+ total_mem += (md->num_pages << EFI_PAGE_SHIFT);
+
+ if (md->num_pages == 0)
+ continue;
+
+ curr.start = PAGE_OFFSET + md->phys_addr;
+ curr.end = curr.start + (md->num_pages << EFI_PAGE_SHIFT);
+
+ if (!prev_valid) {
+ prev = curr;
+ prev_valid = 1;
+ } else {
+ if (curr.start < prev.start)
+ printk(KERN_ERR "Oops: EFI memory table not ordered!\n");
+
+ if (prev.end == curr.start) {
+ /* merge two consecutive memory ranges */
+ prev.end = curr.end;
+ } else {
+ start = PAGE_ALIGN(prev.start);
+ end = prev.end & PAGE_MASK;
+ if ((end > start) && (*callback)(start, end, arg) < 0)
+ return;
+ prev = curr;
+ }
+ }
+ }
+ }
+ if (prev_valid) {
+ start = PAGE_ALIGN(prev.start);
+ end = prev.end & PAGE_MASK;
+ if (end > start)
+ (*callback)(start, end, arg);
+ }
+}
+
+/*
+ * Look for the PAL_CODE region reported by EFI and map it using an
+ * ITR to enable safe PAL calls in virtual mode. See IA-64 Processor
+ * Abstraction Layer chapter 11 in ADAG.
+ */
+
+void *
+efi_get_pal_addr (void) /* virtual address of the PAL code that needs its own ITR, or NULL */
+{
+ void *efi_map_start, *efi_map_end, *p;
+ efi_memory_desc_t *md;
+ u64 efi_desc_size;
+ int pal_code_count = 0;
+ u64 vaddr, mask;
+
+ efi_map_start = __va(ia64_boot_param->efi_memmap);
+ efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
+ efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+ for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+ md = p;
+ if (md->type != EFI_PAL_CODE)
+ continue;
+
+ if (++pal_code_count > 1) { /* only the first PAL code descriptor is considered */
+ printk(KERN_ERR "Too many EFI Pal Code memory ranges, dropped @ %lx\n",
+ md->phys_addr);
+ continue;
+ }
+ /*
+ * The only ITLB entry in region 7 that is used is the one installed by
+ * __start(). That entry covers a 64MB range.
+ */
+ mask = ~((1 << KERNEL_TR_PAGE_SHIFT) - 1);
+ vaddr = PAGE_OFFSET + md->phys_addr;
+
+ /*
+ * We must check that the PAL mapping won't overlap with the kernel
+ * mapping.
+ *
+ * PAL code is guaranteed to be aligned on a power of 2 between 4k and
+ * 256KB and that only one ITR is needed to map it. This implies that the
+ * PAL code is always aligned on its size, i.e., the closest matching page
+ * size supported by the TLB. Therefore PAL code is guaranteed never to
+ * cross a 64MB unless it is bigger than 64MB (very unlikely!). So for
+ * now the following test is enough to determine whether or not we need a
+ * dedicated ITR for the PAL code.
+ */
+ if ((vaddr & mask) == (KERNEL_START & mask)) {
+ printk(KERN_INFO "%s: no need to install ITR for PAL code\n",
+ __FUNCTION__);
+ continue;
+ }
+
+ if (md->num_pages << EFI_PAGE_SHIFT > IA64_GRANULE_SIZE)
+ panic("Woah! PAL code size bigger than a granule!");
+
+#if EFI_DEBUG
+ mask = ~((1 << IA64_GRANULE_SHIFT) - 1);
+
+ printk(KERN_INFO "CPU %d: mapping PAL code [0x%lx-0x%lx) into [0x%lx-0x%lx)\n",
+ smp_processor_id(), md->phys_addr,
+ md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
+ vaddr & mask, (vaddr & mask) + IA64_GRANULE_SIZE);
+#endif
+ return __va(md->phys_addr);
+ }
+ printk(KERN_WARNING "%s: no PAL-code memory-descriptor found\n", /* newline ends the log record */
+ __FUNCTION__);
+ return NULL;
+}
+
+void
+efi_map_pal_code (void)
+{
+ void *pal_vaddr = efi_get_pal_addr ();
+ u64 psr;
+
+ if (!pal_vaddr)
+ return;
+
+ /*
+ * Cannot write to CRx with PSR.ic=1
+ */
+ psr = ia64_clear_ic();
+ ia64_itr(0x1, IA64_TR_PALCODE, GRANULEROUNDDOWN((unsigned long) pal_vaddr),
+ pte_val(pfn_pte(__pa(pal_vaddr) >> PAGE_SHIFT, PAGE_KERNEL)),
+ IA64_GRANULE_SHIFT);
+ ia64_set_psr(psr); /* restore psr */
+ ia64_srlz_i();
+}
+
+void __init
+efi_init (void) /* parse early memory options, validate the EFI system table, hook up runtime services */
+{
+ void *efi_map_start, *efi_map_end;
+ efi_config_table_t *config_tables;
+ efi_char16_t *c16;
+ u64 efi_desc_size;
+ char *cp, *end, vendor[100] = "unknown";
+ extern char saved_command_line[];
+ int i;
+
+ /* it's too early to be able to use the standard kernel command line support... */
+ for (cp = saved_command_line; *cp; ) {
+ if (memcmp(cp, "mem=", 4) == 0) {
+ /* keep scanning after a match so that a later */
+ /* "max_addr=" (or a repeated "mem=") is seen too */
+ cp += 4;
+ mem_limit = memparse(cp, &end);
+ cp = end;
+ } else if (memcmp(cp, "max_addr=", 9) == 0) {
+ /* likewise: do not stop at the first option, */
+ /* resume right after the parsed value instead */
+ cp += 9;
+ max_addr = GRANULEROUNDDOWN(memparse(cp, &end));
+ cp = end;
+ } else {
+ while (*cp != ' ' && *cp)
+ ++cp;
+ while (*cp == ' ')
+ ++cp;
+ }
+ }
+ if (max_addr != ~0UL)
+ printk(KERN_INFO "Ignoring memory above %luMB\n", max_addr >> 20);
+
+ efi.systab = __va(ia64_boot_param->efi_systab);
+
+ /*
+ * Verify the EFI Table
+ */
+ if (efi.systab == NULL)
+ panic("Woah! Can't find EFI system table.\n");
+ if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
+ panic("Woah! EFI system table signature incorrect\n");
+ if ((efi.systab->hdr.revision ^ EFI_SYSTEM_TABLE_REVISION) >> 16 != 0)
+ printk(KERN_WARNING "Warning: EFI system table major version mismatch: "
+ "got %d.%02d, expected %d.%02d\n",
+ efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff,
+ EFI_SYSTEM_TABLE_REVISION >> 16, EFI_SYSTEM_TABLE_REVISION & 0xffff);
+
+ config_tables = __va(efi.systab->tables);
+
+ /* Show what we know for posterity */
+ c16 = __va(efi.systab->fw_vendor);
+ if (c16) {
+ for (i = 0;i < (int) sizeof(vendor) - 1 && *c16; ++i) /* leave room for the terminator */
+ vendor[i] = *c16++;
+ vendor[i] = '\0'; /* i <= sizeof(vendor) - 1, so this write stays inside vendor[] */
+ }
+
+ printk(KERN_INFO "EFI v%u.%.02u by %s:",
+ efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor);
+
+ for (i = 0; i < (int) efi.systab->nr_tables; i++) {
+ if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
+ efi.mps = __va(config_tables[i].table);
+ printk(" MPS=0x%lx", config_tables[i].table);
+ } else if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) {
+ efi.acpi20 = __va(config_tables[i].table);
+ printk(" ACPI 2.0=0x%lx", config_tables[i].table);
+ } else if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) {
+ efi.acpi = __va(config_tables[i].table);
+ printk(" ACPI=0x%lx", config_tables[i].table);
+ } else if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) {
+ efi.smbios = __va(config_tables[i].table);
+ printk(" SMBIOS=0x%lx", config_tables[i].table);
+ } else if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == 0) {
+ efi.sal_systab = __va(config_tables[i].table);
+ printk(" SALsystab=0x%lx", config_tables[i].table);
+ } else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
+ efi.hcdp = __va(config_tables[i].table);
+ printk(" HCDP=0x%lx", config_tables[i].table);
+ }
+ }
+ printk("\n");
+
+ runtime = __va(efi.systab->runtime);
+ efi.get_time = phys_get_time;
+ efi.set_time = phys_set_time;
+ efi.get_wakeup_time = phys_get_wakeup_time;
+ efi.set_wakeup_time = phys_set_wakeup_time;
+ efi.get_variable = phys_get_variable;
+ efi.get_next_variable = phys_get_next_variable;
+ efi.set_variable = phys_set_variable;
+ efi.get_next_high_mono_count = phys_get_next_high_mono_count;
+ efi.reset_system = phys_reset_system;
+
+ efi_map_start = __va(ia64_boot_param->efi_memmap);
+ efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
+ efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+#if EFI_DEBUG
+ /* print EFI memory map: */
+ {
+ efi_memory_desc_t *md;
+ void *p;
+
+ for (i = 0, p = efi_map_start; p < efi_map_end; ++i, p += efi_desc_size) {
+ md = p;
+ printk("mem%02u: type=%u, attr=0x%lx, range=[0x%016lx-0x%016lx) (%luMB)\n",
+ i, md->type, md->attribute, md->phys_addr,
+ md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
+ md->num_pages >> (20 - EFI_PAGE_SHIFT));
+ }
+ }
+#endif
+
+ efi_map_pal_code();
+ efi_enter_virtual_mode();
+}
+
+void
+efi_enter_virtual_mode (void)
+{
+ void *efi_map_start, *efi_map_end, *p;
+ efi_memory_desc_t *md;
+ efi_status_t status;
+ u64 efi_desc_size;
+
+ efi_map_start = __va(ia64_boot_param->efi_memmap);
+ efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
+ efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+ for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+ md = p;
+ if (md->attribute & EFI_MEMORY_RUNTIME) {
+ /*
+ * Some descriptors have multiple bits set, so the order of
+ * the tests is relevant.
+ */
+ if (md->attribute & EFI_MEMORY_WB) {
+ md->virt_addr = (u64) __va(md->phys_addr);
+ } else if (md->attribute & EFI_MEMORY_UC) {
+ md->virt_addr = (u64) ioremap(md->phys_addr, 0);
+ } else if (md->attribute & EFI_MEMORY_WC) {
+#if 0
+ md->virt_addr = ia64_remap(md->phys_addr, (_PAGE_A | _PAGE_P
+ | _PAGE_D
+ | _PAGE_MA_WC
+ | _PAGE_PL_0
+ | _PAGE_AR_RW));
+#else
+ printk(KERN_INFO "EFI_MEMORY_WC mapping\n");
+ md->virt_addr = (u64) ioremap(md->phys_addr, 0);
+#endif
+ } else if (md->attribute & EFI_MEMORY_WT) {
+#if 0
+ md->virt_addr = ia64_remap(md->phys_addr, (_PAGE_A | _PAGE_P
+ | _PAGE_D | _PAGE_MA_WT
+ | _PAGE_PL_0
+ | _PAGE_AR_RW));
+#else
+ printk(KERN_INFO "EFI_MEMORY_WT mapping\n");
+ md->virt_addr = (u64) ioremap(md->phys_addr, 0);
+#endif
+ }
+ }
+ }
+
+ status = efi_call_phys(__va(runtime->set_virtual_address_map),
+ ia64_boot_param->efi_memmap_size,
+ efi_desc_size, ia64_boot_param->efi_memdesc_version,
+ ia64_boot_param->efi_memmap);
+ if (status != EFI_SUCCESS) {
+ printk(KERN_WARNING "warning: unable to switch EFI into virtual mode "
+ "(status=%lu)\n", status);
+ return;
+ }
+
+ /*
+ * Now that EFI is in virtual mode, we call the EFI functions more efficiently:
+ */
+ efi.get_time = virt_get_time;
+ efi.set_time = virt_set_time;
+ efi.get_wakeup_time = virt_get_wakeup_time;
+ efi.set_wakeup_time = virt_set_wakeup_time;
+ efi.get_variable = virt_get_variable;
+ efi.get_next_variable = virt_get_next_variable;
+ efi.set_variable = virt_set_variable;
+ efi.get_next_high_mono_count = virt_get_next_high_mono_count;
+ efi.reset_system = virt_reset_system;
+}
+
+/*
+ * Walk the EFI memory map looking for the I/O port range. There can only be one entry of
+ * this type, other I/O port ranges should be described via ACPI.
+ */
+u64
+efi_get_iobase (void)
+{
+ void *efi_map_start, *efi_map_end, *p;
+ efi_memory_desc_t *md;
+ u64 efi_desc_size;
+
+ efi_map_start = __va(ia64_boot_param->efi_memmap);
+ efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
+ efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+ for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+ md = p;
+ if (md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) {
+ if (md->attribute & EFI_MEMORY_UC)
+ return md->phys_addr;
+ }
+ }
+ return 0;
+}
+
+u32
+efi_mem_type (unsigned long phys_addr) /* type of the EFI descriptor containing phys_addr, or 0 if none does */
+{
+ void *efi_map_start, *efi_map_end, *p;
+ efi_memory_desc_t *md;
+ u64 efi_desc_size;
+
+ efi_map_start = __va(ia64_boot_param->efi_memmap);
+ efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
+ efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+ for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { /* step by the firmware-reported descriptor size */
+ md = p;
+
+ if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT)) /* unsigned wrap makes this one test cover both bounds */
+ return md->type;
+ }
+ return 0;
+}
+
+u64
+efi_mem_attributes (unsigned long phys_addr) /* attribute bits of the EFI descriptor containing phys_addr, or 0 if none does */
+{
+ void *efi_map_start, *efi_map_end, *p;
+ efi_memory_desc_t *md;
+ u64 efi_desc_size;
+
+ efi_map_start = __va(ia64_boot_param->efi_memmap);
+ efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
+ efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+ for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { /* step by the firmware-reported descriptor size */
+ md = p;
+
+ if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT)) /* unsigned wrap makes this one test cover both bounds */
+ return md->attribute;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(efi_mem_attributes);
+
+int
+valid_phys_addr_range (unsigned long phys_addr, unsigned long *size) /* 1 if phys_addr is in a WB descriptor, else 0 */
+{
+ void *efi_map_start, *efi_map_end, *p;
+ efi_memory_desc_t *md;
+ u64 efi_desc_size;
+
+ efi_map_start = __va(ia64_boot_param->efi_memmap);
+ efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
+ efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+ for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+ md = p;
+
+ if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT)) { /* unsigned wrap makes this one test cover both bounds */
+ if (!(md->attribute & EFI_MEMORY_WB))
+ return 0; /* containing descriptor is not write-back memory */
+
+ if (*size > md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - phys_addr) /* request runs past the descriptor end */
+ *size = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - phys_addr; /* clamp to the bytes left in it */
+ return 1;
+ }
+ }
+ return 0; /* no descriptor contains phys_addr */
+}
+
+int __init
+efi_uart_console_only(void) /* 1 if every device path in the EFI "ConOut" variable contains a UART node, else 0 */
+{
+ efi_status_t status;
+ char *s, name[] = "ConOut";
+ efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+ efi_char16_t *utf16, name_utf16[32];
+ unsigned char data[1024];
+ unsigned long size = sizeof(data); /* passed by address to get_variable; used below as the data length */
+ struct efi_generic_dev_path *hdr, *end_addr;
+ int uart = 0; /* set once a UART node was seen in the current path instance */
+
+ /* Convert to UTF-16 */
+ utf16 = name_utf16;
+ s = name;
+ while (*s)
+ *utf16++ = *s++ & 0x7f;
+ *utf16 = 0;
+
+ status = efi.get_variable(name_utf16, &guid, NULL, &size, data);
+ if (status != EFI_SUCCESS) {
+ printk(KERN_ERR "No EFI %s variable?\n", name);
+ return 0;
+ }
+
+ hdr = (struct efi_generic_dev_path *) data;
+ end_addr = (struct efi_generic_dev_path *) ((u8 *) data + size);
+ while (hdr < end_addr) {
+ if (hdr->type == EFI_DEV_MSG &&
+ hdr->sub_type == EFI_DEV_MSG_UART)
+ uart = 1;
+ else if (hdr->type == EFI_DEV_END_PATH ||
+ hdr->type == EFI_DEV_END_PATH2) {
+ if (!uart)
+ return 0; /* this path instance ended without a UART node */
+ if (hdr->sub_type == EFI_DEV_END_ENTIRE)
+ return 1; /* end of the whole list and every instance had a UART */
+ uart = 0; /* another instance follows: start over */
+ }
+ hdr = (struct efi_generic_dev_path *) ((u8 *) hdr + hdr->length); /* NOTE(review): a zero length would never advance - confirm firmware guarantees length > 0 */
+ }
+ printk(KERN_ERR "Malformed %s value\n", name); /* ran off the data without an end-entire node */
+ return 0;
+}
diff --git a/arch/ia64/kernel/efi_stub.S b/arch/ia64/kernel/efi_stub.S
new file mode 100644
index 00000000000..5a7fe70212a
--- /dev/null
+++ b/arch/ia64/kernel/efi_stub.S
@@ -0,0 +1,86 @@
+/*
+ * EFI call stub.
+ *
+ * Copyright (C) 1999-2001 Hewlett-Packard Co
+ * David Mosberger <davidm@hpl.hp.com>
+ *
+ * This stub allows us to make EFI calls in physical mode with interrupts
+ * turned off. We need this because we can't call SetVirtualMap() until
+ * the kernel has booted far enough to allow allocation of struct vma_struct
+ * entries (which we would need to map stuff with memory attributes other
+ * than uncached or writeback...). Since the GetTime() service gets called
+ * earlier than that, we need to be able to make physical mode EFI calls from
+ * the kernel.
+ */
+
+/*
+ * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System
+ * Abstraction Layer Specification", revision 2.6e). Note that
+ * psr.dfl and psr.dfh MUST be cleared, despite what this manual says.
+ * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call
+ * (the br.ia instruction fails unless psr.dfl and psr.dfh are
+ * cleared). Fortunately, SAL promises not to touch the floating
+ * point regs, so at least we don't have to save f2-f127.
+ */
+#define PSR_BITS_TO_CLEAR \
+ (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \
+ IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \
+ IA64_PSR_DFL | IA64_PSR_DFH)
+
+#define PSR_BITS_TO_SET \
+ (IA64_PSR_BN)
+
+#include <asm/processor.h>
+#include <asm/asmmacro.h>
+
+/*
+ * Inputs:
+ * in0 = address of function descriptor of EFI routine to call
+ * in1..in7 = arguments to routine
+ *
+ * Outputs:
+ * r8 = EFI_STATUS returned by called function
+ */
+
+GLOBAL_ENTRY(efi_call_phys)
+ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
+ alloc loc1=ar.pfs,8,7,7,0
+ ld8 r2=[in0],8 // load EFI function's entry point
+ mov loc0=rp
+ .body
+ ;;
+ mov loc2=gp // save global pointer
+ mov loc4=ar.rsc // save RSE configuration
+ mov ar.rsc=0 // put RSE in enforced lazy, LE mode
+ ;;
+ ld8 gp=[in0] // load EFI function's global pointer
+ movl r16=PSR_BITS_TO_CLEAR
+ mov loc3=psr // save processor status word
+ movl r17=PSR_BITS_TO_SET
+ ;;
+ or loc3=loc3,r17
+ mov b6=r2
+ ;;
+ andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared
+ br.call.sptk.many rp=ia64_switch_mode_phys
+.ret0: mov out4=in5
+ mov out0=in1
+ mov out1=in2
+ mov out2=in3
+ mov out3=in4
+ mov out5=in6
+ mov out6=in7
+ mov loc5=r19
+ mov loc6=r20
+ br.call.sptk.many rp=b6 // call the EFI function
+.ret1: mov ar.rsc=0 // put RSE in enforced lazy, LE mode
+ mov r16=loc3
+ mov r19=loc5
+ mov r20=loc6
+ br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
+.ret2: mov ar.rsc=loc4 // restore RSE configuration
+ mov ar.pfs=loc1
+ mov rp=loc0
+ mov gp=loc2
+ br.ret.sptk.many rp
+END(efi_call_phys)
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
new file mode 100644
index 00000000000..0272c010a3b
--- /dev/null
+++ b/arch/ia64/kernel/entry.S
@@ -0,0 +1,1587 @@
+/*
+ * ia64/kernel/entry.S
+ *
+ * Kernel entry points.
+ *
+ * Copyright (C) 1998-2003, 2005 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999, 2002-2003
+ * Asit Mallick <Asit.K.Mallick@intel.com>
+ * Don Dugger <Don.Dugger@intel.com>
+ * Suresh Siddha <suresh.b.siddha@intel.com>
+ * Fenghua Yu <fenghua.yu@intel.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ */
+/*
+ * ia64_switch_to now places correct virtual mapping in TR2 for
+ * kernel stack. This allows us to handle interrupts without changing
+ * to physical mode.
+ *
+ * Jonathan Nicklin <nicklin@missioncriticallinux.com>
+ * Patrick O'Rourke <orourke@missioncriticallinux.com>
+ * 11/07/2000
+ */
+/*
+ * Global (preserved) predicate usage on syscall entry/exit path:
+ *
+ * pKStk: See entry.h.
+ * pUStk: See entry.h.
+ * pSys: See entry.h.
+ * pNonSys: !pSys
+ */
+
+#include <linux/config.h>
+
+#include <asm/asmmacro.h>
+#include <asm/cache.h>
+#include <asm/errno.h>
+#include <asm/kregs.h>
+#include <asm/offsets.h>
+#include <asm/pgtable.h>
+#include <asm/percpu.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+#include <asm/unistd.h>
+
+#include "minstate.h"
+
+ /*
+ * execve() is special because in case of success, we need to
+ * setup a null register window frame.
+ */
+ENTRY(ia64_execve)
+ /*
+ * Allocate 8 input registers since ptrace() may clobber them
+ */
+ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
+ alloc loc1=ar.pfs,8,2,4,0 // loc1 = caller's ar.pfs
+ mov loc0=rp // save return address
+ .body
+ mov out0=in0 // filename
+ ;; // stop bit between alloc and call
+ mov out1=in1 // argv
+ mov out2=in2 // envp
+ add out3=16,sp // regs
+ br.call.sptk.many rp=sys_execve
+.ret0:
+#ifdef CONFIG_IA32_SUPPORT
+ /*
+ * Check if we're returning to ia32 mode. If so, we need to restore ia32 registers
+ * from pt_regs.
+ */
+ adds r16=PT(CR_IPSR)+16,sp // r16 = &pt_regs.cr_ipsr
+ ;;
+ ld8 r16=[r16] // r16 = pt_regs.cr_ipsr, tested for the IS bit below
+#endif
+ cmp4.ge p6,p7=r8,r0 // p6 = 32-bit result >= 0 (success), p7 = failure
+ mov ar.pfs=loc1 // restore ar.pfs
+ sxt4 r8=r8 // return 64-bit result
+ ;;
+ stf.spill [sp]=f0 // spill f0 at [sp]; the ldf.fill's below reload it into f12-f30
+(p6) cmp.ne pKStk,pUStk=r0,r0 // a successful execve() lands us in user-mode...
+ mov rp=loc0 // restore return address
+(p6) mov ar.pfs=r0 // clear ar.pfs on success
+(p7) br.ret.sptk.many rp // failure: return without zapping any state
+
+ /*
+ * In theory, we'd have to zap this state only to prevent leaking of
+ * security sensitive state (e.g., if current->mm->dumpable is zero). However,
+ * this executes in less than 20 cycles even on Itanium, so it's not worth
+ * optimizing for...).
+ */
+ mov ar.unat=0; mov ar.lc=0
+ mov r4=0; mov f2=f0; mov b1=r0
+ mov r5=0; mov f3=f0; mov b2=r0
+ mov r6=0; mov f4=f0; mov b3=r0
+ mov r7=0; mov f5=f0; mov b4=r0
+ ldf.fill f12=[sp]; mov f13=f0; mov b5=r0
+ ldf.fill f14=[sp]; ldf.fill f15=[sp]; mov f16=f0
+ ldf.fill f17=[sp]; ldf.fill f18=[sp]; mov f19=f0
+ ldf.fill f20=[sp]; ldf.fill f21=[sp]; mov f22=f0
+ ldf.fill f23=[sp]; ldf.fill f24=[sp]; mov f25=f0
+ ldf.fill f26=[sp]; ldf.fill f27=[sp]; mov f28=f0
+ ldf.fill f29=[sp]; ldf.fill f30=[sp]; mov f31=f0
+#ifdef CONFIG_IA32_SUPPORT
+ tbit.nz p6,p0=r16, IA64_PSR_IS_BIT // p6 = saved ipsr has the IS bit set
+ movl loc0=ia64_ret_from_ia32_execve
+ ;;
+(p6) mov rp=loc0 // IS set: return through ia64_ret_from_ia32_execve instead
+#endif
+ br.ret.sptk.many rp
+END(ia64_execve)
+
+/*
+ * sys_clone2(u64 flags, u64 ustack_base, u64 ustack_size, u64 parent_tidptr, u64 child_tidptr,
+ * u64 tls)
+ */
+GLOBAL_ENTRY(sys_clone2)
+ /*
+ * Allocate 8 input registers since ptrace() may clobber them
+ */
+ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
+ alloc r16=ar.pfs,8,2,6,0 // r16 = ar.pfs (save_switch_stack takes ar.pfs in r16)
+ DO_SAVE_SWITCH_STACK
+ adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp // r2 = &regs->r16 (regs sits above the switch stack)
+ mov loc0=rp // save return address
+ mov loc1=r16 // save ar.pfs across do_fork
+ .body
+ mov out1=in1 // out1 = ustack_base
+ mov out3=in2 // out3 = ustack_size
+ tbit.nz p6,p0=in0,CLONE_SETTLS_BIT // p6 = CLONE_SETTLS bit set in flags
+ mov out4=in3 // parent_tidptr: valid only w/CLONE_PARENT_SETTID
+ ;;
+(p6) st8 [r2]=in5 // store TLS in r16 for copy_thread()
+ mov out5=in4 // child_tidptr: valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
+ adds out2=IA64_SWITCH_STACK_SIZE+16,sp // out2 = &regs
+ mov out0=in0 // out0 = clone_flags
+ br.call.sptk.many rp=do_fork
+.ret1: .restore sp
+ adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack
+ mov ar.pfs=loc1 // restore ar.pfs
+ mov rp=loc0 // restore return address
+ br.ret.sptk.many rp
+END(sys_clone2)
+
+/*
+ * sys_clone(u64 flags, u64 ustack_base, u64 parent_tidptr, u64 child_tidptr, u64 tls)
+ * Deprecated. Use sys_clone2() instead.
+ */
+GLOBAL_ENTRY(sys_clone)
+ /*
+ * Allocate 8 input registers since ptrace() may clobber them
+ */
+ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
+ alloc r16=ar.pfs,8,2,6,0 // r16 = ar.pfs (save_switch_stack takes ar.pfs in r16)
+ DO_SAVE_SWITCH_STACK
+ adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp // r2 = &regs->r16 (regs sits above the switch stack)
+ mov loc0=rp // save return address
+ mov loc1=r16 // save ar.pfs across do_fork
+ .body
+ mov out1=in1 // out1 = ustack_base
+ mov out3=16 // stacksize (compensates for 16-byte scratch area)
+ tbit.nz p6,p0=in0,CLONE_SETTLS_BIT // p6 = CLONE_SETTLS bit set in flags
+ mov out4=in2 // parent_tidptr: valid only w/CLONE_PARENT_SETTID
+ ;;
+(p6) st8 [r2]=in4 // store TLS in the pt_regs r16 slot (r2 = &regs->r16), as sys_clone2 does
+ mov out5=in3 // child_tidptr: valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
+ adds out2=IA64_SWITCH_STACK_SIZE+16,sp // out2 = &regs
+ mov out0=in0 // out0 = clone_flags
+ br.call.sptk.many rp=do_fork
+.ret2: .restore sp
+ adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack
+ mov ar.pfs=loc1 // restore ar.pfs
+ mov rp=loc0 // restore return address
+ br.ret.sptk.many rp
+END(sys_clone)
+
+/*
+ * prev_task <- ia64_switch_to(struct task_struct *next)
+ * With Ingo's new scheduler, interrupts are disabled when this routine gets
+ * called. The code starting at .map relies on this. The rest of the code
+ * doesn't care about the interrupt masking status.
+ */
+GLOBAL_ENTRY(ia64_switch_to)
+ .prologue
+ alloc r16=ar.pfs,1,0,0,0 // r16 = ar.pfs (save_switch_stack takes ar.pfs in r16)
+ DO_SAVE_SWITCH_STACK
+ .body
+
+ adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13 // r22 = slot for the old task's kernel sp (r13 = current)
+ movl r25=init_task
+ mov r27=IA64_KR(CURRENT_STACK) // r27 = granule number recorded by the last .map
+ adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0 // r21 = slot holding next's kernel sp
+ dep r20=0,in0,61,3 // physical address of "next"
+ ;;
+ st8 [r22]=sp // save kernel stack pointer of old task
+ shr.u r26=r20,IA64_GRANULE_SHIFT // r26 = granule number of next's physical address
+ cmp.eq p7,p6=r25,in0 // p7 = next is init_task, p6 = it is not
+ ;;
+ /*
+ * If we've already mapped this task's page, we can skip doing it again.
+ */
+(p6) cmp.eq p7,p6=r26,r27 // p6 stays set only if next's granule differs from the mapped one
+(p6) br.cond.dpnt .map
+ ;;
+.done:
+(p6) ssm psr.ic // if we had to map, reenable the psr.ic bit FIRST!!!
+ ;;
+(p6) srlz.d
+ ld8 sp=[r21] // load kernel stack pointer of new task
+ mov IA64_KR(CURRENT)=in0 // update "current" application register
+ mov r8=r13 // return pointer to previously running task
+ mov r13=in0 // set "current" pointer
+ ;;
+ DO_LOAD_SWITCH_STACK
+
+#ifdef CONFIG_SMP
+ sync.i // ensure "fc"s done by this CPU are visible on other CPUs
+#endif
+ br.ret.sptk.many rp // boogie on out in new context
+
+.map:
+ rsm psr.ic // interrupts (psr.i) are already disabled here
+ movl r25=PAGE_KERNEL
+ ;;
+ srlz.d
+ or r23=r25,r20 // construct PA | page properties
+ mov r25=IA64_GRANULE_SHIFT<<2 // value written to cr.itir below
+ ;;
+ mov cr.itir=r25
+ mov cr.ifa=in0 // VA of next task...
+ ;;
+ mov r25=IA64_TR_CURRENT_STACK // translation register index used by itr.d below
+ mov IA64_KR(CURRENT_STACK)=r26 // remember last page we mapped...
+ ;;
+ itr.d dtr[r25]=r23 // wire in new mapping...
+ br.cond.sptk .done
+END(ia64_switch_to)
+
+/*
+ * Note that interrupts are enabled during save_switch_stack and load_switch_stack. This
+ * means that we may get an interrupt with "sp" pointing to the new kernel stack while
+ * ar.bspstore is still pointing to the old kernel backing store area. Since ar.rsc,
+ * ar.rnat, ar.bsp, and ar.bspstore are all preserved by interrupts, this is not a
+ * problem. Also, we don't need to specify unwind information for preserved registers
+ * that are not modified in save_switch_stack as the right unwind information is already
+ * specified at the call-site of save_switch_stack.
+ */
+
+/*
+ * save_switch_stack:
+ * - r16 holds ar.pfs
+ * - b7 holds address to return to
+ * - rp (b0) holds return address to save
+ */
+GLOBAL_ENTRY(save_switch_stack)
+ .prologue
+ .altrp b7
+ flushrs // flush dirty regs to backing store (must be first in insn group)
+ .save @priunat,r17
+ mov r17=ar.unat // preserve caller's
+ .body
+#ifdef CONFIG_ITANIUM
+ adds r2=16+128,sp
+ adds r3=16+64,sp
+ adds r14=SW(R4)+16,sp
+ ;;
+ st8.spill [r14]=r4,16 // spill r4
+ lfetch.fault.excl.nt1 [r3],128
+ ;;
+ lfetch.fault.excl.nt1 [r2],128
+ lfetch.fault.excl.nt1 [r3],128
+ ;;
+ lfetch.fault.excl [r2]
+ lfetch.fault.excl [r3]
+ adds r15=SW(R5)+16,sp
+#else
+ add r2=16+3*128,sp
+ add r3=16,sp
+ add r14=SW(R4)+16,sp
+ ;;
+ st8.spill [r14]=r4,SW(R6)-SW(R4) // spill r4 and prefetch offset 0x1c0
+ lfetch.fault.excl.nt1 [r3],128 // prefetch offset 0x010
+ ;;
+ lfetch.fault.excl.nt1 [r3],128 // prefetch offset 0x090
+ lfetch.fault.excl.nt1 [r2],128 // prefetch offset 0x190
+ ;;
+ lfetch.fault.excl.nt1 [r3] // prefetch offset 0x110
+ lfetch.fault.excl.nt1 [r2] // prefetch offset 0x210
+ adds r15=SW(R5)+16,sp
+#endif
+ ;;
+ st8.spill [r15]=r5,SW(R7)-SW(R5) // spill r5
+ mov.m ar.rsc=0 // put RSE in mode: enforced lazy, little endian, pl 0
+ add r2=SW(F2)+16,sp // r2 = &sw->f2
+ ;;
+ st8.spill [r14]=r6,SW(B0)-SW(R6) // spill r6
+ mov.m r18=ar.fpsr // preserve fpsr
+ add r3=SW(F3)+16,sp // r3 = &sw->f3
+ ;;
+ stf.spill [r2]=f2,32
+ mov.m r19=ar.rnat // r19 = ar.rnat, stored at the end of the routine
+ mov r21=b0 // r21 = b0, stored into the SW(B0) slot below
+
+ stf.spill [r3]=f3,32
+ st8.spill [r15]=r7,SW(B2)-SW(R7) // spill r7
+ mov r22=b1 // r22 = b1
+ ;;
+ // since we're done with the spills, read and save ar.unat:
+ mov.m r29=ar.unat
+ mov.m r20=ar.bspstore // r20 = ar.bspstore, stored at the end of the routine
+ mov r23=b2 // r23 = b2
+ stf.spill [r2]=f4,32
+ stf.spill [r3]=f5,32
+ mov r24=b3 // r24 = b3
+ ;;
+ st8 [r14]=r21,SW(B1)-SW(B0) // save b0
+ st8 [r15]=r23,SW(B3)-SW(B2) // save b2
+ mov r25=b4 // r25 = b4
+ mov r26=b5 // r26 = b5
+ ;;
+ st8 [r14]=r22,SW(B4)-SW(B1) // save b1
+ st8 [r15]=r24,SW(AR_PFS)-SW(B3) // save b3
+ mov r21=ar.lc // I-unit
+ stf.spill [r2]=f12,32
+ stf.spill [r3]=f13,32
+ ;;
+ st8 [r14]=r25,SW(B5)-SW(B4) // save b4
+ st8 [r15]=r16,SW(AR_LC)-SW(AR_PFS) // save ar.pfs
+ stf.spill [r2]=f14,32
+ stf.spill [r3]=f15,32
+ ;;
+ st8 [r14]=r26 // save b5
+ st8 [r15]=r21 // save ar.lc
+ stf.spill [r2]=f16,32
+ stf.spill [r3]=f17,32
+ ;;
+ stf.spill [r2]=f18,32
+ stf.spill [r3]=f19,32
+ ;;
+ stf.spill [r2]=f20,32
+ stf.spill [r3]=f21,32
+ ;;
+ stf.spill [r2]=f22,32
+ stf.spill [r3]=f23,32
+ ;;
+ stf.spill [r2]=f24,32
+ stf.spill [r3]=f25,32
+ ;;
+ stf.spill [r2]=f26,32
+ stf.spill [r3]=f27,32
+ ;;
+ stf.spill [r2]=f28,32
+ stf.spill [r3]=f29,32
+ ;;
+ stf.spill [r2]=f30,SW(AR_UNAT)-SW(F30)
+ stf.spill [r3]=f31,SW(PR)-SW(F31)
+ add r14=SW(CALLER_UNAT)+16,sp // r14 = &sw->caller_unat
+ ;;
+ st8 [r2]=r29,SW(AR_RNAT)-SW(AR_UNAT) // save ar.unat
+ st8 [r14]=r17,SW(AR_FPSR)-SW(CALLER_UNAT) // save caller_unat
+ mov r21=pr // r21 = predicate registers
+ ;;
+ st8 [r2]=r19,SW(AR_BSPSTORE)-SW(AR_RNAT) // save ar.rnat
+ st8 [r3]=r21 // save predicate registers
+ ;;
+ st8 [r2]=r20 // save ar.bspstore
+ st8 [r14]=r18 // save fpsr
+ mov ar.rsc=3 // put RSE back into eager mode, pl 0
+ br.cond.sptk.many b7 // return to the address the caller passed in b7
+END(save_switch_stack)
+
+/*
+ * load_switch_stack:
+ * - "invala" MUST be done at call site (normally in DO_LOAD_SWITCH_STACK)
+ * - b7 holds address to return to
+ * - must not touch r8-r11
+ */
+ENTRY(load_switch_stack)
+ .prologue
+ .altrp b7
+
+ .body
+ lfetch.fault.nt1 [sp]
+ adds r2=SW(AR_BSPSTORE)+16,sp // r2 = &sw->ar_bspstore
+ adds r3=SW(AR_UNAT)+16,sp // r3 = &sw->ar_unat
+ mov ar.rsc=0 // put RSE into enforced lazy mode
+ adds r14=SW(CALLER_UNAT)+16,sp // r14 = &sw->caller_unat
+ adds r15=SW(AR_FPSR)+16,sp // r15 = &sw->ar_fpsr
+ ;;
+ ld8 r27=[r2],(SW(B0)-SW(AR_BSPSTORE)) // bspstore
+ ld8 r29=[r3],(SW(B1)-SW(AR_UNAT)) // unat
+ ;;
+ ld8 r21=[r2],16 // restore b0
+ ld8 r22=[r3],16 // restore b1
+ ;;
+ ld8 r23=[r2],16 // restore b2
+ ld8 r24=[r3],16 // restore b3
+ ;;
+ ld8 r25=[r2],16 // restore b4
+ ld8 r26=[r3],16 // restore b5
+ ;;
+ ld8 r16=[r2],(SW(PR)-SW(AR_PFS)) // ar.pfs
+ ld8 r17=[r3],(SW(AR_RNAT)-SW(AR_LC)) // ar.lc
+ ;;
+ ld8 r28=[r2] // restore pr
+ ld8 r30=[r3] // restore rnat
+ ;;
+ ld8 r18=[r14],16 // restore caller's unat
+ ld8 r19=[r15],24 // restore fpsr
+ ;;
+ ldf.fill f2=[r14],32
+ ldf.fill f3=[r15],32
+ ;;
+ ldf.fill f4=[r14],32
+ ldf.fill f5=[r15],32
+ ;;
+ ldf.fill f12=[r14],32
+ ldf.fill f13=[r15],32
+ ;;
+ ldf.fill f14=[r14],32
+ ldf.fill f15=[r15],32
+ ;;
+ ldf.fill f16=[r14],32
+ ldf.fill f17=[r15],32
+ ;;
+ ldf.fill f18=[r14],32
+ ldf.fill f19=[r15],32
+ mov b0=r21 // b0 <- value loaded above
+ ;;
+ ldf.fill f20=[r14],32
+ ldf.fill f21=[r15],32
+ mov b1=r22 // b1 <- value loaded above
+ ;;
+ ldf.fill f22=[r14],32
+ ldf.fill f23=[r15],32
+ mov b2=r23 // b2 <- value loaded above
+ ;;
+ mov ar.bspstore=r27 // ar.bspstore <- value loaded above
+ mov ar.unat=r29 // establish unat holding the NaT bits for r4-r7
+ mov b3=r24 // b3 <- value loaded above
+ ;;
+ ldf.fill f24=[r14],32
+ ldf.fill f25=[r15],32
+ mov b4=r25 // b4 <- value loaded above
+ ;;
+ ldf.fill f26=[r14],32
+ ldf.fill f27=[r15],32
+ mov b5=r26 // b5 <- value loaded above
+ ;;
+ ldf.fill f28=[r14],32
+ ldf.fill f29=[r15],32
+ mov ar.pfs=r16 // ar.pfs <- value loaded above
+ ;;
+ ldf.fill f30=[r14],32
+ ldf.fill f31=[r15],24
+ mov ar.lc=r17 // ar.lc <- value loaded above
+ ;;
+ ld8.fill r4=[r14],16
+ ld8.fill r5=[r15],16
+ mov pr=r28,-1 // predicates <- value loaded above (mask -1)
+ ;;
+ ld8.fill r6=[r14],16
+ ld8.fill r7=[r15],16
+
+ mov ar.unat=r18 // restore caller's unat
+ mov ar.rnat=r30 // must restore after bspstore but before rsc!
+ mov ar.fpsr=r19 // restore fpsr
+ mov ar.rsc=3 // put RSE back into eager mode, pl 0
+ br.cond.sptk.many b7 // return to the address the caller passed in b7
+END(load_switch_stack)
+
+GLOBAL_ENTRY(__ia64_syscall)
+ .regstk 6,0,0,0 // six inputs; the last one (in5) is the syscall number
+ mov r15=in5 // put syscall number in place
+ break __BREAK_SYSCALL // issue the system call
+ movl r2=errno // r2 = address of errno
+ cmp.eq p6,p7=-1,r10 // p6 = r10 is -1 (error indication)
+ ;;
+(p6) st4 [r2]=r8 // on error: errno = r8
+(p6) mov r8=-1 // on error: return -1
+ br.ret.sptk.many rp
+END(__ia64_syscall)
+
+GLOBAL_ENTRY(execve)
+ mov r15=__NR_execve // put syscall number in place
+ break __BREAK_SYSCALL // issue the system call; r8/r10 are returned to the caller untouched
+ br.ret.sptk.many rp
+END(execve)
+
+GLOBAL_ENTRY(clone)
+ mov r15=__NR_clone // put syscall number in place
+ break __BREAK_SYSCALL // issue the system call; r8/r10 are returned to the caller untouched
+ br.ret.sptk.many rp
+END(clone)
+
+ /*
+ * Invoke a system call, but do some tracing before and after the call.
+ * We MUST preserve the current register frame throughout this routine
+ * because some system calls (such as ia64_execve) directly
+ * manipulate ar.pfs.
+ */
+GLOBAL_ENTRY(ia64_trace_syscall)
+ PT_REGS_UNWIND_INFO(0)
+ /*
+ * We need to preserve the scratch registers f6-f11 in case the system
+ * call is sigreturn.
+ */
+ adds r16=PT(F6)+16,sp // r16 = &pt_regs.f6
+ adds r17=PT(F7)+16,sp // r17 = &pt_regs.f7
+ ;;
+ stf.spill [r16]=f6,32
+ stf.spill [r17]=f7,32
+ ;;
+ stf.spill [r16]=f8,32
+ stf.spill [r17]=f9,32
+ ;;
+ stf.spill [r16]=f10
+ stf.spill [r17]=f11
+ br.call.sptk.many rp=syscall_trace_enter // give parent a chance to catch syscall args
+ adds r16=PT(F6)+16,sp // reload f6-f11 from the pt_regs slots written above
+ adds r17=PT(F7)+16,sp
+ ;;
+ ldf.fill f6=[r16],32
+ ldf.fill f7=[r17],32
+ ;;
+ ldf.fill f8=[r16],32
+ ldf.fill f9=[r17],32
+ ;;
+ ldf.fill f10=[r16]
+ ldf.fill f11=[r17]
+ // the syscall number may have changed, so re-load it and re-calculate the
+ // syscall entry-point:
+ adds r15=PT(R15)+16,sp // r15 = &pt_regs.r15 (syscall #)
+ ;;
+ ld8 r15=[r15]
+ mov r3=NR_syscalls - 1 // r3 = highest valid table index
+ ;;
+ adds r15=-1024,r15 // r15 = syscall number - 1024 (table index)
+ movl r16=sys_call_table
+ ;;
+ shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024)
+ cmp.leu p6,p7=r15,r3 // p6 = index in range (unsigned compare), p7 = out of range
+ ;;
+(p6) ld8 r20=[r20] // load address of syscall entry point
+(p7) movl r20=sys_ni_syscall
+ ;;
+ mov b6=r20
+ br.call.sptk.many rp=b6 // do the syscall
+.strace_check_retval:
+ cmp.lt p6,p0=r8,r0 // syscall failed?
+ adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8
+ adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10
+ mov r10=0 // no error indication unless strace_error sets one
+(p6) br.cond.sptk strace_error // syscall failed ->
+ ;; // avoid RAW on r10
+.strace_save_retval:
+.mem.offset 0,0; st8.spill [r2]=r8 // store return value in slot for r8
+.mem.offset 8,0; st8.spill [r3]=r10 // clear error indication in slot for r10
+ br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
+.ret3: br.cond.sptk .work_pending_syscall_end
+
+strace_error:
+ ld8 r3=[r2] // load pt_regs.r8
+ sub r9=0,r8 // negate return value to get errno value
+ ;;
+ cmp.ne p6,p0=r3,r0 // is pt_regs.r8!=0?
+ adds r3=16,r2 // r3=&pt_regs.r10
+ ;;
+(p6) mov r10=-1 // pt_regs.r8 != 0: set the error indication
+(p6) mov r8=r9 // ... and return the negated value
+ br.cond.sptk .strace_save_retval
+END(ia64_trace_syscall)
+
+ /*
+ * When traced and returning from sigreturn, we invoke syscall_trace but then
+ * go straight to ia64_leave_kernel rather than ia64_leave_syscall.
+ */
+GLOBAL_ENTRY(ia64_strace_leave_kernel)
+ PT_REGS_UNWIND_INFO(0)
+{ /*
+ * Some versions of gas generate bad unwind info if the first instruction of a
+ * procedure doesn't go into the first slot of a bundle. This is a workaround.
+ */
+ nop.m 0
+ nop.i 0
+ br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
+}
+.ret4: br.cond.sptk ia64_leave_kernel // then leave via ia64_leave_kernel, not ia64_leave_syscall
+END(ia64_strace_leave_kernel)
+
+GLOBAL_ENTRY(ia64_ret_from_clone)
+ PT_REGS_UNWIND_INFO(0)
+{ /*
+ * Some versions of gas generate bad unwind info if the first instruction of a
+ * procedure doesn't go into the first slot of a bundle. This is a workaround.
+ */
+ nop.m 0
+ nop.i 0
+ /*
+ * We need to call schedule_tail() to complete the scheduling process.
+ * Called by ia64_switch_to() after do_fork()->copy_thread(). r8 contains the
+ * address of the previously executing task.
+ */
+ br.call.sptk.many rp=ia64_invoke_schedule_tail
+}
+.ret8:
+ adds r2=TI_FLAGS+IA64_TASK_SIZE,r13 // r2 = &current_thread_info()->flags
+ ;;
+ ld4 r2=[r2] // r2 = thread flags
+ ;;
+ mov r8=0 // r8 = 0 is the value checked/stored by the code that follows
+ and r2=_TIF_SYSCALL_TRACEAUDIT,r2 // keep only the bits in _TIF_SYSCALL_TRACEAUDIT
+ ;;
+ cmp.ne p6,p0=r2,r0 // p6 = any of those bits set
+(p6) br.cond.spnt .strace_check_retval
+ ;; // added stop bits to prevent r8 dependency
+END(ia64_ret_from_clone)
+ // fall through
+GLOBAL_ENTRY(ia64_ret_from_syscall)
+ PT_REGS_UNWIND_INFO(0)
+ cmp.ge p6,p7=r8,r0 // syscall executed successfully?
+ adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8
+ mov r10=r0 // clear error indication in r10
+(p7) br.cond.spnt handle_syscall_error // handle potential syscall failure
+END(ia64_ret_from_syscall)
+ // fall through
+/*
+ * ia64_leave_syscall(): Same as ia64_leave_kernel, except that it doesn't
+ * need to switch to bank 0 and doesn't restore the scratch registers.
+ * To avoid leaking kernel bits, the scratch registers are set to
+ * the following known-to-be-safe values:
+ *
+ * r1: restored (global pointer)
+ * r2: cleared
+ * r3: 1 (when returning to user-level)
+ * r8-r11: restored (syscall return value(s))
+ * r12: restored (user-level stack pointer)
+ * r13: restored (user-level thread pointer)
+ * r14: cleared
+ * r15: restored (syscall #)
+ * r16-r17: cleared
+ * r18: user-level b6
+ * r19: cleared
+ * r20: user-level ar.fpsr
+ * r21: user-level b0
+ * r22: cleared
+ * r23: user-level ar.bspstore
+ * r24: user-level ar.rnat
+ * r25: user-level ar.unat
+ * r26: user-level ar.pfs
+ * r27: user-level ar.rsc
+ * r28: user-level ip
+ * r29: user-level psr
+ * r30: user-level cfm
+ * r31: user-level pr
+ * f6-f11: cleared
+ * pr: restored (user-level pr)
+ * b0: restored (user-level rp)
+ * b6: restored
+ * b7: cleared
+ * ar.unat: restored (user-level ar.unat)
+ * ar.pfs: restored (user-level ar.pfs)
+ * ar.rsc: restored (user-level ar.rsc)
+ * ar.rnat: restored (user-level ar.rnat)
+ * ar.bspstore: restored (user-level ar.bspstore)
+ * ar.fpsr: restored (user-level ar.fpsr)
+ * ar.ccv: cleared
+ * ar.csd: cleared
+ * ar.ssd: cleared
+ */
+ENTRY(ia64_leave_syscall)
+ PT_REGS_UNWIND_INFO(0)
+ /*
+ * work.need_resched etc. mustn't get changed by this CPU before it returns to
+ * user- or fsys-mode, hence we disable interrupts early on.
+ *
+ * p6 controls whether current_thread_info()->flags needs to be checked for
+ * extra work. We always check for extra work when returning to user-level.
+ * With CONFIG_PREEMPT, we also check for extra work when the preempt_count
+ * is 0. After extra work processing has been completed, execution
+ * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check
+ * needs to be redone.
+ */
+#ifdef CONFIG_PREEMPT
+ rsm psr.i // disable interrupts
+ cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall
+(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
+ ;;
+ .pred.rel.mutex pUStk,pKStk
+(pKStk) ld4 r21=[r20] // r21 <- preempt_count
+(pUStk) mov r21=0 // r21 <- 0
+ ;;
+ cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0)
+#else /* !CONFIG_PREEMPT */
+(pUStk) rsm psr.i
+ cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall
+(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk
+#endif
+.work_processed_syscall:
+ adds r2=PT(LOADRS)+16,r12
+ adds r3=PT(AR_BSPSTORE)+16,r12
+ adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
+ ;;
+(p6) ld4 r31=[r18] // load current_thread_info()->flags
+ ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs"
+ mov b7=r0 // clear b7
+ ;;
+ ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE) // load ar.bspstore (may be garbage)
+ ld8 r18=[r2],PT(R9)-PT(B6) // load b6
+(p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE?
+ ;;
+ mov r16=ar.bsp // M2 get existing backing store pointer
+(p6) cmp4.ne.unc p6,p0=r15, r0 // any special work pending?
+(p6) br.cond.spnt .work_pending_syscall
+ ;;
+ // start restoring the state saved on the kernel stack (struct pt_regs):
+ ld8 r9=[r2],PT(CR_IPSR)-PT(R9)
+ ld8 r11=[r3],PT(CR_IIP)-PT(R11)
+ mov f6=f0 // clear f6
+ ;;
+ invala // M0|1 invalidate ALAT
+ rsm psr.i | psr.ic // M2 initiate turning off of interrupt and interruption collection
+ mov f9=f0 // clear f9
+
+ ld8 r29=[r2],16 // load cr.ipsr
+ ld8 r28=[r3],16 // load cr.iip
+ mov f8=f0 // clear f8
+ ;;
+ ld8 r30=[r2],16 // M0|1 load cr.ifs
+ mov.m ar.ssd=r0 // M2 clear ar.ssd
+ cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs
+ ;;
+ ld8 r25=[r3],16 // M0|1 load ar.unat
+ mov.m ar.csd=r0 // M2 clear ar.csd
+ mov r22=r0 // clear r22
+ ;;
+ ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs
+(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
+ mov f10=f0 // clear f10
+ ;;
+ ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // load b0
+ ld8 r27=[r3],PT(PR)-PT(AR_RSC) // load ar.rsc
+ mov f11=f0 // clear f11
+ ;;
+ ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // load ar.rnat (may be garbage)
+ ld8 r31=[r3],PT(R1)-PT(PR) // load predicates
+(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
+ ;;
+ ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // load ar.fpsr
+ ld8.fill r1=[r3],16 // load r1
+(pUStk) mov r17=1
+ ;;
+ srlz.d // M0 ensure interruption collection is off
+ ld8.fill r13=[r3],16
+ mov f7=f0 // clear f7
+ ;;
+ ld8.fill r12=[r2] // restore r12 (sp)
+ ld8.fill r15=[r3] // restore r15
+ addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0
+ ;;
+(pUStk) ld4 r3=[r3] // r3 = cpu_data->phys_stacked_size_p8
+(pUStk) st1 [r14]=r17
+ mov b6=r18 // I0 restore b6
+ ;;
+ mov r14=r0 // clear r14
+ shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition
+(pKStk) br.cond.dpnt.many skip_rbs_switch
+
+ mov.m ar.ccv=r0 // clear ar.ccv
+(pNonSys) br.cond.dpnt.many dont_preserve_current_frame
+ br.cond.sptk.many rbs_switch
+END(ia64_leave_syscall)
+
+#ifdef CONFIG_IA32_SUPPORT
+GLOBAL_ENTRY(ia64_ret_from_ia32_execve)
+ PT_REGS_UNWIND_INFO(0)
+ adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8
+ adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10
+ ;;
+ .mem.offset 0,0
+ st8.spill [r2]=r8 // store return value in slot for r8 and set unat bit
+ .mem.offset 8,0
+ st8.spill [r3]=r0 // clear error indication in slot for r10 and set unat bit
+END(ia64_ret_from_ia32_execve)
+ // fall through
+#endif /* CONFIG_IA32_SUPPORT */
+GLOBAL_ENTRY(ia64_leave_kernel)
+ PT_REGS_UNWIND_INFO(0)
+ /*
+ * work.need_resched etc. mustn't get changed by this CPU before it returns to
+ * user- or fsys-mode, hence we disable interrupts early on.
+ *
+ * p6 controls whether current_thread_info()->flags needs to be checked for
+ * extra work. We always check for extra work when returning to user-level.
+ * With CONFIG_PREEMPT, we also check for extra work when the preempt_count
+ * is 0. After extra work processing has been completed, execution
+ * resumes at .work_processed_kernel with p6 set to 1 if the extra-work-check
+ * needs to be redone.
+ */
+#ifdef CONFIG_PREEMPT
+ rsm psr.i // disable interrupts
+ cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel
+(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
+ ;;
+ .pred.rel.mutex pUStk,pKStk
+(pKStk) ld4 r21=[r20] // r21 <- preempt_count
+(pUStk) mov r21=0 // r21 <- 0
+ ;;
+ cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0)
+#else
+(pUStk) rsm psr.i
+ cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel
+(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk
+#endif
+.work_processed_kernel:
+ adds r17=TI_FLAGS+IA64_TASK_SIZE,r13
+ ;;
+(p6) ld4 r31=[r17] // load current_thread_info()->flags
+ adds r21=PT(PR)+16,r12
+ ;;
+
+ lfetch [r21],PT(CR_IPSR)-PT(PR)
+ adds r2=PT(B6)+16,r12
+ adds r3=PT(R16)+16,r12
+ ;;
+ lfetch [r21]
+ ld8 r28=[r2],8 // load b6
+ adds r29=PT(R24)+16,r12
+
+ ld8.fill r16=[r3],PT(AR_CSD)-PT(R16)
+ adds r30=PT(AR_CCV)+16,r12
+(p6) and r19=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE?
+ ;;
+ ld8.fill r24=[r29]
+ ld8 r15=[r30] // load ar.ccv
+(p6) cmp4.ne.unc p6,p0=r19, r0 // any special work pending?
+ ;;
+ ld8 r29=[r2],16 // load b7
+ ld8 r30=[r3],16 // load ar.csd
+(p6) br.cond.spnt .work_pending
+ ;;
+ ld8 r31=[r2],16 // load ar.ssd
+ ld8.fill r8=[r3],16
+ ;;
+ ld8.fill r9=[r2],16
+ ld8.fill r10=[r3],PT(R17)-PT(R10)
+ ;;
+ ld8.fill r11=[r2],PT(R18)-PT(R11)
+ ld8.fill r17=[r3],16
+ ;;
+ ld8.fill r18=[r2],16
+ ld8.fill r19=[r3],16
+ ;;
+ ld8.fill r20=[r2],16
+ ld8.fill r21=[r3],16
+ mov ar.csd=r30
+ mov ar.ssd=r31
+ ;;
+ rsm psr.i | psr.ic // initiate turning off of interrupt and interruption collection
+ invala // invalidate ALAT
+ ;;
+ ld8.fill r22=[r2],24
+ ld8.fill r23=[r3],24
+ mov b6=r28
+ ;;
+ ld8.fill r25=[r2],16
+ ld8.fill r26=[r3],16
+ mov b7=r29
+ ;;
+ ld8.fill r27=[r2],16
+ ld8.fill r28=[r3],16
+ ;;
+ ld8.fill r29=[r2],16
+ ld8.fill r30=[r3],24
+ ;;
+ ld8.fill r31=[r2],PT(F9)-PT(R31)
+ adds r3=PT(F10)-PT(F6),r3
+ ;;
+ ldf.fill f9=[r2],PT(F6)-PT(F9)
+ ldf.fill f10=[r3],PT(F8)-PT(F10)
+ ;;
+ ldf.fill f6=[r2],PT(F7)-PT(F6)
+ ;;
+ ldf.fill f7=[r2],PT(F11)-PT(F7)
+ ldf.fill f8=[r3],32
+ ;;
+ srlz.i // ensure interruption collection is off
+ mov ar.ccv=r15
+ ;;
+ ldf.fill f11=[r2]
+ bsw.0 // switch back to bank 0 (no stop bit required beforehand...)
+ ;;
+(pUStk) mov r18=IA64_KR(CURRENT)// M2 (12 cycle read latency)
+ adds r16=PT(CR_IPSR)+16,r12
+ adds r17=PT(CR_IIP)+16,r12
+
+(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
+ nop.i 0
+ nop.i 0
+ ;;
+ ld8 r29=[r16],16 // load cr.ipsr
+ ld8 r28=[r17],16 // load cr.iip
+ ;;
+ ld8 r30=[r16],16 // load cr.ifs
+ ld8 r25=[r17],16 // load ar.unat
+ ;;
+ ld8 r26=[r16],16 // load ar.pfs
+ ld8 r27=[r17],16 // load ar.rsc
+ cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs
+ ;;
+ ld8 r24=[r16],16 // load ar.rnat (may be garbage)
+ ld8 r23=[r17],16 // load ar.bspstore (may be garbage)
+ ;;
+ ld8 r31=[r16],16 // load predicates
+ ld8 r21=[r17],16 // load b0
+ ;;
+ ld8 r19=[r16],16 // load ar.rsc value for "loadrs"
+ ld8.fill r1=[r17],16 // load r1
+ ;;
+ ld8.fill r12=[r16],16
+ ld8.fill r13=[r17],16
+(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
+ ;;
+ ld8 r20=[r16],16 // ar.fpsr
+ ld8.fill r15=[r17],16
+ ;;
+ ld8.fill r14=[r16],16
+ ld8.fill r2=[r17]
+(pUStk) mov r17=1
+ ;;
+ ld8.fill r3=[r16]
+(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack
+ shr.u r18=r19,16 // get byte size of existing "dirty" partition
+ ;;
+ mov r16=ar.bsp // get existing backing store pointer
+ addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0
+ ;;
+ ld4 r17=[r17] // r17 = cpu_data->phys_stacked_size_p8
+(pKStk) br.cond.dpnt skip_rbs_switch
+
+ /*
+ * Restore user backing store.
+ *
+ * NOTE: alloc, loadrs, and cover can't be predicated.
+ */
+(pNonSys) br.cond.dpnt dont_preserve_current_frame
+
+rbs_switch:
+ cover // add current frame into dirty partition and set cr.ifs
+ ;;
+ mov r19=ar.bsp // get new backing store pointer
+ sub r16=r16,r18 // krbs = old bsp - size of dirty partition
+ cmp.ne p9,p0=r0,r0 // clear p9 to skip restore of cr.ifs
+ ;;
+ sub r19=r19,r16 // calculate total byte size of dirty partition
+ add r18=64,r18 // don't force in0-in7 into memory...
+ ;;
+ shl r19=r19,16 // shift size of dirty partition into loadrs position
+ ;;
+dont_preserve_current_frame:
+ /*
+ * To prevent leaking bits between the kernel and user-space,
+ * we must clear the stacked registers in the "invalid" partition here.
+ * Not pretty, but at least it's fast (3.34 registers/cycle on Itanium,
+ * 5 registers/cycle on McKinley).
+ */
+# define pRecurse p6
+# define pReturn p7
+#ifdef CONFIG_ITANIUM
+# define Nregs 10
+#else
+# define Nregs 14
+#endif
+ alloc loc0=ar.pfs,2,Nregs-2,2,0
+ shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8))
+ sub r17=r17,r18 // r17 = (physStackedSize + 8) - dirtySize
+ ;;
+ mov ar.rsc=r19 // load ar.rsc to be used for "loadrs"
+ shladd in0=loc1,3,r17
+ mov in1=0
+ ;;
+ TEXT_ALIGN(32)
+rse_clear_invalid:
+#ifdef CONFIG_ITANIUM
+ // cycle 0
+ { .mii
+ alloc loc0=ar.pfs,2,Nregs-2,2,0
+ cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse
+ add out0=-Nregs*8,in0
+}{ .mfb
+ add out1=1,in1 // increment recursion count
+ nop.f 0
+ nop.b 0 // can't do br.call here because of alloc (WAW on CFM)
+ ;;
+}{ .mfi // cycle 1
+ mov loc1=0
+ nop.f 0
+ mov loc2=0
+}{ .mib
+ mov loc3=0
+ mov loc4=0
+(pRecurse) br.call.sptk.many b0=rse_clear_invalid
+
+}{ .mfi // cycle 2
+ mov loc5=0
+ nop.f 0
+ cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret
+}{ .mib
+ mov loc6=0
+ mov loc7=0
+(pReturn) br.ret.sptk.many b0
+}
+#else /* !CONFIG_ITANIUM */
+ alloc loc0=ar.pfs,2,Nregs-2,2,0
+ cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse
+ add out0=-Nregs*8,in0
+ add out1=1,in1 // increment recursion count
+ mov loc1=0
+ mov loc2=0
+ ;;
+ mov loc3=0
+ mov loc4=0
+ mov loc5=0
+ mov loc6=0
+ mov loc7=0
+(pRecurse) br.call.sptk.few b0=rse_clear_invalid
+ ;;
+ mov loc8=0
+ mov loc9=0
+ cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret
+ mov loc10=0
+ mov loc11=0
+(pReturn) br.ret.sptk.many b0
+#endif /* !CONFIG_ITANIUM */
+# undef pRecurse
+# undef pReturn
+ ;;
+ alloc r17=ar.pfs,0,0,0,0 // drop current register frame
+ ;;
+ loadrs
+ ;;
+skip_rbs_switch:
+ mov ar.unat=r25 // M2
+(pKStk) extr.u r22=r22,21,1 // I0 extract current value of psr.pp from r22
+(pLvSys)mov r19=r0 // A clear r19 for leave_syscall, no-op otherwise
+ ;;
+(pUStk) mov ar.bspstore=r23 // M2
+(pKStk) dep r29=r22,r29,21,1 // I0 update ipsr.pp with psr.pp
+(pLvSys)mov r16=r0 // A clear r16 for leave_syscall, no-op otherwise
+ ;;
+ mov cr.ipsr=r29 // M2
+ mov ar.pfs=r26 // I0
+(pLvSys)mov r17=r0 // A clear r17 for leave_syscall, no-op otherwise
+
+(p9) mov cr.ifs=r30 // M2
+ mov b0=r21 // I0
+(pLvSys)mov r18=r0 // A clear r18 for leave_syscall, no-op otherwise
+
+ mov ar.fpsr=r20 // M2
+ mov cr.iip=r28 // M2
+ nop 0
+ ;;
+(pUStk) mov ar.rnat=r24 // M2 must happen with RSE in lazy mode
+ nop 0
+(pLvSys)mov r2=r0
+
+ mov ar.rsc=r27 // M2
+ mov pr=r31,-1 // I0
+ rfi // B
+
+ /*
+ * On entry:
+ * r20 = &current->thread_info->pre_count (if CONFIG_PREEMPT)
+ * r31 = current->thread_info->flags
+ * On exit:
+ * p6 = TRUE if work-pending-check needs to be redone
+ */
+.work_pending_syscall:
+ add r2=-8,r2
+ add r3=-8,r3
+ ;;
+ st8 [r2]=r8
+ st8 [r3]=r10
+.work_pending:
+ tbit.nz p6,p0=r31,TIF_SIGDELAYED // signal delayed from MCA/INIT/NMI/PMI context?
+(p6) br.cond.sptk.few .sigdelayed
+ ;;
+ tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0?
+(p6) br.cond.sptk.few .notify
+#ifdef CONFIG_PREEMPT
+(pKStk) dep r21=-1,r0,PREEMPT_ACTIVE_BIT,1
+ ;;
+(pKStk) st4 [r20]=r21
+ ssm psr.i // enable interrupts
+#endif
+ br.call.spnt.many rp=schedule
+.ret9: cmp.eq p6,p0=r0,r0 // p6 <- 1
+ rsm psr.i // disable interrupts
+ ;;
+#ifdef CONFIG_PREEMPT
+(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
+ ;;
+(pKStk) st4 [r20]=r0 // preempt_count() <- 0
+#endif
+(pLvSys)br.cond.sptk.few .work_pending_syscall_end
+ br.cond.sptk.many .work_processed_kernel // re-check
+
+.notify:
+(pUStk) br.call.spnt.many rp=notify_resume_user
+.ret10: cmp.ne p6,p0=r0,r0 // p6 <- 0
+(pLvSys)br.cond.sptk.few .work_pending_syscall_end
+ br.cond.sptk.many .work_processed_kernel // don't re-check
+
+// There is a delayed signal that was detected in MCA/INIT/NMI/PMI context where
+// it could not be delivered. Deliver it now. The signal might be for us and
+// may set TIF_SIGPENDING, so redrive ia64_leave_* after processing the delayed
+// signal.
+
+.sigdelayed:
+ br.call.sptk.many rp=do_sigdelayed
+ cmp.eq p6,p0=r0,r0 // p6 <- 1, always re-check
+(pLvSys)br.cond.sptk.few .work_pending_syscall_end
+ br.cond.sptk.many .work_processed_kernel // re-check
+
+.work_pending_syscall_end:
+ adds r2=PT(R8)+16,r12
+ adds r3=PT(R10)+16,r12
+ ;;
+ ld8 r8=[r2]
+ ld8 r10=[r3]
+ br.cond.sptk.many .work_processed_syscall // re-check
+
+END(ia64_leave_kernel)
+
+ENTRY(handle_syscall_error)
+ /*
+ * Some system calls (e.g., ptrace, mmap) can return arbitrary values which could
+ * lead us to mistake a negative return value as a failed syscall. Those syscalls
+ * must deposit a non-zero value in pt_regs.r8 to indicate an error. If
+ * pt_regs.r8 is zero, we assume that the call completed successfully.
+ */
+ PT_REGS_UNWIND_INFO(0)
+ ld8 r3=[r2] // load pt_regs.r8 (r2 must already point at pt_regs.r8 on entry)
+ ;;
+ cmp.eq p6,p7=r3,r0 // is pt_regs.r8==0? p6 <- yes (success), p7 <- no (error)
+ ;;
+(p7) mov r10=-1 // error: r10 <- -1 marks the syscall as failed
+(p7) sub r8=0,r8 // negate return value to get errno
+ br.cond.sptk ia64_leave_syscall // both cases continue on the syscall exit path
+END(handle_syscall_error)
+
+ /*
+ * Invoke schedule_tail(task) while preserving in0-in7, which may be needed
+ * in case a system call gets restarted.
+ */
+GLOBAL_ENTRY(ia64_invoke_schedule_tail)
+ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
+ alloc loc1=ar.pfs,8,2,1,0 // keep in0-in7 intact; loc1 <- ar.pfs
+ mov loc0=rp // save return address
+ mov out0=r8 // Address of previous task
+ ;;
+ br.call.sptk.many rp=schedule_tail
+.ret11: mov ar.pfs=loc1 // restore ar.pfs saved by the alloc above
+ mov rp=loc0 // restore return address
+ br.ret.sptk.many rp
+END(ia64_invoke_schedule_tail)
+
+ /*
+ * Setup stack and call do_notify_resume_user(). Note that pSys and pNonSys need to
+ * be set up by the caller. We declare 8 input registers so the system call
+ * args get preserved, in case we need to restart a system call.
+ */
+ENTRY(notify_resume_user)
+ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
+ alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart!
+ mov r9=ar.unat // r9 <- ar.unat, stored into the scratch area below
+ mov loc0=rp // save return address
+ mov out0=0 // there is no "oldset"
+ adds out1=8,sp // out1=&sigscratch->ar_pfs
+(pSys) mov out2=1 // out2==1 => we're in a syscall
+ ;;
+(pNonSys) mov out2=0 // out2==0 => not a syscall
+ .fframe 16
+ .spillpsp ar.unat, 16 // (note that offset is relative to psp+0x10!)
+ st8 [sp]=r9,-16 // allocate space for ar.unat and save it
+ st8 [out1]=loc1,-8 // save ar.pfs, out1=&sigscratch
+ .body
+ br.call.sptk.many rp=do_notify_resume_user
+.ret15: .restore sp
+ adds sp=16,sp // pop scratch stack space
+ ;;
+ ld8 r9=[sp] // load new unat from sigscratch->scratch_unat
+ mov rp=loc0 // restore return address
+ ;;
+ mov ar.unat=r9 // install the (possibly updated) unat value
+ mov ar.pfs=loc1 // restore ar.pfs saved by the alloc above
+ br.ret.sptk.many rp
+END(notify_resume_user)
+
+GLOBAL_ENTRY(sys_rt_sigsuspend)
+ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
+ alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart!
+ mov r9=ar.unat // r9 <- ar.unat, stored into the scratch area below
+ mov loc0=rp // save return address
+ mov out0=in0 // mask
+ mov out1=in1 // sigsetsize
+ adds out2=8,sp // out2=&sigscratch->ar_pfs
+ ;;
+ .fframe 16
+ .spillpsp ar.unat, 16 // (note that offset is relative to psp+0x10!)
+ st8 [sp]=r9,-16 // allocate space for ar.unat and save it
+ st8 [out2]=loc1,-8 // save ar.pfs, out2=&sigscratch
+ .body
+ br.call.sptk.many rp=ia64_rt_sigsuspend
+.ret17: .restore sp
+ adds sp=16,sp // pop scratch stack space
+ ;;
+ ld8 r9=[sp] // load new unat from the sigscratch slot saved above (same layout as notify_resume_user)
+ mov rp=loc0 // restore return address
+ ;;
+ mov ar.unat=r9 // install the (possibly updated) unat value
+ mov ar.pfs=loc1 // restore ar.pfs saved by the alloc above
+ br.ret.sptk.many rp
+END(sys_rt_sigsuspend)
+
+ENTRY(sys_rt_sigreturn)
+ PT_REGS_UNWIND_INFO(0)
+ /*
+ * Allocate 8 input registers since ptrace() may clobber them
+ */
+ alloc r2=ar.pfs,8,0,1,0
+ .prologue
+ PT_REGS_SAVES(16)
+ adds sp=-16,sp // reserve 16 bytes of scratch space (popped again at .ret19)
+ .body
+ cmp.eq pNonSys,pSys=r0,r0 // sigreturn isn't a normal syscall...
+ ;;
+ /*
+ * leave_kernel() restores f6-f11 from pt_regs, but since the streamlined
+ * syscall-entry path does not save them we save them here instead. Note: we
+ * don't need to save any other registers that are not saved by the stream-lined
+ * syscall path, because restore_sigcontext() restores them.
+ */
+ adds r16=PT(F6)+32,sp // r16 <- &pt_regs.f6 (pt_regs sits 32 bytes above the new sp)
+ adds r17=PT(F7)+32,sp // r17 <- &pt_regs.f7
+ ;;
+ stf.spill [r16]=f6,32 // r16 walks f6, f8, f10 (stride 32)
+ stf.spill [r17]=f7,32 // r17 walks f7, f9, f11 (stride 32)
+ ;;
+ stf.spill [r16]=f8,32
+ stf.spill [r17]=f9,32
+ ;;
+ stf.spill [r16]=f10
+ stf.spill [r17]=f11
+ adds out0=16,sp // out0 = &sigscratch
+ br.call.sptk.many rp=ia64_rt_sigreturn
+.ret19: .restore sp 0
+ adds sp=16,sp // pop scratch stack space
+ ;;
+ ld8 r9=[sp] // load new ar.unat
+ mov.sptk b7=r8,ia64_leave_kernel // b7 <- r8 (presumably the continuation address returned by ia64_rt_sigreturn)
+ ;;
+ mov ar.unat=r9
+ br.many b7 // continue at the address held in b7
+END(sys_rt_sigreturn)
+
+GLOBAL_ENTRY(ia64_prepare_handle_unaligned)
+ .prologue
+ /*
+ * r16 = fake ar.pfs, we simply need to make sure privilege is still 0
+ */
+ mov r16=r0 // r16 <- 0
+ DO_SAVE_SWITCH_STACK // push a switch_stack frame before calling into C
+ br.call.sptk.many rp=ia64_handle_unaligned // stack frame setup in ivt
+.ret21: .body
+ DO_LOAD_SWITCH_STACK // reload registers from the switch_stack and pop it
+ br.cond.sptk.many rp // goes to ia64_leave_kernel
+END(ia64_prepare_handle_unaligned)
+
+ //
+ // unw_init_running(void (*callback)(info, arg), void *arg)
+ //
+# define EXTRA_FRAME_SIZE ((UNW_FRAME_INFO_SIZE+15)&~15)
+
+GLOBAL_ENTRY(unw_init_running)
+ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
+ alloc loc1=ar.pfs,2,3,3,0
+ ;;
+ ld8 loc2=[in0],8 // loc2 <- first word at *in0 (callback entry address, moved to b6 below); in0 += 8
+ mov loc0=rp // save return address
+ mov r16=loc1 // r16 <- saved ar.pfs, set up before DO_SAVE_SWITCH_STACK
+ DO_SAVE_SWITCH_STACK
+ .body
+
+ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
+ .fframe IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE
+ SWITCH_STACK_SAVES(EXTRA_FRAME_SIZE)
+ adds sp=-EXTRA_FRAME_SIZE,sp // make room for the unwind frame info below the switch_stack
+ .body
+ ;;
+ adds out0=16,sp // &info
+ mov out1=r13 // current
+ adds out2=16+EXTRA_FRAME_SIZE,sp // &switch_stack
+ br.call.sptk.many rp=unw_init_frame_info
+1: adds out0=16,sp // &info
+ mov b6=loc2 // b6 <- callback entry address loaded above
+ mov loc2=gp // save gp across indirect function call
+ ;;
+ ld8 gp=[in0] // gp <- second word of the callback descriptor
+ mov out1=in1 // arg
+ br.call.sptk.many rp=b6 // invoke the callback function
+1: mov gp=loc2 // restore gp
+
+ // For now, we don't allow changing registers from within
+ // unw_init_running; if we ever want to allow that, we'd
+ // have to do a load_switch_stack here:
+ .restore sp
+ adds sp=IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE,sp // pop switch_stack and frame-info space in one step
+
+ mov ar.pfs=loc1 // restore ar.pfs saved by the alloc above
+ mov rp=loc0 // restore return address
+ br.ret.sptk.many rp
+END(unw_init_running)
+
+ .rodata
+ .align 8
+ .globl sys_call_table
+sys_call_table:
+ data8 sys_ni_syscall // This must be sys_ni_syscall! See ivt.S.
+ data8 sys_exit // 1025
+ data8 sys_read
+ data8 sys_write
+ data8 sys_open
+ data8 sys_close
+ data8 sys_creat // 1030
+ data8 sys_link
+ data8 sys_unlink
+ data8 ia64_execve
+ data8 sys_chdir
+ data8 sys_fchdir // 1035
+ data8 sys_utimes
+ data8 sys_mknod
+ data8 sys_chmod
+ data8 sys_chown
+ data8 sys_lseek // 1040
+ data8 sys_getpid
+ data8 sys_getppid
+ data8 sys_mount
+ data8 sys_umount
+ data8 sys_setuid // 1045
+ data8 sys_getuid
+ data8 sys_geteuid
+ data8 sys_ptrace
+ data8 sys_access
+ data8 sys_sync // 1050
+ data8 sys_fsync
+ data8 sys_fdatasync
+ data8 sys_kill
+ data8 sys_rename
+ data8 sys_mkdir // 1055
+ data8 sys_rmdir
+ data8 sys_dup
+ data8 sys_pipe
+ data8 sys_times
+ data8 ia64_brk // 1060
+ data8 sys_setgid
+ data8 sys_getgid
+ data8 sys_getegid
+ data8 sys_acct
+ data8 sys_ioctl // 1065
+ data8 sys_fcntl
+ data8 sys_umask
+ data8 sys_chroot
+ data8 sys_ustat
+ data8 sys_dup2 // 1070
+ data8 sys_setreuid
+ data8 sys_setregid
+ data8 sys_getresuid
+ data8 sys_setresuid
+ data8 sys_getresgid // 1075
+ data8 sys_setresgid
+ data8 sys_getgroups
+ data8 sys_setgroups
+ data8 sys_getpgid
+ data8 sys_setpgid // 1080
+ data8 sys_setsid
+ data8 sys_getsid
+ data8 sys_sethostname
+ data8 sys_setrlimit
+ data8 sys_getrlimit // 1085
+ data8 sys_getrusage
+ data8 sys_gettimeofday
+ data8 sys_settimeofday
+ data8 sys_select
+ data8 sys_poll // 1090
+ data8 sys_symlink
+ data8 sys_readlink
+ data8 sys_uselib
+ data8 sys_swapon
+ data8 sys_swapoff // 1095
+ data8 sys_reboot
+ data8 sys_truncate
+ data8 sys_ftruncate
+ data8 sys_fchmod
+ data8 sys_fchown // 1100
+ data8 ia64_getpriority
+ data8 sys_setpriority
+ data8 sys_statfs
+ data8 sys_fstatfs
+ data8 sys_gettid // 1105
+ data8 sys_semget
+ data8 sys_semop
+ data8 sys_semctl
+ data8 sys_msgget
+ data8 sys_msgsnd // 1110
+ data8 sys_msgrcv
+ data8 sys_msgctl
+ data8 sys_shmget
+ data8 ia64_shmat
+ data8 sys_shmdt // 1115
+ data8 sys_shmctl
+ data8 sys_syslog
+ data8 sys_setitimer
+ data8 sys_getitimer
+ data8 sys_ni_syscall // 1120 /* was: ia64_oldstat */
+ data8 sys_ni_syscall /* was: ia64_oldlstat */
+ data8 sys_ni_syscall /* was: ia64_oldfstat */
+ data8 sys_vhangup
+ data8 sys_lchown
+ data8 sys_remap_file_pages // 1125
+ data8 sys_wait4
+ data8 sys_sysinfo
+ data8 sys_clone
+ data8 sys_setdomainname
+ data8 sys_newuname // 1130
+ data8 sys_adjtimex
+ data8 sys_ni_syscall /* was: ia64_create_module */
+ data8 sys_init_module
+ data8 sys_delete_module
+ data8 sys_ni_syscall // 1135 /* was: sys_get_kernel_syms */
+ data8 sys_ni_syscall /* was: sys_query_module */
+ data8 sys_quotactl
+ data8 sys_bdflush
+ data8 sys_sysfs
+ data8 sys_personality // 1140
+ data8 sys_ni_syscall // sys_afs_syscall
+ data8 sys_setfsuid
+ data8 sys_setfsgid
+ data8 sys_getdents
+ data8 sys_flock // 1145
+ data8 sys_readv
+ data8 sys_writev
+ data8 sys_pread64
+ data8 sys_pwrite64
+ data8 sys_sysctl // 1150
+ data8 sys_mmap
+ data8 sys_munmap
+ data8 sys_mlock
+ data8 sys_mlockall
+ data8 sys_mprotect // 1155
+ data8 ia64_mremap
+ data8 sys_msync
+ data8 sys_munlock
+ data8 sys_munlockall
+ data8 sys_sched_getparam // 1160
+ data8 sys_sched_setparam
+ data8 sys_sched_getscheduler
+ data8 sys_sched_setscheduler
+ data8 sys_sched_yield
+ data8 sys_sched_get_priority_max // 1165
+ data8 sys_sched_get_priority_min
+ data8 sys_sched_rr_get_interval
+ data8 sys_nanosleep
+ data8 sys_nfsservctl
+ data8 sys_prctl // 1170
+ data8 sys_getpagesize
+ data8 sys_mmap2
+ data8 sys_pciconfig_read
+ data8 sys_pciconfig_write
+ data8 sys_perfmonctl // 1175
+ data8 sys_sigaltstack
+ data8 sys_rt_sigaction
+ data8 sys_rt_sigpending
+ data8 sys_rt_sigprocmask
+ data8 sys_rt_sigqueueinfo // 1180
+ data8 sys_rt_sigreturn
+ data8 sys_rt_sigsuspend
+ data8 sys_rt_sigtimedwait
+ data8 sys_getcwd
+ data8 sys_capget // 1185
+ data8 sys_capset
+ data8 sys_sendfile64
+ data8 sys_ni_syscall // sys_getpmsg (STREAMS)
+ data8 sys_ni_syscall // sys_putpmsg (STREAMS)
+ data8 sys_socket // 1190
+ data8 sys_bind
+ data8 sys_connect
+ data8 sys_listen
+ data8 sys_accept
+ data8 sys_getsockname // 1195
+ data8 sys_getpeername
+ data8 sys_socketpair
+ data8 sys_send
+ data8 sys_sendto
+ data8 sys_recv // 1200
+ data8 sys_recvfrom
+ data8 sys_shutdown
+ data8 sys_setsockopt
+ data8 sys_getsockopt
+ data8 sys_sendmsg // 1205
+ data8 sys_recvmsg
+ data8 sys_pivot_root
+ data8 sys_mincore
+ data8 sys_madvise
+ data8 sys_newstat // 1210
+ data8 sys_newlstat
+ data8 sys_newfstat
+ data8 sys_clone2
+ data8 sys_getdents64
+ data8 sys_getunwind // 1215
+ data8 sys_readahead
+ data8 sys_setxattr
+ data8 sys_lsetxattr
+ data8 sys_fsetxattr
+ data8 sys_getxattr // 1220
+ data8 sys_lgetxattr
+ data8 sys_fgetxattr
+ data8 sys_listxattr
+ data8 sys_llistxattr
+ data8 sys_flistxattr // 1225
+ data8 sys_removexattr
+ data8 sys_lremovexattr
+ data8 sys_fremovexattr
+ data8 sys_tkill
+ data8 sys_futex // 1230
+ data8 sys_sched_setaffinity
+ data8 sys_sched_getaffinity
+ data8 sys_set_tid_address
+ data8 sys_fadvise64_64
+ data8 sys_tgkill // 1235
+ data8 sys_exit_group
+ data8 sys_lookup_dcookie
+ data8 sys_io_setup
+ data8 sys_io_destroy
+ data8 sys_io_getevents // 1240
+ data8 sys_io_submit
+ data8 sys_io_cancel
+ data8 sys_epoll_create
+ data8 sys_epoll_ctl
+ data8 sys_epoll_wait // 1245
+ data8 sys_restart_syscall
+ data8 sys_semtimedop
+ data8 sys_timer_create
+ data8 sys_timer_settime
+ data8 sys_timer_gettime // 1250
+ data8 sys_timer_getoverrun
+ data8 sys_timer_delete
+ data8 sys_clock_settime
+ data8 sys_clock_gettime
+ data8 sys_clock_getres // 1255
+ data8 sys_clock_nanosleep
+ data8 sys_fstatfs64
+ data8 sys_statfs64
+ data8 sys_mbind
+ data8 sys_get_mempolicy // 1260
+ data8 sys_set_mempolicy
+ data8 sys_mq_open
+ data8 sys_mq_unlink
+ data8 sys_mq_timedsend
+ data8 sys_mq_timedreceive // 1265
+ data8 sys_mq_notify
+ data8 sys_mq_getsetattr
+ data8 sys_ni_syscall // reserved for kexec_load
+ data8 sys_ni_syscall // reserved for vserver
+ data8 sys_waitid // 1270
+ data8 sys_add_key
+ data8 sys_request_key
+ data8 sys_keyctl
+ data8 sys_ni_syscall
+ data8 sys_ni_syscall // 1275
+ data8 sys_ni_syscall
+ data8 sys_ni_syscall
+ data8 sys_ni_syscall
+ data8 sys_ni_syscall
+
+ .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
diff --git a/arch/ia64/kernel/entry.h b/arch/ia64/kernel/entry.h
new file mode 100644
index 00000000000..6d4ecec989b
--- /dev/null
+++ b/arch/ia64/kernel/entry.h
@@ -0,0 +1,82 @@
+#include <linux/config.h>
+
+/*
+ * Preserved registers that are shared between code in ivt.S and
+ * entry.S. Be careful not to step on these!
+ */
+#define PRED_LEAVE_SYSCALL 1 /* TRUE iff leave from syscall */
+#define PRED_KERNEL_STACK 2 /* returning to kernel-stacks? */
+#define PRED_USER_STACK 3 /* returning to user-stacks? */
+#define PRED_SYSCALL 4 /* inside a system call? */
+#define PRED_NON_SYSCALL 5 /* complement of PRED_SYSCALL */
+
+#ifdef __ASSEMBLY__
+# define PASTE2(x,y) x##y
+# define PASTE(x,y) PASTE2(x,y)
+
+# define pLvSys PASTE(p,PRED_LEAVE_SYSCALL)
+# define pKStk PASTE(p,PRED_KERNEL_STACK)
+# define pUStk PASTE(p,PRED_USER_STACK)
+# define pSys PASTE(p,PRED_SYSCALL)
+# define pNonSys PASTE(p,PRED_NON_SYSCALL)
+#endif
+
+#define PT(f) (IA64_PT_REGS_##f##_OFFSET)
+#define SW(f) (IA64_SWITCH_STACK_##f##_OFFSET)
+
+#define PT_REGS_SAVES(off) \
+ .unwabi 3, 'i'; \
+ .fframe IA64_PT_REGS_SIZE+16+(off); \
+ .spillsp rp, PT(CR_IIP)+16+(off); \
+ .spillsp ar.pfs, PT(CR_IFS)+16+(off); \
+ .spillsp ar.unat, PT(AR_UNAT)+16+(off); \
+ .spillsp ar.fpsr, PT(AR_FPSR)+16+(off); \
+ .spillsp pr, PT(PR)+16+(off);
+
+#define PT_REGS_UNWIND_INFO(off) \
+ .prologue; \
+ PT_REGS_SAVES(off); \
+ .body
+
+#define SWITCH_STACK_SAVES(off) /* unwind directives for a switch_stack located at sp+16+(off) */ \
+ .savesp ar.unat,SW(CALLER_UNAT)+16+(off); \
+ .savesp ar.fpsr,SW(AR_FPSR)+16+(off); \
+ .spillsp f2,SW(F2)+16+(off); .spillsp f3,SW(F3)+16+(off); \
+ .spillsp f4,SW(F4)+16+(off); .spillsp f5,SW(F5)+16+(off); \
+ .spillsp f16,SW(F16)+16+(off); .spillsp f17,SW(F17)+16+(off); \
+ .spillsp f18,SW(F18)+16+(off); .spillsp f19,SW(F19)+16+(off); \
+ .spillsp f20,SW(F20)+16+(off); .spillsp f21,SW(F21)+16+(off); \
+ .spillsp f22,SW(F22)+16+(off); .spillsp f23,SW(F23)+16+(off); \
+ .spillsp f24,SW(F24)+16+(off); .spillsp f25,SW(F25)+16+(off); \
+ .spillsp f26,SW(F26)+16+(off); .spillsp f27,SW(F27)+16+(off); \
+ .spillsp f28,SW(F28)+16+(off); .spillsp f29,SW(F29)+16+(off); \
+ .spillsp f30,SW(F30)+16+(off); .spillsp f31,SW(F31)+16+(off); \
+ .spillsp r4,SW(R4)+16+(off); .spillsp r5,SW(R5)+16+(off); \
+ .spillsp r6,SW(R6)+16+(off); .spillsp r7,SW(R7)+16+(off); \
+ .spillsp b0,SW(B0)+16+(off); .spillsp b1,SW(B1)+16+(off); \
+ .spillsp b2,SW(B2)+16+(off); .spillsp b3,SW(B3)+16+(off); \
+ .spillsp b4,SW(B4)+16+(off); .spillsp b5,SW(B5)+16+(off); \
+ .spillsp ar.pfs,SW(AR_PFS)+16+(off); .spillsp ar.lc,SW(AR_LC)+16+(off); \
+ .spillsp @priunat,SW(AR_UNAT)+16+(off); \
+ .spillsp ar.rnat,SW(AR_RNAT)+16+(off); \
+ .spillsp ar.bspstore,SW(AR_BSPSTORE)+16+(off); \
+ .spillsp pr,SW(PR)+16+(off)
+
+#define DO_SAVE_SWITCH_STACK \
+ movl r28=1f; \
+ ;; \
+ .fframe IA64_SWITCH_STACK_SIZE; \
+ adds sp=-IA64_SWITCH_STACK_SIZE,sp; \
+ mov.ret.sptk b7=r28,1f; \
+ SWITCH_STACK_SAVES(0); \
+ br.cond.sptk.many save_switch_stack; \
+1:
+
+#define DO_LOAD_SWITCH_STACK \
+ movl r28=1f; \
+ ;; \
+ invala; \
+ mov.ret.sptk b7=r28,1f; \
+ br.cond.sptk.many load_switch_stack; \
+1: .restore sp; \
+ adds sp=IA64_SWITCH_STACK_SIZE,sp
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
new file mode 100644
index 00000000000..0d8650f7fce
--- /dev/null
+++ b/arch/ia64/kernel/fsys.S
@@ -0,0 +1,884 @@
+/*
+ * This file contains the light-weight system call handlers (fsyscall-handlers).
+ *
+ * Copyright (C) 2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 25-Sep-03 davidm Implement fsys_rt_sigprocmask().
+ * 18-Feb-03 louisk Implement fsys_gettimeofday().
+ * 28-Feb-03 davidm Fixed several bugs in fsys_gettimeofday(). Tuned it some more,
+ * probably broke it along the way... ;-)
+ * 13-Jul-04 clameter Implement fsys_clock_gettime and revise fsys_gettimeofday to make
+ * it capable of using memory based clocks without falling back to C code.
+ */
+
+#include <asm/asmmacro.h>
+#include <asm/errno.h>
+#include <asm/offsets.h>
+#include <asm/percpu.h>
+#include <asm/thread_info.h>
+#include <asm/sal.h>
+#include <asm/signal.h>
+#include <asm/system.h>
+#include <asm/unistd.h>
+
+#include "entry.h"
+
+/*
+ * See Documentation/ia64/fsys.txt for details on fsyscalls.
+ *
+ * On entry to an fsyscall handler:
+ * r10 = 0 (i.e., defaults to "successful syscall return")
+ * r11 = saved ar.pfs (a user-level value)
+ * r15 = system call number
+ * r16 = "current" task pointer (in normal kernel-mode, this is in r13)
+ * r32-r39 = system call arguments
+ * b6 = return address (a user-level value)
+ * ar.pfs = previous frame-state (a user-level value)
+ * PSR.be = cleared to zero (i.e., little-endian byte order is in effect)
+ * all other registers may contain values passed in from user-mode
+ *
+ * On return from an fsyscall handler:
+ * r11 = saved ar.pfs (as passed into the fsyscall handler)
+ * r15 = system call number (as passed into the fsyscall handler)
+ * r32-r39 = system call arguments (as passed into the fsyscall handler)
+ * b6 = return address (as passed into the fsyscall handler)
+ * ar.pfs = previous frame-state (as passed into the fsyscall handler)
+ */
+
+ENTRY(fsys_ni_syscall)
+ .prologue
+ .altrp b6
+ .body
+ mov r8=ENOSYS // r8 <- error code
+ mov r10=-1 // r10 <- -1: failure (r10 == 0 on entry means "successful syscall return")
+ FSYS_RETURN
+END(fsys_ni_syscall)
+
+ENTRY(fsys_getpid)
+ .prologue
+ .altrp b6
+ .body
+ add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // r9 <- &current->thread_info->flags
+ ;;
+ ld4 r9=[r9] // r9 <- current->thread_info->flags
+ add r8=IA64_TASK_TGID_OFFSET,r16 // r8 <- &current->tgid
+ ;;
+ and r9=TIF_ALLWORK_MASK,r9 // keep only the work-pending bits
+ ld4 r8=[r8] // r8 = current->tgid
+ ;;
+ cmp.ne p8,p0=0,r9 // p8 <- any work pending?
+(p8) br.spnt.many fsys_fallback_syscall // yes -> do the heavy-weight syscall
+ FSYS_RETURN
+END(fsys_getpid)
+
+ENTRY(fsys_getppid)
+ .prologue
+ .altrp b6
+ .body
+ add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16 // r17 <- &current->group_leader
+ ;;
+ ld8 r17=[r17] // r17 = current->group_leader
+ add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // r9 <- &current->thread_info->flags
+ ;;
+
+ ld4 r9=[r9] // r9 <- current->thread_info->flags
+ add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = &current->group_leader->real_parent
+ ;;
+ and r9=TIF_ALLWORK_MASK,r9 // keep only the work-pending bits
+
+1: ld8 r18=[r17] // r18 = current->group_leader->real_parent
+ ;;
+ cmp.ne p8,p0=0,r9 // p8 <- any work pending?
+ add r8=IA64_TASK_TGID_OFFSET,r18 // r8 = &current->group_leader->real_parent->tgid
+ ;;
+
+ /*
+ * The .acq is needed to ensure that the read of tgid has returned its data before
+ * we re-check "real_parent".
+ */
+ ld4.acq r8=[r8] // r8 = current->group_leader->real_parent->tgid
+#ifdef CONFIG_SMP
+ /*
+ * Re-read current->group_leader->real_parent.
+ */
+ ld8 r19=[r17] // r19 = current->group_leader->real_parent
+(p8) br.spnt.many fsys_fallback_syscall // work pending -> do the heavy-weight syscall
+ ;;
+ cmp.ne p6,p0=r18,r19 // did real_parent change?
+ mov r19=0 // i must not leak kernel bits...
+(p6) br.cond.spnt.few 1b // yes -> redo the read of tgid and the check
+ ;;
+ mov r17=0 // i must not leak kernel bits...
+ mov r18=0 // i must not leak kernel bits...
+#else
+ mov r17=0 // i must not leak kernel bits...
+ mov r18=0 // i must not leak kernel bits...
+ mov r19=0 // i must not leak kernel bits...
+#endif
+ FSYS_RETURN
+END(fsys_getppid)
+
+ENTRY(fsys_set_tid_address)
+ .prologue
+ .altrp b6
+ .body
+ add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // r9 <- &current->thread_info->flags
+ ;;
+ ld4 r9=[r9] // r9 <- current->thread_info->flags
+ tnat.z p6,p7=r32 // check argument register for being NaT
+ ;;
+ and r9=TIF_ALLWORK_MASK,r9 // keep only the work-pending bits
+ add r8=IA64_TASK_PID_OFFSET,r16 // r8 <- &current->pid
+ add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16 // r18 <- &current->clear_child_tid
+ ;;
+ ld4 r8=[r8] // r8 <- current->pid (the return value)
+ cmp.ne p8,p0=0,r9 // p8 <- any work pending?
+ mov r17=-1 // value stored when the argument is NaT
+ ;;
+(p6) st8 [r18]=r32 // argument is not NaT: current->clear_child_tid <- argument
+(p7) st8 [r18]=r17 // argument is NaT: current->clear_child_tid <- -1
+(p8) br.spnt.many fsys_fallback_syscall // work pending -> do the heavy-weight syscall
+ ;;
+ mov r17=0 // i must not leak kernel bits...
+ mov r18=0 // i must not leak kernel bits...
+ FSYS_RETURN
+END(fsys_set_tid_address)
+
+/*
+ * Ensure that the time interpolator structure is compatible with the asm code
+ */
+#if IA64_TIME_INTERPOLATOR_SOURCE_OFFSET !=0 || IA64_TIME_INTERPOLATOR_SHIFT_OFFSET != 2 \
+ || IA64_TIME_INTERPOLATOR_JITTER_OFFSET != 3 || IA64_TIME_INTERPOLATOR_NSEC_OFFSET != 4
+#error fsys_gettimeofday incompatible with changes to struct time_interpolator
+#endif
+#define CLOCK_REALTIME 0
+#define CLOCK_MONOTONIC 1
+#define CLOCK_DIVIDE_BY_1000 0x4000
+#define CLOCK_ADD_MONOTONIC 0x8000
+
+ENTRY(fsys_gettimeofday)
+ .prologue
+ .altrp b6
+ .body
+ mov r31 = r32
+ tnat.nz p6,p0 = r33 // guard against NaT argument
+(p6) br.cond.spnt.few .fail_einval
+ mov r30 = CLOCK_DIVIDE_BY_1000
+ ;;
+.gettime:
+ // Register map
+ // Incoming r31 = pointer to address where to place result
+ // r30 = flags determining how time is processed
+ // r2,r3 = temp r4-r7 preserved
+ // r8 = result nanoseconds
+ // r9 = result seconds
+ // r10 = temporary storage for clock difference
+ // r11 = preserved: saved ar.pfs
+ // r12 = preserved: memory stack
+ // r13 = preserved: thread pointer
+ // r14 = address of mask / mask
+ // r15 = preserved: system call number
+ // r16 = preserved: current task pointer
+ // r17 = wall to monotonic use
+ // r18 = time_interpolator->offset
+ // r19 = address of wall_to_monotonic
+ // r20 = pointer to struct time_interpolator / pointer to time_interpolator->address
+ // r21 = shift factor
+ // r22 = address of time interpolator->last_counter
+ // r23 = address of time_interpolator->last_cycle
+ // r24 = address of time_interpolator->offset
+ // r25 = last_cycle value
+ // r26 = last_counter value
+ // r27 = pointer to xtime
+ // r28 = sequence number at the beginning of critical section
+ // r29 = address of seqlock
+ // r30 = time processing flags / memory address
+ // r31 = pointer to result
+ // Predicates
+ // p6,p7 short term use
+ // p8 = timesource ar.itc
+ // p9 = timesource mmio64
+ // p10 = timesource mmio32
+ // p11 = timesource not to be handled by asm code
+ // p12 = memory time source ( = p9 | p10)
+ // p13 = do cmpxchg with time_interpolator_last_cycle
+ // p14 = Divide by 1000
+ // p15 = Add monotonic
+ //
+ // Note that instructions are optimized for McKinley. McKinley can process two
+ // bundles simultaneously and therefore we continuously try to feed the CPU
+ // two bundles and then a stop.
+ tnat.nz p6,p0 = r31 // branch deferred since it does not fit into bundle structure
+ mov pr = r30,0xc000 // Set predicates according to function
+ add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
+ movl r20 = time_interpolator
+ ;;
+ ld8 r20 = [r20] // get pointer to time_interpolator structure
+ movl r29 = xtime_lock
+ ld4 r2 = [r2] // process work pending flags
+ movl r27 = xtime
+ ;; // only one bundle here
+ ld8 r21 = [r20] // first quad with control information
+ and r2 = TIF_ALLWORK_MASK,r2
+(p6) br.cond.spnt.few .fail_einval // deferred branch
+ ;;
+ add r10 = IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET,r20
+ extr r3 = r21,32,32 // time_interpolator->nsec_per_cyc
+ extr r8 = r21,0,16 // time_interpolator->source
+ cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled
+(p6) br.cond.spnt.many fsys_fallback_syscall
+ ;;
+ cmp.eq p8,p12 = 0,r8 // Check for cpu timer
+ cmp.eq p9,p0 = 1,r8 // MMIO64 ?
+ extr r2 = r21,24,8 // time_interpolator->jitter
+ cmp.eq p10,p0 = 2,r8 // MMIO32 ?
+ cmp.ltu p11,p0 = 2,r8 // function or other clock
+(p11) br.cond.spnt.many fsys_fallback_syscall
+ ;;
+ setf.sig f7 = r3 // Setup for scaling of counter
+(p15) movl r19 = wall_to_monotonic
+(p12) ld8 r30 = [r10]
+ cmp.ne p13,p0 = r2,r0 // need jitter compensation?
+ extr r21 = r21,16,8 // shift factor
+ ;;
+.time_redo:
+ .pred.rel.mutex p8,p9,p10
+ ld4.acq r28 = [r29] // xtime_lock.sequence. Must come first for locking purposes
+(p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!!
+ add r22 = IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET,r20
+(p9) ld8 r2 = [r30] // readq(ti->address). Could also have latency issues..
+(p10) ld4 r2 = [r30] // readl(ti->address): 32-bit read for the mmio32 time source
+(p13) add r23 = IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET,r20
+ ;; // could be removed by moving the last add upward
+ ld8 r26 = [r22] // time_interpolator->last_counter
+(p13) ld8 r25 = [r23] // time interpolator->last_cycle
+ add r24 = IA64_TIME_INTERPOLATOR_OFFSET_OFFSET,r20
+(p15) ld8 r17 = [r19],IA64_TIMESPEC_TV_NSEC_OFFSET
+ ld8 r9 = [r27],IA64_TIMESPEC_TV_NSEC_OFFSET
+ add r14 = IA64_TIME_INTERPOLATOR_MASK_OFFSET, r20
+ ;;
+ ld8 r18 = [r24] // time_interpolator->offset
+ ld8 r8 = [r27],-IA64_TIMESPEC_TV_NSEC_OFFSET // xtime.tv_nsec
+(p13) sub r3 = r25,r2 // Diff needed before comparison (thanks davidm)
+ ;;
+ ld8 r14 = [r14] // time_interpolator->mask
+(p13) cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared
+ sub r10 = r2,r26 // current_counter - last_counter
+ ;;
+(p6) sub r10 = r25,r26 // time we got was less than last_cycle
+(p7) mov ar.ccv = r25 // more than last_cycle. Prep for cmpxchg
+ ;;
+ and r10 = r10,r14 // Apply mask
+ ;;
+ setf.sig f8 = r10
+ nop.i 123
+ ;;
+(p7) cmpxchg8.rel r3 = [r23],r2,ar.ccv
+EX(.fail_efault, probe.w.fault r31, 3) // This takes 5 cycles and we have spare time
+ xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter)
+(p15) add r9 = r9,r17 // Add wall to monotonic.secs to result secs
+ ;;
+(p15) ld8 r17 = [r19],-IA64_TIMESPEC_TV_NSEC_OFFSET
+(p7) cmp.ne p7,p0 = r25,r3 // if cmpxchg not successful redo
+ // simulate tbit.nz.or p7,p0 = r28,0
+ and r28 = ~1,r28 // Make sequence even to force retry if odd
+ getf.sig r2 = f8
+ mf
+ add r8 = r8,r18 // Add time interpolator offset
+ ;;
+ ld4 r10 = [r29] // xtime_lock.sequence
+(p15) add r8 = r8, r17 // Add monotonic.nsecs to nsecs
+ shr.u r2 = r2,r21
+ ;; // overloaded 3 bundles!
+ // End critical section.
+ add r8 = r8,r2 // Add xtime.nsecs
+ cmp4.ne.or p7,p0 = r28,r10
+(p7) br.cond.dpnt.few .time_redo // sequence number changed ?
+ // Now r8=tv->tv_nsec and r9=tv->tv_sec
+ mov r10 = r0
+ movl r2 = 1000000000
+ add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31
+(p14) movl r3 = 2361183241434822607 // Prep for / 1000 hack
+ ;;
+.time_normalize:
+ mov r21 = r8
+ cmp.ge p6,p0 = r8,r2
+(p14) shr.u r20 = r8, 3 // We can repeat this if necessary just wasting some time
+ ;;
+(p14) setf.sig f8 = r20
+(p6) sub r8 = r8,r2
+(p6) add r9 = 1,r9 // two nops before the branch.
+(p14) setf.sig f7 = r3 // Chances for repeats are 1 in 10000 for gettod
+(p6) br.cond.dpnt.few .time_normalize
+ ;;
+ // Divided by 8 through shift. Now divide by 125
+ // The compiler was able to do that with a multiply
+ // and a shift and we do the same
+EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles
+(p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it...
+ ;;
+ mov r8 = r0
+(p14) getf.sig r2 = f8
+ ;;
+(p14) shr.u r21 = r2, 4
+ ;;
+EX(.fail_efault, st8 [r31] = r9)
+EX(.fail_efault, st8 [r23] = r21)
+ FSYS_RETURN
+.fail_einval:
+ mov r8 = EINVAL
+ mov r10 = -1
+ FSYS_RETURN
+.fail_efault:
+ mov r8 = EFAULT
+ mov r10 = -1
+ FSYS_RETURN
+END(fsys_gettimeofday)
+
+ENTRY(fsys_clock_gettime)
+ .prologue
+ .altrp b6
+ .body
+ cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32 // p6 <- clock id (unsigned) > CLOCK_MONOTONIC
+ // Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC
+(p6) br.spnt.few fsys_fallback_syscall
+ mov r31 = r33 // r31 <- result pointer (second argument), as .gettime expects
+ shl r30 = r32,15 // r30 <- flags: CLOCK_MONOTONIC (1) << 15 == CLOCK_ADD_MONOTONIC, CLOCK_REALTIME gives 0
+ br.many .gettime // share the common code in fsys_gettimeofday
+END(fsys_clock_gettime)
+
+/*
+ * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize).
+ */
+#if _NSIG_WORDS != 1
+# error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1.
+#endif
+ENTRY(fsys_rt_sigprocmask)
+ .prologue
+ .altrp b6
+ .body
+
+ add r2=IA64_TASK_BLOCKED_OFFSET,r16
+ add r9=TI_FLAGS+IA64_TASK_SIZE,r16
+ cmp4.ltu p6,p0=SIG_SETMASK,r32
+
+ cmp.ne p15,p0=r0,r34 // oset != NULL?
+ tnat.nz p8,p0=r34
+ add r31=IA64_TASK_SIGHAND_OFFSET,r16
+ ;;
+ ld8 r3=[r2] // read/prefetch current->blocked
+ ld4 r9=[r9]
+ tnat.nz.or p6,p0=r35
+
+ cmp.ne.or p6,p0=_NSIG_WORDS*8,r35
+ tnat.nz.or p6,p0=r32
+(p6) br.spnt.few .fail_einval // fail with EINVAL
+ ;;
+#ifdef CONFIG_SMP
+ ld8 r31=[r31] // r31 <- current->sighand
+#endif
+ and r9=TIF_ALLWORK_MASK,r9
+ tnat.nz.or p8,p0=r33
+ ;;
+ cmp.ne p7,p0=0,r9
+ cmp.eq p6,p0=r0,r33 // set == NULL?
+ add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31 // r31 <- &current->sighand->siglock
+(p8) br.spnt.few .fail_efault // fail with EFAULT
+(p7) br.spnt.many fsys_fallback_syscall // got pending kernel work...
+(p6) br.dpnt.many .store_mask // -> short-circuit to just reading the signal mask
+
+ /* Argh, we actually have to do some work and _update_ the signal mask: */
+
+EX(.fail_efault, probe.r.fault r33, 3) // verify user has read-access to *set
+EX(.fail_efault, ld8 r14=[r33]) // r14 <- *set
+ mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))
+ ;;
+
+ rsm psr.i // mask interrupt delivery
+ mov ar.ccv=0
+ andcm r14=r14,r17 // filter out SIGKILL & SIGSTOP
+
+#ifdef CONFIG_SMP
+ mov r17=1
+ ;;
+ cmpxchg4.acq r18=[r31],r17,ar.ccv // try to acquire the lock
+ mov r8=EINVAL // default to EINVAL
+ ;;
+ ld8 r3=[r2] // re-read current->blocked now that we hold the lock
+ cmp4.ne p6,p0=r18,r0
+(p6) br.cond.spnt.many .lock_contention
+ ;;
+#else
+ ld8 r3=[r2] // re-read current->blocked now that we hold the lock
+ mov r8=EINVAL // default to EINVAL
+#endif
+ add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16
+ add r19=IA64_TASK_SIGNAL_OFFSET,r16
+ cmp4.eq p6,p0=SIG_BLOCK,r32
+ ;;
+ ld8 r19=[r19] // r19 <- current->signal
+ cmp4.eq p7,p0=SIG_UNBLOCK,r32
+ cmp4.eq p8,p0=SIG_SETMASK,r32
+ ;;
+ ld8 r18=[r18] // r18 <- current->pending.signal
+ .pred.rel.mutex p6,p7,p8
+(p6) or r14=r3,r14 // SIG_BLOCK
+(p7) andcm r14=r3,r14 // SIG_UNBLOCK
+
+(p8) mov r14=r14 // SIG_SETMASK
+(p6) mov r8=0 // clear error code
+ // recalc_sigpending()
+ add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19
+
+ add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19
+ ;;
+ ld4 r17=[r17] // r17 <- current->signal->group_stop_count
+(p7) mov r8=0 // clear error code
+
+ ld8 r19=[r19] // r19 <- current->signal->shared_pending
+ ;;
+ cmp4.gt p6,p7=r17,r0 // p6/p7 <- (current->signal->group_stop_count > 0)?
+(p8) mov r8=0 // clear error code
+
+ or r18=r18,r19 // r18 <- current->pending | current->signal->shared_pending
+ ;;
+ // r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked:
+ andcm r18=r18,r14
+ add r9=TI_FLAGS+IA64_TASK_SIZE,r16
+ ;;
+
+(p7) cmp.ne.or.andcm p6,p7=r18,r0 // p6/p7 <- signal pending
+ mov r19=0 // i must not leak kernel bits...
+(p6) br.cond.dpnt.many .sig_pending
+ ;;
+
+1: ld4 r17=[r9] // r17 <- current->thread_info->flags
+ ;;
+ mov ar.ccv=r17
+ and r18=~_TIF_SIGPENDING,r17 // r18 <- r17 & ~(1 << TIF_SIGPENDING)
+ ;;
+
+ st8 [r2]=r14 // update current->blocked with new mask
+ cmpxchg4.acq r14=[r9],r18,ar.ccv // current->thread_info->flags <- r18
+ ;;
+ cmp.ne p6,p0=r17,r14 // update failed?
+(p6) br.cond.spnt.few 1b // yes -> retry
+
+#ifdef CONFIG_SMP
+ st4.rel [r31]=r0 // release the lock
+#endif
+ ssm psr.i
+ ;;
+
+ srlz.d // ensure psr.i is set again
+ mov r18=0 // i must not leak kernel bits...
+
+.store_mask:
+EX(.fail_efault, (p15) probe.w.fault r34, 3) // verify user has write-access to *oset
+EX(.fail_efault, (p15) st8 [r34]=r3)
+ mov r2=0 // i must not leak kernel bits...
+ mov r3=0 // i must not leak kernel bits...
+ mov r8=0 // return 0
+ mov r9=0 // i must not leak kernel bits...
+ mov r14=0 // i must not leak kernel bits...
+ mov r17=0 // i must not leak kernel bits...
+ mov r31=0 // i must not leak kernel bits...
+ FSYS_RETURN
+
+.sig_pending:
+#ifdef CONFIG_SMP
+ st4.rel [r31]=r0 // release the lock
+#endif
+ ssm psr.i
+ ;;
+ srlz.d
+ br.sptk.many fsys_fallback_syscall // with signal pending, do the heavy-weight syscall
+
+#ifdef CONFIG_SMP
+.lock_contention:
+ /* Rather than spinning here, fall back on doing a heavy-weight syscall. */
+ ssm psr.i
+ ;;
+ srlz.d
+ br.sptk.many fsys_fallback_syscall
+#endif
+END(fsys_rt_sigprocmask)
+
+ENTRY(fsys_fallback_syscall)
+ .prologue
+ .altrp b6
+ .body
+ /*
+ * We only get here from light-weight syscall handlers. Thus, we already
+ * know that r15 contains a valid syscall number. No need to re-check.
+ */
+ adds r17=-1024,r15 // r17 <- syscall number - 1024 (index into sys_call_table)
+ movl r14=sys_call_table // r14 <- base of the heavy-weight syscall table
+ ;;
+ rsm psr.i // disable interrupts
+ shladd r18=r17,3,r14 // r18 <- &sys_call_table[r17] (8-byte entries)
+ ;;
+ ld8 r18=[r18] // load normal (heavy-weight) syscall entry-point
+ mov r29=psr // read psr (12 cyc load latency)
+ mov r27=ar.rsc // r27 <- ar.rsc, as fsys_bubble_down expects
+ mov r21=ar.fpsr // r21 <- ar.fpsr, as fsys_bubble_down expects
+ mov r26=ar.pfs // r26 <- ar.pfs, as fsys_bubble_down expects
+END(fsys_fallback_syscall)
+ /* FALL THROUGH */
+GLOBAL_ENTRY(fsys_bubble_down)
+ .prologue
+ .altrp b6
+ .body
+ /*
+ * We get here for syscalls that don't have a lightweight handler. For those, we
+ * need to bubble down into the kernel and that requires setting up a minimal
+ * pt_regs structure, and initializing the CPU state more or less as if an
+ * interruption had occurred. To make syscall-restarts work, we setup pt_regs
+ * such that cr_iip points to the second instruction in syscall_via_break.
+ * Decrementing the IP hence will restart the syscall via break and not
+ * decrementing IP will return us to the caller, as usual. Note that we preserve
+ * the value of psr.pp rather than initializing it from dcr.pp. This makes it
+ * possible to distinguish fsyscall execution from other privileged execution.
+ *
+ * On entry:
+ * - normal fsyscall handler register usage, except that we also have:
+ * - r18: address of syscall entry point
+ * - r21: ar.fpsr
+ * - r26: ar.pfs
+ * - r27: ar.rsc
+ * - r29: psr
+ */
+# define PSR_PRESERVED_BITS (IA64_PSR_UP | IA64_PSR_MFL | IA64_PSR_MFH | IA64_PSR_PK \
+ | IA64_PSR_DT | IA64_PSR_PP | IA64_PSR_SP | IA64_PSR_RT \
+ | IA64_PSR_IC)
+ /*
+ * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc. The rest we have
+ * to synthesize.
+ */
+# define PSR_ONE_BITS ((3 << IA64_PSR_CPL0_BIT) | (0x1 << IA64_PSR_RI_BIT) \
+ | IA64_PSR_BN | IA64_PSR_I)
+
+ invala
+ movl r8=PSR_ONE_BITS // r8 <- bits OR-ed into the cr.ipsr value built in r29 below
+
+ mov r25=ar.unat // save ar.unat (5 cyc)
+ movl r9=PSR_PRESERVED_BITS // r9 <- mask of psr bits that survive the psr.l write below
+
+ mov ar.rsc=0 // set enforced lazy mode, pl 0, little-endian, loadrs=0
+ movl r28=__kernel_syscall_via_break // NOTE(review): presumably the restart address described in the header comment; consumer is not in this function
+ ;;
+ mov r23=ar.bspstore // save ar.bspstore (12 cyc)
+ mov r31=pr // save pr (2 cyc)
+ mov r20=r1 // save caller's gp in r20
+ ;;
+ mov r2=r16 // copy current task addr to addl-addressable register
+ and r9=r9,r29 // r9 <- entry psr (r29) restricted to PSR_PRESERVED_BITS
+ mov r19=b6 // save b6 (2 cyc)
+ ;;
+ mov psr.l=r9 // slam the door (17 cyc to srlz.i)
+ or r29=r8,r29 // construct cr.ipsr value to save
+ addl r22=IA64_RBS_OFFSET,r2 // compute base of RBS
+ ;;
+ // GAS reports a spurious RAW hazard on the read of ar.rnat because it thinks
+ // we may be reading ar.itc after writing to psr.l. Avoid that message with
+ // this directive:
+ dv_serialize_data
+ mov.m r24=ar.rnat // read ar.rnat (5 cyc lat)
+ lfetch.fault.excl.nt1 [r22]
+ adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r2 // r16 <- &current->thread.on_ustack (cleared below)
+
+ // ensure previous insn group is issued before we stall for srlz.i:
+ ;;
+ srlz.i // ensure new psr.l has been established
+ /////////////////////////////////////////////////////////////////////////////
+ ////////// from this point on, execution is not interruptible anymore
+ /////////////////////////////////////////////////////////////////////////////
+ addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // compute base of memory stack
+ cmp.ne pKStk,pUStk=r0,r0 // set pKStk <- 0, pUStk <- 1
+ ;;
+ st1 [r16]=r0 // clear current->thread.on_ustack flag
+ mov ar.bspstore=r22 // switch to kernel RBS
+ mov b6=r18 // copy syscall entry-point to b6 (7 cyc)
+ add r3=TI_FLAGS+IA64_TASK_SIZE,r2 // r3 <- &current_thread_info()->flags
+ ;;
+ ld4 r3=[r3] // r3 = current_thread_info()->flags
+ mov r18=ar.bsp // save (kernel) ar.bsp (12 cyc)
+ mov ar.rsc=0x3 // set eager mode, pl 0, little-endian, loadrs=0
+ br.call.sptk.many b7=ia64_syscall_setup
+ ;;
+ ssm psr.i // set psr.i again now that setup is done
+ movl r2=ia64_ret_from_syscall
+ ;;
+ mov rp=r2 // set the real return addr
+ tbit.z p8,p0=r3,TIF_SYSCALL_TRACE // p8 <- TIF_SYSCALL_TRACE bit is clear in the flags
+ ;;
+(p10) br.cond.spnt.many ia64_ret_from_syscall // p10==true means out registers are more than 8
+(p8) br.call.sptk.many b6=b6 // ignore this return addr
+ br.cond.sptk ia64_trace_syscall // reached only when p8 is false (trace flag set)
+END(fsys_bubble_down)
+
+ .rodata
+ .align 8 // table entries are data8 (8-byte) words
+ .globl fsyscall_table
+
+ data8 fsys_bubble_down // lives at fsyscall_table-8; __kernel_syscall_via_epc addresses it with "add r14=-8,r14"
+fsyscall_table:
+ data8 fsys_ni_syscall
+ data8 0 // exit // 1025
+ data8 0 // read
+ data8 0 // write
+ data8 0 // open
+ data8 0 // close
+ data8 0 // creat // 1030
+ data8 0 // link
+ data8 0 // unlink
+ data8 0 // execve
+ data8 0 // chdir
+ data8 0 // fchdir // 1035
+ data8 0 // utimes
+ data8 0 // mknod
+ data8 0 // chmod
+ data8 0 // chown
+ data8 0 // lseek // 1040
+ data8 fsys_getpid // getpid
+ data8 fsys_getppid // getppid
+ data8 0 // mount
+ data8 0 // umount
+ data8 0 // setuid // 1045
+ data8 0 // getuid
+ data8 0 // geteuid
+ data8 0 // ptrace
+ data8 0 // access
+ data8 0 // sync // 1050
+ data8 0 // fsync
+ data8 0 // fdatasync
+ data8 0 // kill
+ data8 0 // rename
+ data8 0 // mkdir // 1055
+ data8 0 // rmdir
+ data8 0 // dup
+ data8 0 // pipe
+ data8 0 // times
+ data8 0 // brk // 1060
+ data8 0 // setgid
+ data8 0 // getgid
+ data8 0 // getegid
+ data8 0 // acct
+ data8 0 // ioctl // 1065
+ data8 0 // fcntl
+ data8 0 // umask
+ data8 0 // chroot
+ data8 0 // ustat
+ data8 0 // dup2 // 1070
+ data8 0 // setreuid
+ data8 0 // setregid
+ data8 0 // getresuid
+ data8 0 // setresuid
+ data8 0 // getresgid // 1075
+ data8 0 // setresgid
+ data8 0 // getgroups
+ data8 0 // setgroups
+ data8 0 // getpgid
+ data8 0 // setpgid // 1080
+ data8 0 // setsid
+ data8 0 // getsid
+ data8 0 // sethostname
+ data8 0 // setrlimit
+ data8 0 // getrlimit // 1085
+ data8 0 // getrusage
+ data8 fsys_gettimeofday // gettimeofday
+ data8 0 // settimeofday
+ data8 0 // select
+ data8 0 // poll // 1090
+ data8 0 // symlink
+ data8 0 // readlink
+ data8 0 // uselib
+ data8 0 // swapon
+ data8 0 // swapoff // 1095
+ data8 0 // reboot
+ data8 0 // truncate
+ data8 0 // ftruncate
+ data8 0 // fchmod
+ data8 0 // fchown // 1100
+ data8 0 // getpriority
+ data8 0 // setpriority
+ data8 0 // statfs
+ data8 0 // fstatfs
+ data8 0 // gettid // 1105
+ data8 0 // semget
+ data8 0 // semop
+ data8 0 // semctl
+ data8 0 // msgget
+ data8 0 // msgsnd // 1110
+ data8 0 // msgrcv
+ data8 0 // msgctl
+ data8 0 // shmget
+ data8 0 // shmat
+ data8 0 // shmdt // 1115
+ data8 0 // shmctl
+ data8 0 // syslog
+ data8 0 // setitimer
+ data8 0 // getitimer
+ data8 0 // 1120
+ data8 0
+ data8 0
+ data8 0 // vhangup
+ data8 0 // lchown
+ data8 0 // remap_file_pages // 1125
+ data8 0 // wait4
+ data8 0 // sysinfo
+ data8 0 // clone
+ data8 0 // setdomainname
+ data8 0 // newuname // 1130
+ data8 0 // adjtimex
+ data8 0
+ data8 0 // init_module
+ data8 0 // delete_module
+ data8 0 // 1135
+ data8 0
+ data8 0 // quotactl
+ data8 0 // bdflush
+ data8 0 // sysfs
+ data8 0 // personality // 1140
+ data8 0 // afs_syscall
+ data8 0 // setfsuid
+ data8 0 // setfsgid
+ data8 0 // getdents
+ data8 0 // flock // 1145
+ data8 0 // readv
+ data8 0 // writev
+ data8 0 // pread64
+ data8 0 // pwrite64
+ data8 0 // sysctl // 1150
+ data8 0 // mmap
+ data8 0 // munmap
+ data8 0 // mlock
+ data8 0 // mlockall
+ data8 0 // mprotect // 1155
+ data8 0 // mremap
+ data8 0 // msync
+ data8 0 // munlock
+ data8 0 // munlockall
+ data8 0 // sched_getparam // 1160
+ data8 0 // sched_setparam
+ data8 0 // sched_getscheduler
+ data8 0 // sched_setscheduler
+ data8 0 // sched_yield
+ data8 0 // sched_get_priority_max // 1165
+ data8 0 // sched_get_priority_min
+ data8 0 // sched_rr_get_interval
+ data8 0 // nanosleep
+ data8 0 // nfsservctl
+ data8 0 // prctl // 1170
+ data8 0 // getpagesize
+ data8 0 // mmap2
+ data8 0 // pciconfig_read
+ data8 0 // pciconfig_write
+ data8 0 // perfmonctl // 1175
+ data8 0 // sigaltstack
+ data8 0 // rt_sigaction
+ data8 0 // rt_sigpending
+ data8 fsys_rt_sigprocmask // rt_sigprocmask
+ data8 0 // rt_sigqueueinfo // 1180
+ data8 0 // rt_sigreturn
+ data8 0 // rt_sigsuspend
+ data8 0 // rt_sigtimedwait
+ data8 0 // getcwd
+ data8 0 // capget // 1185
+ data8 0 // capset
+ data8 0 // sendfile
+ data8 0
+ data8 0
+ data8 0 // socket // 1190
+ data8 0 // bind
+ data8 0 // connect
+ data8 0 // listen
+ data8 0 // accept
+ data8 0 // getsockname // 1195
+ data8 0 // getpeername
+ data8 0 // socketpair
+ data8 0 // send
+ data8 0 // sendto
+ data8 0 // recv // 1200
+ data8 0 // recvfrom
+ data8 0 // shutdown
+ data8 0 // setsockopt
+ data8 0 // getsockopt
+ data8 0 // sendmsg // 1205
+ data8 0 // recvmsg
+ data8 0 // pivot_root
+ data8 0 // mincore
+ data8 0 // madvise
+ data8 0 // newstat // 1210
+ data8 0 // newlstat
+ data8 0 // newfstat
+ data8 0 // clone2
+ data8 0 // getdents64
+ data8 0 // getunwind // 1215
+ data8 0 // readahead
+ data8 0 // setxattr
+ data8 0 // lsetxattr
+ data8 0 // fsetxattr
+ data8 0 // getxattr // 1220
+ data8 0 // lgetxattr
+ data8 0 // fgetxattr
+ data8 0 // listxattr
+ data8 0 // llistxattr
+ data8 0 // flistxattr // 1225
+ data8 0 // removexattr
+ data8 0 // lremovexattr
+ data8 0 // fremovexattr
+ data8 0 // tkill
+ data8 0 // futex // 1230
+ data8 0 // sched_setaffinity
+ data8 0 // sched_getaffinity
+ data8 fsys_set_tid_address // set_tid_address
+ data8 0 // fadvise64_64
+ data8 0 // tgkill // 1235
+ data8 0 // exit_group
+ data8 0 // lookup_dcookie
+ data8 0 // io_setup
+ data8 0 // io_destroy
+ data8 0 // io_getevents // 1240
+ data8 0 // io_submit
+ data8 0 // io_cancel
+ data8 0 // epoll_create
+ data8 0 // epoll_ctl
+ data8 0 // epoll_wait // 1245
+ data8 0 // restart_syscall
+ data8 0 // semtimedop
+ data8 0 // timer_create
+ data8 0 // timer_settime
+ data8 0 // timer_gettime // 1250
+ data8 0 // timer_getoverrun
+ data8 0 // timer_delete
+ data8 0 // clock_settime
+ data8 fsys_clock_gettime // clock_gettime
+ data8 0 // clock_getres // 1255
+ data8 0 // clock_nanosleep
+ data8 0 // fstatfs64
+ data8 0 // statfs64
+ data8 0
+ data8 0 // 1260
+ data8 0
+ data8 0 // mq_open
+ data8 0 // mq_unlink
+ data8 0 // mq_timedsend
+ data8 0 // mq_timedreceive // 1265
+ data8 0 // mq_notify
+ data8 0 // mq_getsetattr
+ data8 0 // kexec_load
+ data8 0
+ data8 0 // 1270
+ data8 0
+ data8 0
+ data8 0
+ data8 0
+ data8 0 // 1275
+ data8 0
+ data8 0
+ data8 0
+ data8 0
+
+ .org fsyscall_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
diff --git a/arch/ia64/kernel/gate-data.S b/arch/ia64/kernel/gate-data.S
new file mode 100644
index 00000000000..258c0a3238f
--- /dev/null
+++ b/arch/ia64/kernel/gate-data.S
@@ -0,0 +1,3 @@
+ .section .data.gate, "aw" // allocatable, writable section that receives the gate image
+
+ .incbin "arch/ia64/kernel/gate.so" // embed the contents of gate.so verbatim
diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S
new file mode 100644
index 00000000000..facf75acdc8
--- /dev/null
+++ b/arch/ia64/kernel/gate.S
@@ -0,0 +1,372 @@
+/*
+ * This file contains the code that gets mapped at the upper end of each task's text
+ * region. For now, it contains the signal trampoline code only.
+ *
+ * Copyright (C) 1999-2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <linux/config.h>
+
+#include <asm/asmmacro.h>
+#include <asm/errno.h>
+#include <asm/offsets.h>
+#include <asm/sigcontext.h>
+#include <asm/system.h>
+#include <asm/unistd.h>
+
+/*
+ * We can't easily refer to symbols inside the kernel. To avoid full runtime relocation,
+ * complications with the linker (which likes to create PLT stubs for branches
+ * to targets outside the shared object) and to avoid multi-phase kernel builds, we
+ * simply create minimalistic "patch lists" in special ELF sections.
+ */
+ .section ".data.patch.fsyscall_table", "a" // create the patch-list section (flag "a": allocatable) ...
+ .previous // ... and return to the previous section without emitting anything
+#define LOAD_FSYSCALL_TABLE(reg) \
+[1:] movl reg=0; \
+ .xdata4 ".data.patch.fsyscall_table", 1b-.
+
+ .section ".data.patch.brl_fsys_bubble_down", "a" // same pattern for the brl patch list
+ .previous // back to the previous section
+#define BRL_COND_FSYS_BUBBLE_DOWN(pr) \
+[1:](pr)brl.cond.sptk 0; \
+ .xdata4 ".data.patch.brl_fsys_bubble_down", 1b-.
+
+GLOBAL_ENTRY(__kernel_syscall_via_break)
+ .prologue
+ .altrp b6
+ .body
+ /*
+ * Note: for (fast) syscall restart to work, the break instruction must be
+ * the first one in the bundle addressed by syscall_via_break.
+ */
+{ .mib
+ break 0x100000 // NOTE(review): presumably the same immediate as __BREAK_SYSCALL used by __kernel_sigtramp -- confirm
+ nop.i 0 // filler for the I slot of the .mib bundle
+ br.ret.sptk.many b6 // return through b6 (declared as the return pointer by .altrp above)
+}
+END(__kernel_syscall_via_break)
+
+/*
+ * On entry:
+ * r11 = saved ar.pfs
+ * r15 = system call #
+ * b0 = saved return address
+ * b6 = return address
+ * On exit:
+ * r11 = saved ar.pfs
+ * r15 = system call #
+ * b0 = saved return address
+ * all other "scratch" registers: undefined
+ * all "preserved" registers: same as on entry
+ */
+
+GLOBAL_ENTRY(__kernel_syscall_via_epc)
+ .prologue
+ .altrp b6
+ .body
+{
+ /*
+ * Note: the kernel cannot assume that the first two instructions in this
+ * bundle get executed. The remaining code must be safe even if
+ * they do not get executed.
+ */
+ adds r17=-1024,r15 // r17 <- syscall number - 1024 (table index)
+ mov r10=0 // default to successful syscall execution
+ epc
+}
+ ;;
+ rsm psr.be // note: on McKinley "rsm psr.be/srlz.d" is slightly faster than "rum psr.be"
+ LOAD_FSYSCALL_TABLE(r14)
+
+ mov r16=IA64_KR(CURRENT) // 12 cycle read latency
+ tnat.nz p10,p9=r15 // p10 <- r15 is a NaT, p9 <- r15 is not a NaT
+ mov r19=NR_syscalls-1 // r19 <- highest valid table index
+ ;;
+ shladd r18=r17,3,r14 // r18 <- r14 + 8*r17 (address of the table entry)
+
+ srlz.d
+ cmp.ne p8,p0=r0,r0 // p8 <- FALSE
+ /* Note: if r17 is a NaT, p6 will be set to zero. */
+ cmp.geu p6,p7=r19,r17 // (syscall >= 1024 && syscall < 1024+NR_syscalls)? (unsigned compare on syscall-1024)
+ ;;
+(p6) ld8 r18=[r18] // r18 <- table entry for this syscall
+ mov r21=ar.fpsr
+ add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry
+ ;;
+(p6) mov b7=r18
+(p6) tbit.z p8,p0=r18,0 // p8 <- bit 0 of the table entry is clear
+(p8) br.dptk.many b7 // bit 0 clear -> jump straight to the entry (presumably a light-weight handler)
+
+(p6) rsm psr.i
+ mov r27=ar.rsc
+ mov r26=ar.pfs
+ ;;
+ mov r29=psr // read psr (12 cyc load latency)
+/*
+ * brl.cond doesn't work as intended because the linker would convert this branch
+ * into a branch to a PLT. Perhaps there will be a way to avoid this with some
+ * future version of the linker. In the meantime, we just use an indirect branch
+ * instead.
+ */
+#ifdef CONFIG_ITANIUM
+(p6) ld8 r14=[r14] // r14 <- fsys_bubble_down
+ ;;
+(p6) mov b7=r14
+(p6) br.sptk.many b7
+#else
+ BRL_COND_FSYS_BUBBLE_DOWN(p6)
+#endif
+
+ mov r10=-1 // only reached when p6 is false: flag the syscall as failed
+(p10) mov r8=EINVAL // syscall number (r15) was a NaT
+(p9) mov r8=ENOSYS // syscall number was not a NaT but failed the range check
+ FSYS_RETURN
+END(__kernel_syscall_via_epc)
+
+# define ARG0_OFF (16 + IA64_SIGFRAME_ARG0_OFFSET)
+# define ARG1_OFF (16 + IA64_SIGFRAME_ARG1_OFFSET)
+# define ARG2_OFF (16 + IA64_SIGFRAME_ARG2_OFFSET)
+# define SIGHANDLER_OFF (16 + IA64_SIGFRAME_HANDLER_OFFSET)
+# define SIGCONTEXT_OFF (16 + IA64_SIGFRAME_SIGCONTEXT_OFFSET)
+
+# define FLAGS_OFF IA64_SIGCONTEXT_FLAGS_OFFSET
+# define CFM_OFF IA64_SIGCONTEXT_CFM_OFFSET
+# define FR6_OFF IA64_SIGCONTEXT_FR6_OFFSET
+# define BSP_OFF IA64_SIGCONTEXT_AR_BSP_OFFSET
+# define RNAT_OFF IA64_SIGCONTEXT_AR_RNAT_OFFSET
+# define UNAT_OFF IA64_SIGCONTEXT_AR_UNAT_OFFSET
+# define FPSR_OFF IA64_SIGCONTEXT_AR_FPSR_OFFSET
+# define PR_OFF IA64_SIGCONTEXT_PR_OFFSET
+# define RP_OFF IA64_SIGCONTEXT_IP_OFFSET
+# define SP_OFF IA64_SIGCONTEXT_R12_OFFSET
+# define RBS_BASE_OFF IA64_SIGCONTEXT_RBS_BASE_OFFSET
+# define LOADRS_OFF IA64_SIGCONTEXT_LOADRS_OFFSET
+# define base0 r2
+# define base1 r3
+ /*
+ * When we get here, the memory stack looks like this:
+ *
+ * +===============================+
+ * | |
+ * // struct sigframe //
+ * | |
+ * +-------------------------------+ <-- sp+16
+ * | 16 byte of scratch |
+ * | space |
+ * +-------------------------------+ <-- sp
+ *
+ * The register stack looks _exactly_ the way it looked at the time the signal
+ * occurred. In other words, we're treading on a potential mine-field: each
+ * incoming general register may be a NaT value (including sp, in which case the
+ * process ends up dying with a SIGSEGV).
+ *
+ * The first thing we need to do is a cover to get the registers onto the backing
+ * store. Once that is done, we invoke the signal handler which may modify some
+ * of the machine state. After returning from the signal handler, we return
+ * control to the previous context by executing a sigreturn system call. A signal
+ * handler may call the rt_sigreturn() function to directly return to a given
+ * sigcontext. However, the user-level sigreturn() needs to do much more than
+ * calling the rt_sigreturn() system call as it needs to unwind the stack to
+ * restore preserved registers that may have been saved on the signal handler's
+ * call stack.
+ */
+
+#define SIGTRAMP_SAVES \
+ .unwabi 3, 's'; /* mark this as a sigtramp handler (saves scratch regs) */ \
+ .unwabi @svr4, 's'; /* backwards compatibility with old unwinders (remove in v2.7) */ \
+ .savesp ar.unat, UNAT_OFF+SIGCONTEXT_OFF; \
+ .savesp ar.fpsr, FPSR_OFF+SIGCONTEXT_OFF; \
+ .savesp pr, PR_OFF+SIGCONTEXT_OFF; \
+ .savesp rp, RP_OFF+SIGCONTEXT_OFF; \
+ .savesp ar.pfs, CFM_OFF+SIGCONTEXT_OFF; \
+ .vframesp SP_OFF+SIGCONTEXT_OFF
+
+GLOBAL_ENTRY(__kernel_sigtramp)
+ // describe the state that is active when we get here:
+ .prologue
+ SIGTRAMP_SAVES
+ .body
+
+ .label_state 1
+
+ adds base0=SIGHANDLER_OFF,sp
+ adds base1=RBS_BASE_OFF+SIGCONTEXT_OFF,sp
+ br.call.sptk.many rp=1f
+1:
+ ld8 r17=[base0],(ARG0_OFF-SIGHANDLER_OFF) // get pointer to signal handler's plabel
+ ld8 r15=[base1] // get address of new RBS base (or NULL)
+ cover // push args in interrupted frame onto backing store
+ ;;
+ cmp.ne p1,p0=r15,r0 // do we need to switch rbs? (note: pr is saved by kernel)
+ mov.m r9=ar.bsp // fetch ar.bsp
+ .spillsp.p p1, ar.rnat, RNAT_OFF+SIGCONTEXT_OFF
+(p1) br.cond.spnt setup_rbs // yup -> (clobbers p8, r14-r16, and r18-r20)
+back_from_setup_rbs:
+ alloc r8=ar.pfs,0,0,3,0
+ ld8 out0=[base0],16 // load arg0 (signum)
+ adds base1=(ARG1_OFF-(RBS_BASE_OFF+SIGCONTEXT_OFF)),base1
+ ;;
+ ld8 out1=[base1] // load arg1 (siginfop)
+ ld8 r10=[r17],8 // get signal handler entry point
+ ;;
+ ld8 out2=[base0] // load arg2 (sigcontextp)
+ ld8 gp=[r17] // get signal handler's global pointer
+ adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp
+ ;;
+ .spillsp ar.bsp, BSP_OFF+SIGCONTEXT_OFF
+ st8 [base0]=r9 // save sc_ar_bsp
+ adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp
+ adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp
+ ;;
+ stf.spill [base0]=f6,32
+ stf.spill [base1]=f7,32
+ ;;
+ stf.spill [base0]=f8,32
+ stf.spill [base1]=f9,32
+ mov b6=r10
+ ;;
+ stf.spill [base0]=f10,32
+ stf.spill [base1]=f11,32
+ ;;
+ stf.spill [base0]=f12,32
+ stf.spill [base1]=f13,32
+ ;;
+ stf.spill [base0]=f14,32
+ stf.spill [base1]=f15,32
+ br.call.sptk.many rp=b6 // call the signal handler
+.ret0: adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp
+ ;;
+ ld8 r15=[base0] // fetch sc_ar_bsp
+ mov r14=ar.bsp
+ ;;
+ cmp.ne p1,p0=r14,r15 // do we need to restore the rbs?
+(p1) br.cond.spnt restore_rbs // yup -> (clobbers r14-r18, f6 & f7)
+ ;;
+back_from_restore_rbs:
+ adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp
+ adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp
+ ;;
+ ldf.fill f6=[base0],32
+ ldf.fill f7=[base1],32
+ ;;
+ ldf.fill f8=[base0],32
+ ldf.fill f9=[base1],32
+ ;;
+ ldf.fill f10=[base0],32
+ ldf.fill f11=[base1],32
+ ;;
+ ldf.fill f12=[base0],32
+ ldf.fill f13=[base1],32
+ ;;
+ ldf.fill f14=[base0],32
+ ldf.fill f15=[base1],32
+ mov r15=__NR_rt_sigreturn
+ .restore sp // pop .prologue
+ break __BREAK_SYSCALL
+
+ .prologue
+ SIGTRAMP_SAVES
+setup_rbs:
+ mov ar.rsc=0 // put RSE into enforced lazy mode
+ ;;
+ .save ar.rnat, r19
+ mov r19=ar.rnat // save RNaT before switching backing store area
+ adds r14=(RNAT_OFF+SIGCONTEXT_OFF),sp
+
+ mov r18=ar.bspstore
+ mov ar.bspstore=r15 // switch over to new register backing store area
+ ;;
+
+ .spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF
+ st8 [r14]=r19 // save sc_ar_rnat
+ .body
+ mov.m r16=ar.bsp // sc_loadrs <- (new bsp - new bspstore) << 16
+ adds r14=(LOADRS_OFF+SIGCONTEXT_OFF),sp
+ ;;
+ invala
+ sub r15=r16,r15
+ extr.u r20=r18,3,6
+ ;;
+ mov ar.rsc=0xf // set RSE into eager mode, pl 3
+ cmp.eq p8,p0=63,r20
+ shl r15=r15,16
+ ;;
+ st8 [r14]=r15 // save sc_loadrs
+(p8) st8 [r18]=r19 // if bspstore points at RNaT slot, store RNaT there now
+ .restore sp // pop .prologue
+ br.cond.sptk back_from_setup_rbs
+
+ .prologue
+ SIGTRAMP_SAVES
+ .spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF
+ .body
+restore_rbs:
+ // On input:
+ // r14 = bsp1 (bsp at the time of return from signal handler)
+ // r15 = bsp0 (bsp at the time the signal occurred)
+ //
+ // Here, we need to calculate bspstore0, the value that ar.bspstore needs
+ // to be set to, based on bsp0 and the size of the dirty partition on
+ // the alternate stack (sc_loadrs >> 16). This can be done with the
+ // following algorithm:
+ //
+ // bspstore0 = rse_skip_regs(bsp0, -rse_num_regs(bsp1 - (loadrs >> 19), bsp1));
+ //
+ // This is what the code below does.
+ //
+ alloc r2=ar.pfs,0,0,0,0 // alloc null frame
+ adds r16=(LOADRS_OFF+SIGCONTEXT_OFF),sp
+ adds r18=(RNAT_OFF+SIGCONTEXT_OFF),sp
+ ;;
+ ld8 r17=[r16]
+ ld8 r16=[r18] // get new rnat
+ extr.u r18=r15,3,6 // r18 <- rse_slot_num(bsp0)
+ ;;
+ mov ar.rsc=r17 // put RSE into enforced lazy mode
+ shr.u r17=r17,16
+ ;;
+ sub r14=r14,r17 // r14 (bspstore1) <- bsp1 - (sc_loadrs >> 16)
+ shr.u r17=r17,3 // r17 <- (sc_loadrs >> 19)
+ ;;
+ loadrs // restore dirty partition
+ extr.u r14=r14,3,6 // r14 <- rse_slot_num(bspstore1)
+ ;;
+ add r14=r14,r17 // r14 <- rse_slot_num(bspstore1) + (sc_loadrs >> 19)
+ ;;
+ shr.u r14=r14,6 // r14 <- (rse_slot_num(bspstore1) + (sc_loadrs >> 19))/0x40
+ ;;
+ sub r14=r14,r17 // r14 <- -rse_num_regs(bspstore1, bsp1)
+ movl r17=0x8208208208208209
+ ;;
+ add r18=r18,r14 // r18 (delta) <- rse_slot_num(bsp0) - rse_num_regs(bspstore1,bsp1)
+ setf.sig f7=r17
+ cmp.lt p7,p0=r14,r0 // p7 <- (r14 < 0)?
+ ;;
+(p7) adds r18=-62,r18 // delta -= 62
+ ;;
+ setf.sig f6=r18
+ ;;
+ xmpy.h f6=f6,f7
+ ;;
+ getf.sig r17=f6
+ ;;
+ add r17=r17,r18
+ shr r18=r18,63
+ ;;
+ shr r17=r17,5
+ ;;
+ sub r17=r17,r18 // r17 = delta/63
+ ;;
+ add r17=r14,r17 // r17 <- delta/63 - rse_num_regs(bspstore1, bsp1)
+ ;;
+ shladd r15=r17,3,r15 // r15 <- bsp0 + 8*(delta/63 - rse_num_regs(bspstore1, bsp1))
+ ;;
+ mov ar.bspstore=r15 // switch back to old register backing store area
+ ;;
+ mov ar.rnat=r16 // restore RNaT
+ mov ar.rsc=0xf // (will be restored later on from sc_ar_rsc)
+ // invala not necessary as that will happen when returning to user-mode
+ br.cond.sptk back_from_restore_rbs
+END(__kernel_sigtramp)
diff --git a/arch/ia64/kernel/gate.lds.S b/arch/ia64/kernel/gate.lds.S
new file mode 100644
index 00000000000..e1e4aba9ecd
--- /dev/null
+++ b/arch/ia64/kernel/gate.lds.S
@@ -0,0 +1,95 @@
+/*
+ * Linker script for gate DSO. The gate pages are an ELF shared object prelinked to its
+ * virtual address, with only one read-only segment and one execute-only segment (both fit
+ * in one page). This script controls its layout.
+ */
+
+#include <linux/config.h>
+
+#include <asm/system.h>
+
+SECTIONS
+{
+ . = GATE_ADDR + SIZEOF_HEADERS;
+
+ .hash : { *(.hash) } :readable
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .dynamic : { *(.dynamic) } :readable :dynamic
+
+ /*
+ * This linker script is used both with -r and with -shared. For the layouts to match,
+ * we need to skip more than enough space for the dynamic symbol table et al. If this
+ * amount is insufficient, ld -shared will barf. Just increase it here.
+ */
+ . = GATE_ADDR + 0x500;
+
+ .data.patch : {
+ __start_gate_mckinley_e9_patchlist = .;
+ *(.data.patch.mckinley_e9)
+ __end_gate_mckinley_e9_patchlist = .;
+
+ __start_gate_vtop_patchlist = .;
+ *(.data.patch.vtop)
+ __end_gate_vtop_patchlist = .;
+
+ __start_gate_fsyscall_patchlist = .;
+ *(.data.patch.fsyscall_table)
+ __end_gate_fsyscall_patchlist = .;
+
+ __start_gate_brl_fsys_bubble_down_patchlist = .;
+ *(.data.patch.brl_fsys_bubble_down)
+ __end_gate_brl_fsys_bubble_down_patchlist = .;
+ } :readable
+ .IA_64.unwind_info : { *(.IA_64.unwind_info*) }
+ .IA_64.unwind : { *(.IA_64.unwind*) } :readable :unwind
+#ifdef HAVE_BUGGY_SEGREL
+ .text (GATE_ADDR + PAGE_SIZE) : { *(.text) *(.text.*) } :readable
+#else
+ . = ALIGN (PERCPU_PAGE_SIZE) + (. & (PERCPU_PAGE_SIZE - 1));
+ .text : { *(.text) *(.text.*) } :epc
+#endif
+
+ /DISCARD/ : {
+ *(.got.plt) *(.got)
+ *(.data .data.* .gnu.linkonce.d.*)
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(__ex_table)
+ }
+}
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+ readable PT_LOAD FILEHDR PHDRS FLAGS(4); /* PF_R */
+#ifndef HAVE_BUGGY_SEGREL
+ epc PT_LOAD FILEHDR PHDRS FLAGS(1); /* PF_X */
+#endif
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ unwind 0x70000001; /* PT_IA_64_UNWIND, but ld doesn't match the name */
+}
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+ LINUX_2.5 {
+ global:
+ __kernel_syscall_via_break;
+ __kernel_syscall_via_epc;
+ __kernel_sigtramp;
+
+ local: *;
+ };
+}
+
+/* The ELF entry point can be used to set the AT_SYSINFO value. */
+ENTRY(__kernel_syscall_via_epc)
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
new file mode 100644
index 00000000000..105c7fec8c6
--- /dev/null
+++ b/arch/ia64/kernel/head.S
@@ -0,0 +1,996 @@
+/*
+ * Here is where the ball gets rolling as far as the kernel is concerned.
+ * When control is transferred to _start, the bootloader has already
+ * loaded us to the correct address. All that's left to do here is
+ * to set up the kernel's global pointer and jump to the kernel
+ * entry point.
+ *
+ * Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ * Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999 Intel Corp.
+ * Copyright (C) 1999 Asit Mallick <Asit.K.Mallick@intel.com>
+ * Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com>
+ * Copyright (C) 2002 Fenghua Yu <fenghua.yu@intel.com>
+ * -Optimize __ia64_save_fpu() and __ia64_load_fpu() for Itanium 2.
+ */
+
+#include <linux/config.h>
+
+#include <asm/asmmacro.h>
+#include <asm/fpu.h>
+#include <asm/kregs.h>
+#include <asm/mmu_context.h>
+#include <asm/offsets.h>
+#include <asm/pal.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+
+ .section __special_page_section,"ax"
+
+ .global empty_zero_page
+empty_zero_page:
+ .skip PAGE_SIZE
+
+ .global swapper_pg_dir
+swapper_pg_dir:
+ .skip PAGE_SIZE
+
+ .rodata
+halt_msg:
+ stringz "Halting kernel\n"
+
+ .text
+
+ .global start_ap
+
+ /*
+ * Start the kernel. When the bootloader passes control to _start(), r28
+ * points to the address of the boot parameter area. Execution reaches
+ * here in physical mode.
+ */
+GLOBAL_ENTRY(_start)
+start_ap:
+ .prologue
+ .save rp, r0 // terminate unwind chain with a NULL rp
+ .body
+
+ rsm psr.i | psr.ic
+ ;;
+ srlz.i
+ ;;
+ /*
+ * Initialize kernel region registers:
+ * rr[0]: VHPT enabled, page size = PAGE_SHIFT
+ * rr[1]: VHPT enabled, page size = PAGE_SHIFT
+ * rr[2]: VHPT enabled, page size = PAGE_SHIFT
+ * rr[3]: VHPT enabled, page size = PAGE_SHIFT
+ * rr[4]: VHPT enabled, page size = PAGE_SHIFT
+ * rr[5]: VHPT enabled, page size = PAGE_SHIFT
+ * rr[6]: VHPT disabled, page size = IA64_GRANULE_SHIFT
+ * rr[7]: VHPT disabled, page size = IA64_GRANULE_SHIFT
+ * We initialize all of them to prevent inadvertently assuming
+ * something about the state of address translation early in boot.
+ */
+ mov r6=((ia64_rid(IA64_REGION_ID_KERNEL, (0<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
+ movl r7=(0<<61)
+ mov r8=((ia64_rid(IA64_REGION_ID_KERNEL, (1<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
+ movl r9=(1<<61)
+ mov r10=((ia64_rid(IA64_REGION_ID_KERNEL, (2<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
+ movl r11=(2<<61)
+ mov r12=((ia64_rid(IA64_REGION_ID_KERNEL, (3<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
+ movl r13=(3<<61)
+ mov r14=((ia64_rid(IA64_REGION_ID_KERNEL, (4<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
+ movl r15=(4<<61)
+ mov r16=((ia64_rid(IA64_REGION_ID_KERNEL, (5<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
+ movl r17=(5<<61)
+ mov r18=((ia64_rid(IA64_REGION_ID_KERNEL, (6<<61)) << 8) | (IA64_GRANULE_SHIFT << 2))
+ movl r19=(6<<61)
+ mov r20=((ia64_rid(IA64_REGION_ID_KERNEL, (7<<61)) << 8) | (IA64_GRANULE_SHIFT << 2))
+ movl r21=(7<<61)
+ ;;
+ mov rr[r7]=r6
+ mov rr[r9]=r8
+ mov rr[r11]=r10
+ mov rr[r13]=r12
+ mov rr[r15]=r14
+ mov rr[r17]=r16
+ mov rr[r19]=r18
+ mov rr[r21]=r20
+ ;;
+ /*
+ * Now pin mappings into the TLB for kernel text and data
+ */
+ mov r18=KERNEL_TR_PAGE_SHIFT<<2
+ movl r17=KERNEL_START
+ ;;
+ mov cr.itir=r18
+ mov cr.ifa=r17
+ mov r16=IA64_TR_KERNEL
+ mov r3=ip
+ movl r18=PAGE_KERNEL
+ ;;
+ dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT
+ ;;
+ or r18=r2,r18
+ ;;
+ srlz.i
+ ;;
+ itr.i itr[r16]=r18
+ ;;
+ itr.d dtr[r16]=r18
+ ;;
+ srlz.i
+
+ /*
+ * Switch into virtual mode:
+ */
+ movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \
+ |IA64_PSR_DI)
+ ;;
+ mov cr.ipsr=r16
+ movl r17=1f
+ ;;
+ mov cr.iip=r17
+ mov cr.ifs=r0
+ ;;
+ rfi
+ ;;
+1: // now we are in virtual mode
+
+ // set IVT entry point---can't access I/O ports without it
+ movl r3=ia64_ivt
+ ;;
+ mov cr.iva=r3
+ movl r2=FPSR_DEFAULT
+ ;;
+ srlz.i
+ movl gp=__gp
+
+ mov ar.fpsr=r2
+ ;;
+
+#define isAP p2 // are we an Application Processor?
+#define isBP p3 // are we the Bootstrap Processor?
+
+#ifdef CONFIG_SMP
+ /*
+ * Find the init_task for the currently booting CPU. At poweron, and in
+ * UP mode, task_for_booting_cpu is NULL.
+ */
+ movl r3=task_for_booting_cpu
+ ;;
+ ld8 r3=[r3]
+ movl r2=init_task
+ ;;
+ cmp.eq isBP,isAP=r3,r0
+ ;;
+(isAP) mov r2=r3
+#else
+ movl r2=init_task
+ cmp.eq isBP,isAP=r0,r0
+#endif
+ ;;
+ tpa r3=r2 // r3 == phys addr of task struct
+ mov r16=-1
+(isBP) br.cond.dpnt .load_current // BP stack is on region 5 --- no need to map it
+
+ // load mapping for stack (virtaddr in r2, physaddr in r3)
+ rsm psr.ic
+ movl r17=PAGE_KERNEL
+ ;;
+ srlz.d
+ dep r18=0,r3,0,12
+ ;;
+ or r18=r17,r18
+ dep r2=-1,r3,61,3 // IMVA of task
+ ;;
+ mov r17=rr[r2]
+ shr.u r16=r3,IA64_GRANULE_SHIFT
+ ;;
+ dep r17=0,r17,8,24
+ ;;
+ mov cr.itir=r17
+ mov cr.ifa=r2
+
+ mov r19=IA64_TR_CURRENT_STACK
+ ;;
+ itr.d dtr[r19]=r18
+ ;;
+ ssm psr.ic
+ srlz.d
+ ;;
+
+.load_current:
+ // load the "current" pointer (r13) and ar.k6 with the current task
+ mov IA64_KR(CURRENT)=r2 // virtual address
+ mov IA64_KR(CURRENT_STACK)=r16
+ mov r13=r2
+ /*
+ * Reserve space at the top of the stack for "struct pt_regs". Kernel threads
+ * don't store interesting values in that structure, but the space still needs
+ * to be there because time-critical stuff such as the context switching can
+ * be implemented more efficiently (for example, __switch_to()
+ * always sets the psr.dfh bit of the task it is switching to).
+ */
+ addl r12=IA64_STK_OFFSET-IA64_PT_REGS_SIZE-16,r2
+ addl r2=IA64_RBS_OFFSET,r2 // initialize the RSE
+ mov ar.rsc=0 // place RSE in enforced lazy mode
+ ;;
+ loadrs // clear the dirty partition
+ ;;
+ mov ar.bspstore=r2 // establish the new RSE stack
+ ;;
+ mov ar.rsc=0x3 // place RSE in eager mode
+
+(isBP) dep r28=-1,r28,61,3 // make address virtual
+(isBP) movl r2=ia64_boot_param
+ ;;
+(isBP) st8 [r2]=r28 // save the address of the boot param area passed by the bootloader
+
+#ifdef CONFIG_SMP
+(isAP) br.call.sptk.many rp=start_secondary
+.ret0:
+(isAP) br.cond.sptk self
+#endif
+
+ // This is executed by the bootstrap processor (bsp) only:
+
+#ifdef CONFIG_IA64_FW_EMU
+ // initialize PAL & SAL emulator:
+ br.call.sptk.many rp=sys_fw_init
+.ret1:
+#endif
+ br.call.sptk.many rp=start_kernel
+.ret2: addl r3=@ltoff(halt_msg),gp
+ ;;
+ alloc r2=ar.pfs,8,0,2,0
+ ;;
+ ld8 out0=[r3]
+ br.call.sptk.many b0=console_print
+
+self: hint @pause
+ br.sptk.many self // endless loop
+END(_start)
+
+GLOBAL_ENTRY(ia64_save_debug_regs)
+ alloc r16=ar.pfs,1,0,0,0 // one input register: in0 = destination save area
+ mov r20=ar.lc // preserve ar.lc
+ mov ar.lc=IA64_NUM_DBG_REGS-1 // loop count for br.cloop: IA64_NUM_DBG_REGS iterations
+ mov r18=0 // r18 <- index of the debug register to read
+ add r19=IA64_NUM_DBG_REGS*8,in0 // r19 <- in0 + 8*IA64_NUM_DBG_REGS (where the ibr values go)
+ ;;
+1: mov r16=dbr[r18] // r16 <- dbr[r18]
+#ifdef CONFIG_ITANIUM
+ ;;
+ srlz.d
+#endif
+ mov r17=ibr[r18] // r17 <- ibr[r18]
+ add r18=1,r18 // advance to the next register index
+ ;;
+ st8.nta [in0]=r16,8 // store the dbr value, post-increment in0 by 8
+ st8.nta [r19]=r17,8 // store the ibr value, post-increment r19 by 8
+ br.cloop.sptk.many 1b
+ ;;
+ mov ar.lc=r20 // restore ar.lc
+ br.ret.sptk.many rp
+END(ia64_save_debug_regs)
+
+GLOBAL_ENTRY(ia64_load_debug_regs)
+ alloc r16=ar.pfs,1,0,0,0 // one input register: in0 = source save area
+ lfetch.nta [in0] // prefetch the start of the save area
+ mov r20=ar.lc // preserve ar.lc
+ add r19=IA64_NUM_DBG_REGS*8,in0 // r19 <- in0 + 8*IA64_NUM_DBG_REGS (where the ibr values are read from)
+ mov ar.lc=IA64_NUM_DBG_REGS-1 // loop count for br.cloop: IA64_NUM_DBG_REGS iterations
+ mov r18=-1 // index starts at -1 because it is incremented before first use
+ ;;
+1: ld8.nta r16=[in0],8 // r16 <- next dbr value, post-increment in0 by 8
+ ld8.nta r17=[r19],8 // r17 <- next ibr value, post-increment r19 by 8
+ add r18=1,r18 // r18 <- index of the register pair to write
+ ;;
+ mov dbr[r18]=r16 // dbr[r18] <- loaded value
+#ifdef CONFIG_ITANIUM
+ ;;
+ srlz.d // Errata 132 (NoFix status)
+#endif
+ mov ibr[r18]=r17 // ibr[r18] <- loaded value
+ br.cloop.sptk.many 1b
+ ;;
+ mov ar.lc=r20 // restore ar.lc
+ br.ret.sptk.many rp
+END(ia64_load_debug_regs)
+
+GLOBAL_ENTRY(__ia64_save_fpu)
+ alloc r2=ar.pfs,1,4,0,0
+ adds loc0=96*16-16,in0
+ adds loc1=96*16-16-128,in0
+ ;;
+ stf.spill.nta [loc0]=f127,-256
+ stf.spill.nta [loc1]=f119,-256
+ ;;
+ stf.spill.nta [loc0]=f111,-256
+ stf.spill.nta [loc1]=f103,-256
+ ;;
+ stf.spill.nta [loc0]=f95,-256
+ stf.spill.nta [loc1]=f87,-256
+ ;;
+ stf.spill.nta [loc0]=f79,-256
+ stf.spill.nta [loc1]=f71,-256
+ ;;
+ stf.spill.nta [loc0]=f63,-256
+ stf.spill.nta [loc1]=f55,-256
+ adds loc2=96*16-32,in0
+ ;;
+ stf.spill.nta [loc0]=f47,-256
+ stf.spill.nta [loc1]=f39,-256
+ adds loc3=96*16-32-128,in0
+ ;;
+ stf.spill.nta [loc2]=f126,-256
+ stf.spill.nta [loc3]=f118,-256
+ ;;
+ stf.spill.nta [loc2]=f110,-256
+ stf.spill.nta [loc3]=f102,-256
+ ;;
+ stf.spill.nta [loc2]=f94,-256
+ stf.spill.nta [loc3]=f86,-256
+ ;;
+ stf.spill.nta [loc2]=f78,-256
+ stf.spill.nta [loc3]=f70,-256
+ ;;
+ stf.spill.nta [loc2]=f62,-256
+ stf.spill.nta [loc3]=f54,-256
+ adds loc0=96*16-48,in0
+ ;;
+ stf.spill.nta [loc2]=f46,-256
+ stf.spill.nta [loc3]=f38,-256
+ adds loc1=96*16-48-128,in0
+ ;;
+ stf.spill.nta [loc0]=f125,-256
+ stf.spill.nta [loc1]=f117,-256
+ ;;
+ stf.spill.nta [loc0]=f109,-256
+ stf.spill.nta [loc1]=f101,-256
+ ;;
+ stf.spill.nta [loc0]=f93,-256
+ stf.spill.nta [loc1]=f85,-256
+ ;;
+ stf.spill.nta [loc0]=f77,-256
+ stf.spill.nta [loc1]=f69,-256
+ ;;
+ stf.spill.nta [loc0]=f61,-256
+ stf.spill.nta [loc1]=f53,-256
+ adds loc2=96*16-64,in0
+ ;;
+ stf.spill.nta [loc0]=f45,-256
+ stf.spill.nta [loc1]=f37,-256
+ adds loc3=96*16-64-128,in0
+ ;;
+ stf.spill.nta [loc2]=f124,-256
+ stf.spill.nta [loc3]=f116,-256
+ ;;
+ stf.spill.nta [loc2]=f108,-256
+ stf.spill.nta [loc3]=f100,-256
+ ;;
+ stf.spill.nta [loc2]=f92,-256
+ stf.spill.nta [loc3]=f84,-256
+ ;;
+ stf.spill.nta [loc2]=f76,-256
+ stf.spill.nta [loc3]=f68,-256
+ ;;
+ stf.spill.nta [loc2]=f60,-256
+ stf.spill.nta [loc3]=f52,-256
+ adds loc0=96*16-80,in0
+ ;;
+ stf.spill.nta [loc2]=f44,-256
+ stf.spill.nta [loc3]=f36,-256
+ adds loc1=96*16-80-128,in0
+ ;;
+ stf.spill.nta [loc0]=f123,-256
+ stf.spill.nta [loc1]=f115,-256
+ ;;
+ stf.spill.nta [loc0]=f107,-256
+ stf.spill.nta [loc1]=f99,-256
+ ;;
+ stf.spill.nta [loc0]=f91,-256
+ stf.spill.nta [loc1]=f83,-256
+ ;;
+ stf.spill.nta [loc0]=f75,-256
+ stf.spill.nta [loc1]=f67,-256
+ ;;
+ stf.spill.nta [loc0]=f59,-256
+ stf.spill.nta [loc1]=f51,-256
+ adds loc2=96*16-96,in0
+ ;;
+ stf.spill.nta [loc0]=f43,-256
+ stf.spill.nta [loc1]=f35,-256
+ adds loc3=96*16-96-128,in0
+ ;;
+ stf.spill.nta [loc2]=f122,-256
+ stf.spill.nta [loc3]=f114,-256
+ ;;
+ stf.spill.nta [loc2]=f106,-256
+ stf.spill.nta [loc3]=f98,-256
+ ;;
+ stf.spill.nta [loc2]=f90,-256
+ stf.spill.nta [loc3]=f82,-256
+ ;;
+ stf.spill.nta [loc2]=f74,-256
+ stf.spill.nta [loc3]=f66,-256
+ ;;
+ stf.spill.nta [loc2]=f58,-256
+ stf.spill.nta [loc3]=f50,-256
+ adds loc0=96*16-112,in0
+ ;;
+ stf.spill.nta [loc2]=f42,-256
+ stf.spill.nta [loc3]=f34,-256
+ adds loc1=96*16-112-128,in0
+ ;;
+ stf.spill.nta [loc0]=f121,-256
+ stf.spill.nta [loc1]=f113,-256
+ ;;
+ stf.spill.nta [loc0]=f105,-256
+ stf.spill.nta [loc1]=f97,-256
+ ;;
+ stf.spill.nta [loc0]=f89,-256
+ stf.spill.nta [loc1]=f81,-256
+ ;;
+ stf.spill.nta [loc0]=f73,-256
+ stf.spill.nta [loc1]=f65,-256
+ ;;
+ stf.spill.nta [loc0]=f57,-256
+ stf.spill.nta [loc1]=f49,-256
+ adds loc2=96*16-128,in0
+ ;;
+ stf.spill.nta [loc0]=f41,-256
+ stf.spill.nta [loc1]=f33,-256
+ adds loc3=96*16-128-128,in0
+ ;;
+ stf.spill.nta [loc2]=f120,-256
+ stf.spill.nta [loc3]=f112,-256
+ ;;
+ stf.spill.nta [loc2]=f104,-256
+ stf.spill.nta [loc3]=f96,-256
+ ;;
+ stf.spill.nta [loc2]=f88,-256
+ stf.spill.nta [loc3]=f80,-256
+ ;;
+ stf.spill.nta [loc2]=f72,-256
+ stf.spill.nta [loc3]=f64,-256
+ ;;
+ stf.spill.nta [loc2]=f56,-256
+ stf.spill.nta [loc3]=f48,-256
+ ;;
+ stf.spill.nta [loc2]=f40
+ stf.spill.nta [loc3]=f32
+ br.ret.sptk.many rp
+END(__ia64_save_fpu)
+
+GLOBAL_ENTRY(__ia64_load_fpu)
+ alloc r2=ar.pfs,1,2,0,0
+ adds r3=128,in0
+ adds r14=256,in0
+ adds r15=384,in0
+ mov loc0=512
+ mov loc1=-1024+16
+ ;;
+ ldf.fill.nta f32=[in0],loc0
+ ldf.fill.nta f40=[ r3],loc0
+ ldf.fill.nta f48=[r14],loc0
+ ldf.fill.nta f56=[r15],loc0
+ ;;
+ ldf.fill.nta f64=[in0],loc0
+ ldf.fill.nta f72=[ r3],loc0
+ ldf.fill.nta f80=[r14],loc0
+ ldf.fill.nta f88=[r15],loc0
+ ;;
+ ldf.fill.nta f96=[in0],loc1
+ ldf.fill.nta f104=[ r3],loc1
+ ldf.fill.nta f112=[r14],loc1
+ ldf.fill.nta f120=[r15],loc1
+ ;;
+ ldf.fill.nta f33=[in0],loc0
+ ldf.fill.nta f41=[ r3],loc0
+ ldf.fill.nta f49=[r14],loc0
+ ldf.fill.nta f57=[r15],loc0
+ ;;
+ ldf.fill.nta f65=[in0],loc0
+ ldf.fill.nta f73=[ r3],loc0
+ ldf.fill.nta f81=[r14],loc0
+ ldf.fill.nta f89=[r15],loc0
+ ;;
+ ldf.fill.nta f97=[in0],loc1
+ ldf.fill.nta f105=[ r3],loc1
+ ldf.fill.nta f113=[r14],loc1
+ ldf.fill.nta f121=[r15],loc1
+ ;;
+ ldf.fill.nta f34=[in0],loc0
+ ldf.fill.nta f42=[ r3],loc0
+ ldf.fill.nta f50=[r14],loc0
+ ldf.fill.nta f58=[r15],loc0
+ ;;
+ ldf.fill.nta f66=[in0],loc0
+ ldf.fill.nta f74=[ r3],loc0
+ ldf.fill.nta f82=[r14],loc0
+ ldf.fill.nta f90=[r15],loc0
+ ;;
+ ldf.fill.nta f98=[in0],loc1
+ ldf.fill.nta f106=[ r3],loc1
+ ldf.fill.nta f114=[r14],loc1
+ ldf.fill.nta f122=[r15],loc1
+ ;;
+ ldf.fill.nta f35=[in0],loc0
+ ldf.fill.nta f43=[ r3],loc0
+ ldf.fill.nta f51=[r14],loc0
+ ldf.fill.nta f59=[r15],loc0
+ ;;
+ ldf.fill.nta f67=[in0],loc0
+ ldf.fill.nta f75=[ r3],loc0
+ ldf.fill.nta f83=[r14],loc0
+ ldf.fill.nta f91=[r15],loc0
+ ;;
+ ldf.fill.nta f99=[in0],loc1
+ ldf.fill.nta f107=[ r3],loc1
+ ldf.fill.nta f115=[r14],loc1
+ ldf.fill.nta f123=[r15],loc1
+ ;;
+ ldf.fill.nta f36=[in0],loc0
+ ldf.fill.nta f44=[ r3],loc0
+ ldf.fill.nta f52=[r14],loc0
+ ldf.fill.nta f60=[r15],loc0
+ ;;
+ ldf.fill.nta f68=[in0],loc0
+ ldf.fill.nta f76=[ r3],loc0
+ ldf.fill.nta f84=[r14],loc0
+ ldf.fill.nta f92=[r15],loc0
+ ;;
+ ldf.fill.nta f100=[in0],loc1
+ ldf.fill.nta f108=[ r3],loc1
+ ldf.fill.nta f116=[r14],loc1
+ ldf.fill.nta f124=[r15],loc1
+ ;;
+ ldf.fill.nta f37=[in0],loc0
+ ldf.fill.nta f45=[ r3],loc0
+ ldf.fill.nta f53=[r14],loc0
+ ldf.fill.nta f61=[r15],loc0
+ ;;
+ ldf.fill.nta f69=[in0],loc0
+ ldf.fill.nta f77=[ r3],loc0
+ ldf.fill.nta f85=[r14],loc0
+ ldf.fill.nta f93=[r15],loc0
+ ;;
+ ldf.fill.nta f101=[in0],loc1
+ ldf.fill.nta f109=[ r3],loc1
+ ldf.fill.nta f117=[r14],loc1
+ ldf.fill.nta f125=[r15],loc1
+ ;;
+ ldf.fill.nta f38 =[in0],loc0
+ ldf.fill.nta f46 =[ r3],loc0
+ ldf.fill.nta f54 =[r14],loc0
+ ldf.fill.nta f62 =[r15],loc0
+ ;;
+ ldf.fill.nta f70 =[in0],loc0
+ ldf.fill.nta f78 =[ r3],loc0
+ ldf.fill.nta f86 =[r14],loc0
+ ldf.fill.nta f94 =[r15],loc0
+ ;;
+ ldf.fill.nta f102=[in0],loc1
+ ldf.fill.nta f110=[ r3],loc1
+ ldf.fill.nta f118=[r14],loc1
+ ldf.fill.nta f126=[r15],loc1
+ ;;
+ ldf.fill.nta f39 =[in0],loc0
+ ldf.fill.nta f47 =[ r3],loc0
+ ldf.fill.nta f55 =[r14],loc0
+ ldf.fill.nta f63 =[r15],loc0
+ ;;
+ ldf.fill.nta f71 =[in0],loc0
+ ldf.fill.nta f79 =[ r3],loc0
+ ldf.fill.nta f87 =[r14],loc0
+ ldf.fill.nta f95 =[r15],loc0
+ ;;
+ ldf.fill.nta f103=[in0]
+ ldf.fill.nta f111=[ r3]
+ ldf.fill.nta f119=[r14]
+ ldf.fill.nta f127=[r15]
+ br.ret.sptk.many rp
+END(__ia64_load_fpu)
+
+GLOBAL_ENTRY(__ia64_init_fpu)
+ stf.spill [sp]=f0 // M3
+ mov f32=f0 // F
+ nop.b 0
+
+ ldfps f33,f34=[sp] // M0
+ ldfps f35,f36=[sp] // M1
+ mov f37=f0 // F
+ ;;
+
+ setf.s f38=r0 // M2
+ setf.s f39=r0 // M3
+ mov f40=f0 // F
+
+ ldfps f41,f42=[sp] // M0
+ ldfps f43,f44=[sp] // M1
+ mov f45=f0 // F
+
+ setf.s f46=r0 // M2
+ setf.s f47=r0 // M3
+ mov f48=f0 // F
+
+ ldfps f49,f50=[sp] // M0
+ ldfps f51,f52=[sp] // M1
+ mov f53=f0 // F
+
+ setf.s f54=r0 // M2
+ setf.s f55=r0 // M3
+ mov f56=f0 // F
+
+ ldfps f57,f58=[sp] // M0
+ ldfps f59,f60=[sp] // M1
+ mov f61=f0 // F
+
+ setf.s f62=r0 // M2
+ setf.s f63=r0 // M3
+ mov f64=f0 // F
+
+ ldfps f65,f66=[sp] // M0
+ ldfps f67,f68=[sp] // M1
+ mov f69=f0 // F
+
+ setf.s f70=r0 // M2
+ setf.s f71=r0 // M3
+ mov f72=f0 // F
+
+ ldfps f73,f74=[sp] // M0
+ ldfps f75,f76=[sp] // M1
+ mov f77=f0 // F
+
+ setf.s f78=r0 // M2
+ setf.s f79=r0 // M3
+ mov f80=f0 // F
+
+ ldfps f81,f82=[sp] // M0
+ ldfps f83,f84=[sp] // M1
+ mov f85=f0 // F
+
+ setf.s f86=r0 // M2
+ setf.s f87=r0 // M3
+ mov f88=f0 // F
+
+ /*
+ * When the instructions are cached, it would be faster to initialize
+ * the remaining registers with simply mov instructions (F-unit).
+ * This gets the time down to ~29 cycles. However, this would use up
+ * 33 bundles, whereas continuing with the above pattern yields
+ * 10 bundles and ~30 cycles.
+ */
+
+ ldfps f89,f90=[sp] // M0
+ ldfps f91,f92=[sp] // M1
+ mov f93=f0 // F
+
+ setf.s f94=r0 // M2
+ setf.s f95=r0 // M3
+ mov f96=f0 // F
+
+ ldfps f97,f98=[sp] // M0
+ ldfps f99,f100=[sp] // M1
+ mov f101=f0 // F
+
+ setf.s f102=r0 // M2
+ setf.s f103=r0 // M3
+ mov f104=f0 // F
+
+ ldfps f105,f106=[sp] // M0
+ ldfps f107,f108=[sp] // M1
+ mov f109=f0 // F
+
+ setf.s f110=r0 // M2
+ setf.s f111=r0 // M3
+ mov f112=f0 // F
+
+ ldfps f113,f114=[sp] // M0
+ ldfps f115,f116=[sp] // M1
+ mov f117=f0 // F
+
+ setf.s f118=r0 // M2
+ setf.s f119=r0 // M3
+ mov f120=f0 // F
+
+ ldfps f121,f122=[sp] // M0
+ ldfps f123,f124=[sp] // M1
+ mov f125=f0 // F
+
+ setf.s f126=r0 // M2
+ setf.s f127=r0 // M3
+ br.ret.sptk.many rp // B
+END(__ia64_init_fpu)
+
+/*
+ * Switch execution mode from virtual to physical
+ *
+ * Inputs:
+ * r16 = new psr to establish
+ * Output:
+ * r19 = old virtual address of ar.bsp
+ * r20 = old virtual address of sp
+ *
+ * Note: RSE must already be in enforced lazy mode
+ */
+GLOBAL_ENTRY(ia64_switch_mode_phys)
+ {
+ alloc r2=ar.pfs,0,0,0,0
+ rsm psr.i | psr.ic // disable interrupts and interrupt collection
+ mov r15=ip
+ }
+ ;;
+ {
+ flushrs // must be first insn in group
+ srlz.i
+ }
+ ;;
+ mov cr.ipsr=r16 // set new PSR
+ add r3=1f-ia64_switch_mode_phys,r15
+
+ mov r19=ar.bsp
+ mov r20=sp
+ mov r14=rp // get return address into a general register
+ ;;
+
+ // going to physical mode, use tpa to translate virt->phys
+ tpa r17=r19
+ tpa r3=r3
+ tpa sp=sp
+ tpa r14=r14
+ ;;
+
+ mov r18=ar.rnat // save ar.rnat
+ mov ar.bspstore=r17 // this steps on ar.rnat
+ mov cr.iip=r3
+ mov cr.ifs=r0
+ ;;
+ mov ar.rnat=r18 // restore ar.rnat
+ rfi // must be last insn in group
+ ;;
+1: mov rp=r14
+ br.ret.sptk.many rp
+END(ia64_switch_mode_phys)
+
+/*
+ * Switch execution mode from physical to virtual
+ *
+ * Inputs:
+ * r16 = new psr to establish
+ * r19 = new bspstore to establish
+ * r20 = new sp to establish
+ *
+ * Note: RSE must already be in enforced lazy mode
+ */
+GLOBAL_ENTRY(ia64_switch_mode_virt)
+ {
+ alloc r2=ar.pfs,0,0,0,0
+ rsm psr.i | psr.ic // disable interrupts and interrupt collection
+ mov r15=ip
+ }
+ ;;
+ {
+ flushrs // must be first insn in group
+ srlz.i
+ }
+ ;;
+ mov cr.ipsr=r16 // set new PSR
+ add r3=1f-ia64_switch_mode_virt,r15
+
+ mov r14=rp // get return address into a general register
+ ;;
+
+ // going to virtual
+ // - for code addresses, set upper bits of addr to KERNEL_START
+ // - for stack addresses, copy from input argument
+ movl r18=KERNEL_START
+ dep r3=0,r3,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
+ dep r14=0,r14,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
+ mov sp=r20
+ ;;
+ or r3=r3,r18
+ or r14=r14,r18
+ ;;
+
+ mov r18=ar.rnat // save ar.rnat
+ mov ar.bspstore=r19 // this steps on ar.rnat
+ mov cr.iip=r3
+ mov cr.ifs=r0
+ ;;
+ mov ar.rnat=r18 // restore ar.rnat
+ rfi // must be last insn in group
+ ;;
+1: mov rp=r14
+ br.ret.sptk.many rp
+END(ia64_switch_mode_virt)
+
+GLOBAL_ENTRY(ia64_delay_loop)
+ .prologue
+{ nop 0 // work around GAS unwind info generation bug...
+ .save ar.lc,r2
+ mov r2=ar.lc
+ .body
+ ;;
+ mov ar.lc=r32
+}
+ ;;
+ // force loop to be 32-byte aligned (GAS bug means we cannot use .align
+ // inside function body without corrupting unwind info).
+{ nop 0 }
+1: br.cloop.sptk.few 1b
+ ;;
+ mov ar.lc=r2
+ br.ret.sptk.many rp
+END(ia64_delay_loop)
+
+/*
+ * Return a CPU-local timestamp in nano-seconds. This timestamp is
+ * NOT synchronized across CPUs; its return value must never be
+ * compared against the values returned on another CPU. The usage in
+ * kernel/sched.c ensures that.
+ *
+ * The return-value of sched_clock() is NOT supposed to wrap-around.
+ * If it did, it would cause some scheduling hiccups (at the worst).
+ * Fortunately, with a 64-bit cycle-counter ticking at 100GHz, even
+ * that would happen only once every 5+ years.
+ *
+ * The code below basically calculates:
+ *
+ * (ia64_get_itc() * local_cpu_data->nsec_per_cyc) >> IA64_NSEC_PER_CYC_SHIFT
+ *
+ * except that the multiplication and the shift are done with 128-bit
+ * intermediate precision so that we can produce a full 64-bit result.
+ */
+GLOBAL_ENTRY(sched_clock)
+ addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
+ mov.m r9=ar.itc // fetch cycle-counter (35 cyc)
+ ;;
+ ldf8 f8=[r8]
+ ;;
+ setf.sig f9=r9 // certain to stall, so issue it _after_ ldf8...
+ ;;
+ xmpy.lu f10=f9,f8 // calculate low 64 bits of 128-bit product (4 cyc)
+ xmpy.hu f11=f9,f8 // calculate high 64 bits of 128-bit product
+ ;;
+ getf.sig r8=f10 // (5 cyc)
+ getf.sig r9=f11
+ ;;
+ shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
+ br.ret.sptk.many rp
+END(sched_clock)
+
+GLOBAL_ENTRY(start_kernel_thread)
+ .prologue
+ .save rp, r0 // this is the end of the call-chain
+ .body
+ alloc r2 = ar.pfs, 0, 0, 2, 0
+ mov out0 = r9
+ mov out1 = r11;;
+ br.call.sptk.many rp = kernel_thread_helper;;
+ mov out0 = r8
+ br.call.sptk.many rp = sys_exit;;
+1: br.sptk.few 1b // not reached
+END(start_kernel_thread)
+
+#ifdef CONFIG_IA64_BRL_EMU
+
+/*
+ * Assembly routines used by brl_emu.c to set preserved register state.
+ */
+
+#define SET_REG(reg) \
+ GLOBAL_ENTRY(ia64_set_##reg); \
+ alloc r16=ar.pfs,1,0,0,0; \
+ mov reg=r32; \
+ ;; \
+ br.ret.sptk.many rp; \
+ END(ia64_set_##reg)
+
+SET_REG(b1);
+SET_REG(b2);
+SET_REG(b3);
+SET_REG(b4);
+SET_REG(b5);
+
+#endif /* CONFIG_IA64_BRL_EMU */
+
+#ifdef CONFIG_SMP
+ /*
+ * This routine handles spinlock contention. It uses a non-standard calling
+ * convention to avoid converting leaf routines into interior routines. Because
+ * of this special convention, there are several restrictions:
+ *
+ * - do not use gp-relative variables: this code is called from the kernel
+ * and from modules, so r1 is undefined.
+ * - do not use stacked registers, the caller owns them.
+ * - do not use the scratch stack space, the caller owns it.
+ * - do not use any registers other than the ones listed below
+ *
+ * Inputs:
+ * ar.pfs - saved CFM of caller
+ * ar.ccv - 0 (and available for use)
+ * r27 - flags from spin_lock_irqsave or 0. Must be preserved.
+ * r28 - available for use.
+ * r29 - available for use.
+ * r30 - available for use.
+ * r31 - address of lock, available for use.
+ * b6 - return address
+ * p14 - available for use.
+ * p15 - used to track flag status.
+ *
+ * If you patch this code to use more registers, do not forget to update
+ * the clobber lists for spin_lock() in include/asm-ia64/spinlock.h.
+ */
+
+#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
+
+GLOBAL_ENTRY(ia64_spinlock_contention_pre3_4)
+ .prologue
+ .save ar.pfs, r0 // this code effectively has a zero frame size
+ .save rp, r28
+ .body
+ nop 0
+ tbit.nz p15,p0=r27,IA64_PSR_I_BIT
+ .restore sp // pop existing prologue after next insn
+ mov b6 = r28
+ .prologue
+ .save ar.pfs, r0
+ .altrp b6
+ .body
+ ;;
+(p15) ssm psr.i // reenable interrupts if they were on
+ // DavidM says that srlz.d is slow and is not required in this case
+.wait:
+ // exponential backoff, kdb, lockmeter etc. go in here
+ hint @pause
+ ld4 r30=[r31] // don't use ld4.bias; if it's contended, we won't write the word
+ nop 0
+ ;;
+ cmp4.ne p14,p0=r30,r0
+(p14) br.cond.sptk.few .wait
+(p15) rsm psr.i // disable interrupts if we reenabled them
+ br.cond.sptk.few b6 // lock is now free, try to acquire
+ .global ia64_spinlock_contention_pre3_4_end // for kernprof
+ia64_spinlock_contention_pre3_4_end:
+END(ia64_spinlock_contention_pre3_4)
+
+#else
+
+GLOBAL_ENTRY(ia64_spinlock_contention)
+ .prologue
+ .altrp b6
+ .body
+ tbit.nz p15,p0=r27,IA64_PSR_I_BIT
+ ;;
+.wait:
+(p15) ssm psr.i // reenable interrupts if they were on
+ // DavidM says that srlz.d is slow and is not required in this case
+.wait2:
+ // exponential backoff, kdb, lockmeter etc. go in here
+ hint @pause
+ ld4 r30=[r31] // don't use ld4.bias; if it's contended, we won't write the word
+ ;;
+ cmp4.ne p14,p0=r30,r0
+ mov r30 = 1
+(p14) br.cond.sptk.few .wait2
+(p15) rsm psr.i // disable interrupts if we reenabled them
+ ;;
+ cmpxchg4.acq r30=[r31], r30, ar.ccv
+ ;;
+ cmp4.ne p14,p0=r0,r30
+(p14) br.cond.sptk.few .wait
+
+ br.ret.sptk.many b6 // lock is now taken
+END(ia64_spinlock_contention)
+
+#endif
+
+#endif /* CONFIG_SMP */
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
new file mode 100644
index 00000000000..7bbf019c986
--- /dev/null
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -0,0 +1,127 @@
+/*
+ * Architecture-specific kernel symbols
+ *
+ * Don't put any exports here unless they're defined in an assembler file.
+ * All other exports should be put directly after the definition.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/string.h>
+EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(memchr);
+EXPORT_SYMBOL(memcmp);
+EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(memmove);
+EXPORT_SYMBOL(memscan);
+EXPORT_SYMBOL(strcat);
+EXPORT_SYMBOL(strchr);
+EXPORT_SYMBOL(strcmp);
+EXPORT_SYMBOL(strcpy);
+EXPORT_SYMBOL(strlen);
+EXPORT_SYMBOL(strncat);
+EXPORT_SYMBOL(strncmp);
+EXPORT_SYMBOL(strncpy);
+EXPORT_SYMBOL(strnlen);
+EXPORT_SYMBOL(strrchr);
+EXPORT_SYMBOL(strstr);
+EXPORT_SYMBOL(strpbrk);
+
+#include <asm/checksum.h>
+EXPORT_SYMBOL(ip_fast_csum); /* hand-coded assembly */
+
+#include <asm/semaphore.h>
+EXPORT_SYMBOL(__down);
+EXPORT_SYMBOL(__down_interruptible);
+EXPORT_SYMBOL(__down_trylock);
+EXPORT_SYMBOL(__up);
+
+#include <asm/page.h>
+EXPORT_SYMBOL(clear_page);
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+#include <linux/bootmem.h>
+EXPORT_SYMBOL(max_low_pfn); /* defined by bootmem.c, but not exported by generic code */
+#endif
+
+#include <asm/processor.h>
+EXPORT_SYMBOL(per_cpu__cpu_info);
+#ifdef CONFIG_SMP
+EXPORT_SYMBOL(per_cpu__local_per_cpu_offset);
+#endif
+
+#include <asm/uaccess.h>
+EXPORT_SYMBOL(__copy_user);
+EXPORT_SYMBOL(__do_clear_user);
+EXPORT_SYMBOL(__strlen_user);
+EXPORT_SYMBOL(__strncpy_from_user);
+EXPORT_SYMBOL(__strnlen_user);
+
+#include <asm/unistd.h>
+EXPORT_SYMBOL(__ia64_syscall);
+
+/* from arch/ia64/lib */
+extern void __divsi3(void);
+extern void __udivsi3(void);
+extern void __modsi3(void);
+extern void __umodsi3(void);
+extern void __divdi3(void);
+extern void __udivdi3(void);
+extern void __moddi3(void);
+extern void __umoddi3(void);
+
+EXPORT_SYMBOL(__divsi3);
+EXPORT_SYMBOL(__udivsi3);
+EXPORT_SYMBOL(__modsi3);
+EXPORT_SYMBOL(__umodsi3);
+EXPORT_SYMBOL(__divdi3);
+EXPORT_SYMBOL(__udivdi3);
+EXPORT_SYMBOL(__moddi3);
+EXPORT_SYMBOL(__umoddi3);
+
+#if defined(CONFIG_MD_RAID5) || defined(CONFIG_MD_RAID5_MODULE)
+extern void xor_ia64_2(void);
+extern void xor_ia64_3(void);
+extern void xor_ia64_4(void);
+extern void xor_ia64_5(void);
+
+EXPORT_SYMBOL(xor_ia64_2);
+EXPORT_SYMBOL(xor_ia64_3);
+EXPORT_SYMBOL(xor_ia64_4);
+EXPORT_SYMBOL(xor_ia64_5);
+#endif
+
+#include <asm/pal.h>
+EXPORT_SYMBOL(ia64_pal_call_phys_stacked);
+EXPORT_SYMBOL(ia64_pal_call_phys_static);
+EXPORT_SYMBOL(ia64_pal_call_stacked);
+EXPORT_SYMBOL(ia64_pal_call_static);
+EXPORT_SYMBOL(ia64_load_scratch_fpregs);
+EXPORT_SYMBOL(ia64_save_scratch_fpregs);
+
+#include <asm/unwind.h>
+EXPORT_SYMBOL(unw_init_running);
+
+#ifdef ASM_SUPPORTED
+# ifdef CONFIG_SMP
+# if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
+/*
+ * This is not a normal routine and we don't want a function descriptor for it, so we use
+ * a fake declaration here.
+ */
+extern char ia64_spinlock_contention_pre3_4;
+EXPORT_SYMBOL(ia64_spinlock_contention_pre3_4);
+# else
+/*
+ * This is not a normal routine and we don't want a function descriptor for it, so we use
+ * a fake declaration here.
+ */
+extern char ia64_spinlock_contention;
+EXPORT_SYMBOL(ia64_spinlock_contention);
+# endif
+# endif
+#endif
+
+extern char ia64_ivt[];
+EXPORT_SYMBOL(ia64_ivt);
diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c
new file mode 100644
index 00000000000..b69c397ed1b
--- /dev/null
+++ b/arch/ia64/kernel/init_task.c
@@ -0,0 +1,46 @@
+/*
+ * This is where we statically allocate and initialize the initial
+ * task.
+ *
+ * Copyright (C) 1999, 2002-2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/init_task.h>
+#include <linux/mqueue.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+
+static struct fs_struct init_fs = INIT_FS;
+static struct files_struct init_files = INIT_FILES;
+static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+struct mm_struct init_mm = INIT_MM(init_mm);
+
+EXPORT_SYMBOL(init_mm);
+
+/*
+ * Initial task structure.
+ *
+ * We need to make sure that this is properly aligned due to the way process stacks are
+ * handled. This is done by having a special ".data.init_task" section...
+ */
+#define init_thread_info init_task_mem.s.thread_info
+
+union {
+ struct {
+ struct task_struct task;
+ struct thread_info thread_info;
+ } s;
+ unsigned long stack[KERNEL_STACK_SIZE/sizeof (unsigned long)];
+} init_task_mem asm ("init_task") __attribute__((section(".data.init_task"))) = {{
+ .task = INIT_TASK(init_task_mem.s.task),
+ .thread_info = INIT_THREAD_INFO(init_task_mem.s.task)
+}};
+
+EXPORT_SYMBOL(init_task);
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
new file mode 100644
index 00000000000..c15be5c38f5
--- /dev/null
+++ b/arch/ia64/kernel/iosapic.c
@@ -0,0 +1,827 @@
+/*
+ * I/O SAPIC support.
+ *
+ * Copyright (C) 1999 Intel Corp.
+ * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
+ * Copyright (C) 2000-2002 J.I. Lee <jung-ik.lee@intel.com>
+ * Copyright (C) 1999-2000, 2002-2003 Hewlett-Packard Co.
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
+ *
+ * 00/04/19 D. Mosberger Rewritten to mirror more closely the x86 I/O APIC code.
+ * In particular, we now have separate handlers for edge
+ * and level triggered interrupts.
+ * 00/10/27 Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector allocation
+ * PCI to vector mapping, shared PCI interrupts.
+ * 00/10/27 D. Mosberger Document things a bit more to make them more understandable.
+ * Clean up much of the old IOSAPIC cruft.
+ * 01/07/27 J.I. Lee PCI irq routing, Platform/Legacy interrupts and fixes for
+ * ACPI S5(SoftOff) support.
+ * 02/01/23 J.I. Lee iosapic pgm fixes for PCI irq routing from _PRT
+ * 02/01/07 E. Focht <efocht@ess.nec.de> Redirectable interrupt vectors in
+ * iosapic_set_affinity(), initializations for
+ * /proc/irq/#/smp_affinity
+ * 02/04/02 P. Diefenbaugh Cleaned up ACPI PCI IRQ routing.
+ * 02/04/18 J.I. Lee bug fix in iosapic_init_pci_irq
+ * 02/04/30 J.I. Lee bug fix in find_iosapic to fix ACPI PCI IRQ to IOSAPIC mapping
+ * error
+ * 02/07/29 T. Kochi Allocate interrupt vectors dynamically
+ * 02/08/04 T. Kochi Cleaned up terminology (irq, global system interrupt, vector, etc.)
+ * 02/09/20 D. Mosberger Simplified by taking advantage of ACPI's pci_irq code.
+ * 03/02/19 B. Helgaas Make pcat_compat system-wide, not per-IOSAPIC.
+ * Remove iosapic_address & gsi_base from external interfaces.
+ * Rationalize __init/__devinit attributes.
+ * 04/12/04 Ashok Raj <ashok.raj@intel.com> Intel Corporation 2004
+ * Updated to work with irq migration necessary for CPU Hotplug
+ */
+/*
+ * Here is what the interrupt logic between a PCI device and the kernel looks like:
+ *
+ * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC, INTD). The
+ * device is uniquely identified by its bus- and slot-number (the function
+ * number does not matter here because all functions share the same interrupt
+ * lines).
+ *
+ * (2) The motherboard routes the interrupt line to a pin on an IOSAPIC controller.
+ * Multiple interrupt lines may have to share the same IOSAPIC pin (if they're level
+ * triggered and use the same polarity). Each interrupt line has a unique Global
+ * System Interrupt (GSI) number which can be calculated as the sum of the controller's
+ * base GSI number and the IOSAPIC pin number to which the line connects.
+ *
+ * (3) The IOSAPIC uses internal routing table entries (RTEs) to map the IOSAPIC pin
+ * into the IA-64 interrupt vector. This interrupt vector is then sent to the CPU.
+ *
+ * (4) The kernel recognizes an interrupt as an IRQ. The IRQ interface is used as
+ * an architecture-independent interrupt handling mechanism in Linux. As an
+ * IRQ is a number, we have to have IA-64 interrupt vector number <-> IRQ number
+ * mapping. On smaller systems, we use one-to-one mapping between IA-64 vector and
+ * IRQ. A platform can implement platform_irq_to_vector(irq) and
+ * platform_local_vector_to_irq(vector) APIs to differentiate the mapping.
+ * Please see also include/asm-ia64/hw_irq.h for those APIs.
+ *
+ * To sum up, there are three levels of mappings involved:
+ *
+ * PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ
+ *
+ * Note: The term "IRQ" is loosely used everywhere in the Linux kernel to describe interrupts.
+ * Now we use "IRQ" only for Linux IRQ's. ISA IRQ (isa_irq) is the only exception in this
+ * source code.
+ */
+#include <linux/config.h>
+
+#include <linux/acpi.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/string.h>
+
+#include <asm/delay.h>
+#include <asm/hw_irq.h>
+#include <asm/io.h>
+#include <asm/iosapic.h>
+#include <asm/machvec.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+
+
+#undef DEBUG_INTERRUPT_ROUTING
+
+#ifdef DEBUG_INTERRUPT_ROUTING
+#define DBG(fmt...) printk(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+static DEFINE_SPINLOCK(iosapic_lock);
+
+/* These tables map IA-64 vectors to the IOSAPIC pin that generates this vector. */
+
+static struct iosapic_intr_info {
+ char __iomem *addr; /* base address of IOSAPIC */
+ u32 low32; /* current value of low word of Redirection table entry */
+ unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */
+ char rte_index; /* IOSAPIC RTE index (-1 => not an IOSAPIC interrupt) */
+ unsigned char dmode : 3; /* delivery mode (see iosapic.h) */
+ unsigned char polarity: 1; /* interrupt polarity (see iosapic.h) */
+ unsigned char trigger : 1; /* trigger mode (see iosapic.h) */
+ int refcnt; /* reference counter */
+} iosapic_intr_info[IA64_NUM_VECTORS];
+
+static struct iosapic {
+ char __iomem *addr; /* base address of IOSAPIC */
+ unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */
+ unsigned short num_rte; /* number of RTE in this IOSAPIC */
+#ifdef CONFIG_NUMA
+ unsigned short node; /* numa node association via pxm */
+#endif
+} iosapic_lists[NR_IOSAPICS];
+
+static int num_iosapic;
+
+static unsigned char pcat_compat __initdata; /* 8259 compatibility flag */
+
+
+/*
+ * Find an IOSAPIC associated with a GSI
+ */
+static inline int
+find_iosapic (unsigned int gsi)
+{
+ int i;
+
+ for (i = 0; i < num_iosapic; i++) {
+ if ((unsigned) (gsi - iosapic_lists[i].gsi_base) < iosapic_lists[i].num_rte)
+ return i;
+ }
+
+ return -1;
+}
+
+static inline int
+_gsi_to_vector (unsigned int gsi)
+{
+ struct iosapic_intr_info *info;
+
+ for (info = iosapic_intr_info; info < iosapic_intr_info + IA64_NUM_VECTORS; ++info)
+ if (info->gsi_base + info->rte_index == gsi)
+ return info - iosapic_intr_info;
+ return -1;
+}
+
+/*
+ * Translate GSI number to the corresponding IA-64 interrupt vector. If no
+ * entry exists, return -1.
+ */
+inline int
+gsi_to_vector (unsigned int gsi)
+{
+ return _gsi_to_vector(gsi);
+}
+
+int
+gsi_to_irq (unsigned int gsi)
+{
+ /*
+ * XXX fix me: this assumes an identity mapping between IA-64 vector and Linux irq
+ * numbers...
+ */
+ return _gsi_to_vector(gsi);
+}
+
+static void
+set_rte (unsigned int vector, unsigned int dest, int mask)
+{
+ unsigned long pol, trigger, dmode;
+ u32 low32, high32;
+ char __iomem *addr;
+ int rte_index;
+ char redir;
+
+ DBG(KERN_DEBUG"IOSAPIC: routing vector %d to 0x%x\n", vector, dest);
+
+ rte_index = iosapic_intr_info[vector].rte_index;
+ if (rte_index < 0)
+ return; /* not an IOSAPIC interrupt */
+
+ addr = iosapic_intr_info[vector].addr;
+ pol = iosapic_intr_info[vector].polarity;
+ trigger = iosapic_intr_info[vector].trigger;
+ dmode = iosapic_intr_info[vector].dmode;
+ vector &= (~IA64_IRQ_REDIRECTED);
+
+ redir = (dmode == IOSAPIC_LOWEST_PRIORITY) ? 1 : 0;
+
+#ifdef CONFIG_SMP
+ {
+ unsigned int irq;
+
+ for (irq = 0; irq < NR_IRQS; ++irq)
+ if (irq_to_vector(irq) == vector) {
+ set_irq_affinity_info(irq, (int)(dest & 0xffff), redir);
+ break;
+ }
+ }
+#endif
+
+ low32 = ((pol << IOSAPIC_POLARITY_SHIFT) |
+ (trigger << IOSAPIC_TRIGGER_SHIFT) |
+ (dmode << IOSAPIC_DELIVERY_SHIFT) |
+ ((mask ? 1 : 0) << IOSAPIC_MASK_SHIFT) |
+ vector);
+
+ /* dest contains both id and eid */
+ high32 = (dest << IOSAPIC_DEST_SHIFT);
+
+ iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32);
+ iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
+ iosapic_intr_info[vector].low32 = low32;
+}
+
+static void
+nop (unsigned int vector)
+{
+ /* do nothing... */
+}
+
+static void
+mask_irq (unsigned int irq)
+{
+ unsigned long flags;
+ char __iomem *addr;
+ u32 low32;
+ int rte_index;
+ ia64_vector vec = irq_to_vector(irq);
+
+ addr = iosapic_intr_info[vec].addr;
+ rte_index = iosapic_intr_info[vec].rte_index;
+
+ if (rte_index < 0)
+ return; /* not an IOSAPIC interrupt! */
+
+ spin_lock_irqsave(&iosapic_lock, flags);
+ {
+ /* set only the mask bit */
+ low32 = iosapic_intr_info[vec].low32 |= IOSAPIC_MASK;
+ iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
+ }
+ spin_unlock_irqrestore(&iosapic_lock, flags);
+}
+
+static void
+unmask_irq (unsigned int irq)
+{
+ unsigned long flags;
+ char __iomem *addr;
+ u32 low32;
+ int rte_index;
+ ia64_vector vec = irq_to_vector(irq);
+
+ addr = iosapic_intr_info[vec].addr;
+ rte_index = iosapic_intr_info[vec].rte_index;
+ if (rte_index < 0)
+ return; /* not an IOSAPIC interrupt! */
+
+ spin_lock_irqsave(&iosapic_lock, flags);
+ {
+ low32 = iosapic_intr_info[vec].low32 &= ~IOSAPIC_MASK;
+ iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
+ }
+ spin_unlock_irqrestore(&iosapic_lock, flags);
+}
+
+
+static void
+iosapic_set_affinity (unsigned int irq, cpumask_t mask)
+{
+#ifdef CONFIG_SMP
+ unsigned long flags;
+ u32 high32, low32;
+ int dest, rte_index;
+ char __iomem *addr;
+ int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0;
+ ia64_vector vec;
+ /* The redirect request rides in a flag bit of irq; strip it before the lookup. */
+ irq &= (~IA64_IRQ_REDIRECTED);
+ vec = irq_to_vector(irq);
+
+ if (cpus_empty(mask))
+ return;
+ /* Only the first CPU in the mask is used as the interrupt destination. */
+ dest = cpu_physical_id(first_cpu(mask));
+
+ rte_index = iosapic_intr_info[vec].rte_index;
+ addr = iosapic_intr_info[vec].addr;
+
+ if (rte_index < 0)
+ return; /* not an IOSAPIC interrupt */
+
+ set_irq_affinity_info(irq, dest, redir);
+
+ /* dest contains both id and eid */
+ high32 = dest << IOSAPIC_DEST_SHIFT;
+
+ spin_lock_irqsave(&iosapic_lock, flags);
+ {
+ low32 = iosapic_intr_info[vec].low32 & ~(7 << IOSAPIC_DELIVERY_SHIFT); /* clear delivery mode */
+
+ if (redir)
+ /* change delivery mode to lowest priority */
+ low32 |= (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
+ else
+ /* change delivery mode to fixed */
+ low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT);
+
+ iosapic_intr_info[vec].low32 = low32; /* keep the cached copy in step with the RTE */
+ iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32);
+ iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
+ }
+ spin_unlock_irqrestore(&iosapic_lock, flags);
+#endif /* CONFIG_SMP: without it this function has an empty body */
+}
+
+/*
+ * Handlers for level-triggered interrupts.
+ */
+
+static unsigned int
+iosapic_startup_level_irq (unsigned int irq)
+{
+ unmask_irq(irq); /* starting a level-triggered IRQ only unmasks its RTE */
+ return 0;
+}
+
+static void
+iosapic_end_level_irq (unsigned int irq)
+{
+ ia64_vector vec = irq_to_vector(irq);
+
+ move_irq(irq); /* let a queued affinity change run before the EOI is sent */
+ iosapic_eoi(iosapic_intr_info[vec].addr, vec);
+}
+
+#define iosapic_shutdown_level_irq mask_irq
+#define iosapic_enable_level_irq unmask_irq
+#define iosapic_disable_level_irq mask_irq
+#define iosapic_ack_level_irq nop /* ack maps to nop; the EOI is issued from the .end handler */
+
+struct hw_interrupt_type irq_type_iosapic_level = {
+ .typename = "IO-SAPIC-level",
+ .startup = iosapic_startup_level_irq,
+ .shutdown = iosapic_shutdown_level_irq, /* masks the RTE */
+ .enable = iosapic_enable_level_irq, /* unmasks the RTE */
+ .disable = iosapic_disable_level_irq, /* masks the RTE */
+ .ack = iosapic_ack_level_irq,
+ .end = iosapic_end_level_irq, /* applies pending affinity, then EOI */
+ .set_affinity = iosapic_set_affinity
+};
+
+/*
+ * Handlers for edge-triggered interrupts.
+ */
+
+static unsigned int
+iosapic_startup_edge_irq (unsigned int irq)
+{
+ unmask_irq(irq);
+ /*
+ * The IOSAPIC simply drops interrupts that arrive while the
+ * corresponding pin is masked, so we cannot tell whether an
+ * interrupt is already pending. Always return 0 and hope not...
+ */
+ return 0;
+}
+
+static void
+iosapic_ack_edge_irq (unsigned int irq)
+{
+	const unsigned int masked_state = IRQ_PENDING | IRQ_DISABLED;
+	irq_desc_t *desc = irq_descp(irq);
+
+	move_irq(irq);
+	/*
+	 * Mask the pin for real only when the descriptor is both disabled and
+	 * already marked pending; this keeps an unhandled device from storming.
+	 */
+	if ((desc->status & masked_state) == masked_state)
+		mask_irq(irq);
+}
+
+#define iosapic_enable_edge_irq unmask_irq
+#define iosapic_disable_edge_irq nop /* masking is deferred to iosapic_ack_edge_irq() */
+#define iosapic_end_edge_irq nop
+
+struct hw_interrupt_type irq_type_iosapic_edge = {
+ .typename = "IO-SAPIC-edge",
+ .startup = iosapic_startup_edge_irq,
+ .shutdown = iosapic_disable_edge_irq, /* i.e. nop: shutdown does not mask the pin */
+ .enable = iosapic_enable_edge_irq,
+ .disable = iosapic_disable_edge_irq,
+ .ack = iosapic_ack_edge_irq, /* applies pending affinity; masks if disabled and pending */
+ .end = iosapic_end_edge_irq,
+ .set_affinity = iosapic_set_affinity
+};
+
+unsigned int
+iosapic_version (char __iomem *addr)
+{
+ /*
+ * The IOSAPIC Version Register returns a 32-bit value laid out like:
+ * {
+ * unsigned int version : 8;
+ * unsigned int reserved1 : 8;
+ * unsigned int max_redir : 8;
+ * unsigned int reserved2 : 8;
+ * }
+ */
+ return iosapic_read(addr, IOSAPIC_VERSION); /* raw register value; callers extract fields */
+}
+
+/*
+ * If the given vector already carries IOSAPIC interrupt information, move that
+ * information to a freshly assigned vector so the given one becomes available.
+ */
+static void __init
+iosapic_reassign_vector (int vector)
+{
+	struct iosapic_intr_info *info = &iosapic_intr_info[vector];
+	int new_vector;
+
+	/* Nothing recorded for this vector: it is already free. */
+	if (info->rte_index < 0 && !info->addr && !info->gsi_base && !info->dmode
+	    && !info->polarity && !info->trigger)
+		return;
+
+	new_vector = assign_irq_vector(AUTO_ASSIGN);
+	printk(KERN_INFO "Reassigning vector %d to %d\n", vector, new_vector);
+	memcpy(&iosapic_intr_info[new_vector], info, sizeof(struct iosapic_intr_info));
+	memset(info, 0, sizeof(struct iosapic_intr_info));
+	info->rte_index = -1;
+}
+
+static void
+register_intr (unsigned int gsi, int vector, unsigned char delivery,
+	       unsigned long polarity, unsigned long trigger)
+{
+	struct iosapic_intr_info *info = &iosapic_intr_info[vector];
+	struct hw_interrupt_type *chip;
+	unsigned long gsi_base;
+	irq_desc_t *desc;
+	int index;
+
+	index = find_iosapic(gsi);
+	if (index < 0) {
+		printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", __FUNCTION__, gsi);
+		return;
+	}
+
+	/*
+	 * Record how this vector is wired up.  The RTE index is the GSI's
+	 * offset from the base GSI of the IOSAPIC entry found above.
+	 */
+	gsi_base = iosapic_lists[index].gsi_base;
+	info->rte_index = (int) (gsi - gsi_base);
+	info->polarity = polarity;
+	info->dmode = delivery;
+	info->addr = iosapic_lists[index].addr;
+	info->gsi_base = gsi_base;
+	info->trigger = trigger;
+	info->refcnt++;
+
+	/* Edge and level interrupts use different handler tables. */
+	chip = (trigger == IOSAPIC_EDGE) ? &irq_type_iosapic_edge : &irq_type_iosapic_level;
+
+	desc = irq_descp(vector);
+	if (desc->handler == chip)
+		return;
+
+	/* Replacing a handler table other than no_irq_type deserves a warning. */
+	if (desc->handler != &no_irq_type)
+		printk(KERN_WARNING "%s: changing vector %d from %s to %s\n",
+		       __FUNCTION__, vector, desc->handler->typename, chip->typename);
+	desc->handler = chip;
+}
+
+static unsigned int
+get_target_cpu (unsigned int gsi, int vector)
+{
+#ifdef CONFIG_SMP
+ static int cpu = -1; /* last CPU handed out by the round-robin fallback below */
+
+ /*
+ * If the platform supports redirection via XTP, let it
+ * distribute interrupts.
+ */
+ if (smp_int_redirect & SMP_IRQ_REDIRECTION)
+ return cpu_physical_id(smp_processor_id());
+
+ /*
+ * Some interrupts (ACPI SCI, for instance) are registered
+ * before the BSP is marked as online.
+ */
+ if (!cpu_online(smp_processor_id()))
+ return cpu_physical_id(smp_processor_id());
+
+#ifdef CONFIG_NUMA
+ {
+ int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0;
+ cpumask_t cpu_mask;
+
+ iosapic_index = find_iosapic(gsi);
+ if (iosapic_index < 0 ||
+ iosapic_lists[iosapic_index].node == MAX_NUMNODES)
+ goto skip_numa_setup; /* no IOSAPIC, or node still at the iosapic_init() default */
+
+ cpu_mask = node_to_cpumask(iosapic_lists[iosapic_index].node);
+ /* Drop the node's offline CPUs from the candidate mask. */
+ for_each_cpu_mask(numa_cpu, cpu_mask) {
+ if (!cpu_online(numa_cpu))
+ cpu_clear(numa_cpu, cpu_mask);
+ }
+
+ num_cpus = cpus_weight(cpu_mask);
+
+ if (!num_cpus)
+ goto skip_numa_setup;
+
+ /* Use vector assignment to distribute across cpus in node */
+ cpu_index = vector % num_cpus;
+ /* Walk to the cpu_index-th CPU remaining in the mask. */
+ for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++)
+ numa_cpu = next_cpu(numa_cpu, cpu_mask);
+
+ if (numa_cpu != NR_CPUS)
+ return cpu_physical_id(numa_cpu);
+ }
+skip_numa_setup:
+#endif
+ /*
+ * Otherwise, round-robin interrupt vectors across all the
+ * processors. (It'd be nice if we could be smarter in the
+ * case of NUMA.)
+ */
+ do {
+ if (++cpu >= NR_CPUS)
+ cpu = 0;
+ } while (!cpu_online(cpu));
+
+ return cpu_physical_id(cpu);
+#else
+ return cpu_physical_id(smp_processor_id()); /* UP: always the current CPU */
+#endif
+}
+
+/*
+ * ACPI can describe IOSAPIC interrupts via static tables and namespace
+ * methods. This provides an interface to register those interrupts and
+ * program the IOSAPIC RTE.
+ */
+int
+iosapic_register_intr (unsigned int gsi,
+ unsigned long polarity, unsigned long trigger)
+{
+ int vector;
+ unsigned int dest;
+ unsigned long flags;
+
+ /*
+ * If this GSI has already been registered (i.e., it's a
+ * shared interrupt, or we lost a race to register it),
+ * don't touch the RTE.
+ */
+ spin_lock_irqsave(&iosapic_lock, flags);
+ {
+ vector = gsi_to_vector(gsi);
+ if (vector > 0) {
+ iosapic_intr_info[vector].refcnt++; /* just take another reference */
+ spin_unlock_irqrestore(&iosapic_lock, flags);
+ return vector;
+ }
+ /* First registration: allocate a vector, pick a target CPU, record and program the RTE. */
+ vector = assign_irq_vector(AUTO_ASSIGN);
+ dest = get_target_cpu(gsi, vector);
+ register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY,
+ polarity, trigger);
+
+ set_rte(vector, dest, 1); /* NOTE(review): last argument presumably means "masked" - confirm in set_rte() */
+ }
+ spin_unlock_irqrestore(&iosapic_lock, flags);
+
+ printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n",
+ gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
+ (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
+ cpu_logical_id(dest), dest, vector);
+
+ return vector; /* the vector now associated with this GSI */
+}
+
+#ifdef CONFIG_ACPI_DEALLOCATE_IRQ
+void
+iosapic_unregister_intr (unsigned int gsi)
+{
+	unsigned long flags;
+	int irq, vector;
+	irq_desc_t *idesc;
+	int rte_index;
+	unsigned long trigger, polarity;
+
+	/*
+	 * Drops one reference on the GSI's interrupt.  If no irq is found for
+	 * the gsi, the call is unbalanced; this is re-checked (via rte_index)
+	 * once the locks are held.
+	 */
+	irq = gsi_to_irq(gsi);
+	if (irq < 0) {
+		printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi);
+		WARN_ON(1);
+		return;
+	}
+	vector = irq_to_vector(irq);
+
+	idesc = irq_descp(irq);
+	spin_lock_irqsave(&idesc->lock, flags);
+	spin_lock(&iosapic_lock);
+	{
+		rte_index = iosapic_intr_info[vector].rte_index;
+		if (rte_index < 0) {
+			spin_unlock(&iosapic_lock);
+			spin_unlock_irqrestore(&idesc->lock, flags);
+			printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi);
+			WARN_ON(1);
+			return;
+		}
+
+		if (--iosapic_intr_info[vector].refcnt > 0) {
+			spin_unlock(&iosapic_lock);
+			spin_unlock_irqrestore(&idesc->lock, flags);
+			return;
+		}
+
+		/*
+		 * If interrupt handlers still exist on the irq
+		 * associated with the gsi, don't unregister the
+		 * interrupt; restore the reference dropped above.
+		 */
+		if (idesc->action) {
+			iosapic_intr_info[vector].refcnt++;
+			spin_unlock(&iosapic_lock);
+			spin_unlock_irqrestore(&idesc->lock, flags);
+			printk(KERN_WARNING "Cannot unregister GSI. IRQ %u is still in use.\n", irq);
+			return;
+		}
+
+		/* Clear the interrupt controller descriptor. */
+		idesc->handler = &no_irq_type;
+
+		/* Saved for the log message below; the memset wipes them. */
+		trigger = iosapic_intr_info[vector].trigger;
+		polarity = iosapic_intr_info[vector].polarity;
+
+		/* Clear the interrupt information. */
+		memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info));
+		iosapic_intr_info[vector].rte_index = -1;	/* mark as unused */
+	}
+	spin_unlock(&iosapic_lock);
+	spin_unlock_irqrestore(&idesc->lock, flags);
+
+	/* Free the interrupt vector */
+	free_irq_vector(vector);
+
+	printk(KERN_INFO "GSI %u (%s, %s) -> vector %d unregistered.\n",
+	       gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
+	       (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
+	       vector);
+}
+#endif /* CONFIG_ACPI_DEALLOCATE_IRQ */
+
+/*
+ * ACPI calls this when it finds an entry for a platform interrupt.
+ * Note that the gsi_base and IOSAPIC address must be set in iosapic_init().
+ */
+int __init
+iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
+				int iosapic_vector, u16 eid, u16 id,
+				unsigned long polarity, unsigned long trigger)
+{
+	static const char * const name[] = {"unknown", "PMI", "INIT", "CPEI"};
+	unsigned char delivery;
+	int vector, mask = 0;
+	unsigned int dest = ((id << 8) | eid) & 0xffff;	/* id in bits 15:8, eid in bits 7:0 */
+
+	switch (int_type) {
+	      case ACPI_INTERRUPT_PMI:
+		vector = iosapic_vector;
+		/*
+		 * since PMI vector is alloc'd by FW(ACPI) not by kernel,
+		 * we need to make sure the vector is available
+		 */
+		iosapic_reassign_vector(vector);
+		delivery = IOSAPIC_PMI;
+		break;
+	      case ACPI_INTERRUPT_INIT:
+		vector = assign_irq_vector(AUTO_ASSIGN);
+		delivery = IOSAPIC_INIT;
+		break;
+	      case ACPI_INTERRUPT_CPEI:
+		vector = IA64_CPE_VECTOR;
+		delivery = IOSAPIC_LOWEST_PRIORITY;
+		mask = 1;	/* only CPEI passes mask=1 to set_rte() below */
+		break;
+	      default:
+		printk(KERN_ERR "iosapic_register_platform_intr(): invalid int type 0x%x\n", int_type);
+		return -1;
+	}
+
+	register_intr(gsi, vector, delivery, polarity, trigger);
+
+	printk(KERN_INFO "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n",
+	       int_type < ARRAY_SIZE(name) ? name[int_type] : "unknown",
+	       int_type, gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
+	       (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
+	       cpu_logical_id(dest), dest, vector);
+
+	set_rte(vector, dest, mask);
+	return vector;	/* the vector used, or -1 above for an unknown int_type */
+}
+
+
+/*
+ * ACPI calls this when it finds an entry for a legacy ISA IRQ override.
+ * Note that the gsi_base and IOSAPIC address must be set in iosapic_init().
+ */
+void __init
+iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi,
+ unsigned long polarity,
+ unsigned long trigger)
+{
+ int vector;
+ unsigned int dest = cpu_physical_id(smp_processor_id()); /* target the CPU running this code */
+
+ vector = isa_irq_to_vector(isa_irq); /* NOTE(review): isa_irq is not range-checked here */
+
+ register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY, polarity, trigger);
+
+ DBG("ISA: IRQ %u -> GSI %u (%s,%s) -> CPU %d (0x%04x) vector %d\n",
+ isa_irq, gsi, trigger == IOSAPIC_EDGE ? "edge" : "level",
+ polarity == IOSAPIC_POL_HIGH ? "high" : "low",
+ cpu_logical_id(dest), dest, vector);
+
+ set_rte(vector, dest, 1);
+}
+
+void __init
+iosapic_system_init (int system_pcat_compat)
+{
+ int vector;
+ /* Start with every vector marked as "not an IOSAPIC interrupt". */
+ for (vector = 0; vector < IA64_NUM_VECTORS; ++vector)
+ iosapic_intr_info[vector].rte_index = -1; /* mark as unused */
+
+ pcat_compat = system_pcat_compat; /* remembered for iosapic_init() */
+ if (pcat_compat) {
+ /*
+ * Disable the compatibility mode interrupts (8259 style), needs IN/OUT support
+ * enabled.
+ */
+ printk(KERN_INFO "%s: Disabling PC-AT compatible 8259 interrupts\n", __FUNCTION__);
+ outb(0xff, 0xA1);
+ outb(0xff, 0x21);
+ }
+}
+
+void __init
+iosapic_init (unsigned long phys_addr, unsigned int gsi_base)
+{
+ int num_rte;
+ unsigned int isa_irq, ver;
+ char __iomem *addr;
+ /* NOTE(review): the ioremap() result is used without a failure check. */
+ addr = ioremap(phys_addr, 0);
+ ver = iosapic_version(addr);
+
+ /*
+ * The MAX_REDIR register holds the highest input pin
+ * number (starting from 0).
+ * We add 1 so that we can use it for number of pins (= RTEs)
+ */
+ num_rte = ((ver >> 16) & 0xff) + 1;
+ /* NOTE(review): num_iosapic is not checked against the size of iosapic_lists[] here. */
+ iosapic_lists[num_iosapic].addr = addr;
+ iosapic_lists[num_iosapic].gsi_base = gsi_base;
+ iosapic_lists[num_iosapic].num_rte = num_rte;
+#ifdef CONFIG_NUMA
+ iosapic_lists[num_iosapic].node = MAX_NUMNODES; /* "no node yet"; see map_iosapic_to_node() */
+#endif
+ num_iosapic++;
+
+ if ((gsi_base == 0) && pcat_compat) {
+ /*
+ * Map the legacy ISA devices into the IOSAPIC data. Some of these may
+ * get reprogrammed later on with data from the ACPI Interrupt Source
+ * Override table.
+ */
+ for (isa_irq = 0; isa_irq < 16; ++isa_irq)
+ iosapic_override_isa_irq(isa_irq, isa_irq, IOSAPIC_POL_HIGH, IOSAPIC_EDGE);
+ }
+}
+
+#ifdef CONFIG_NUMA
+/* Record the NUMA node of the IOSAPIC that find_iosapic() returns for gsi_base. */
+void __init
+map_iosapic_to_node(unsigned int gsi_base, int node)
+{
+	int idx = find_iosapic(gsi_base);
+
+	if (idx >= 0) {
+		iosapic_lists[idx].node = node;
+		return;
+	}
+
+	printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
+	       __FUNCTION__, gsi_base);
+}
+#endif
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
new file mode 100644
index 00000000000..28f2aadc38d
--- /dev/null
+++ b/arch/ia64/kernel/irq.c
@@ -0,0 +1,238 @@
+/*
+ * linux/arch/ia64/kernel/irq.c
+ *
+ * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
+ *
+ * This file contains the code used by various IRQ handling routines:
+ * asking for different IRQ's should be done through these routines
+ * instead of just grabbing them. Thus setups with different IRQ numbers
+ * shouldn't result in any weird surprises, and installing new handlers
+ * should be easier.
+ *
+ * Copyright (C) Ashok Raj<ashok.raj@intel.com>, Intel Corporation 2004
+ *
+ * 4/14/2004: Added code to handle cpu migration and do safe irq
+ * migration without losing interrupts for iosapic
+ * architecture.
+ */
+
+#include <asm/delay.h>
+#include <asm/uaccess.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * Each architecture has to answer this itself; here we only log it.
+ */
+void ack_bad_irq(unsigned int irq)
+{
+ printk(KERN_ERR "Unexpected irq vector 0x%x on CPU %u!\n", irq, smp_processor_id());
+}
+
+#ifdef CONFIG_IA64_GENERIC
+unsigned int __ia64_local_vector_to_irq (ia64_vector vec)
+{
+ return (unsigned int) vec; /* identity mapping: the irq number is the vector number */
+}
+#endif
+
+/*
+ * Interrupt statistics:
+ */
+
+atomic_t irq_err_count;
+
+/*
+ * /proc/interrupts printing:
+ */
+
+int show_interrupts(struct seq_file *p, void *v)
+{
+ int i = *(loff_t *) v, j; /* i: row being printed, read from the position v points to */
+ struct irqaction * action;
+ unsigned long flags;
+ /* Row 0 additionally prints the header naming every online CPU. */
+ if (i == 0) {
+ seq_printf(p, " ");
+ for (j=0; j<NR_CPUS; j++)
+ if (cpu_online(j))
+ seq_printf(p, "CPU%d ",j);
+ seq_putc(p, '\n');
+ }
+ /* Rows 0..NR_IRQS-1 are per-IRQ lines; row NR_IRQS is the error count. */
+ if (i < NR_IRQS) {
+ spin_lock_irqsave(&irq_desc[i].lock, flags);
+ action = irq_desc[i].action;
+ if (!action)
+ goto skip; /* IRQs without a registered action print nothing */
+ seq_printf(p, "%3d: ",i);
+#ifndef CONFIG_SMP
+ seq_printf(p, "%10u ", kstat_irqs(i));
+#else
+ for (j = 0; j < NR_CPUS; j++)
+ if (cpu_online(j))
+ seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+#endif
+ seq_printf(p, " %14s", irq_desc[i].handler->typename);
+ seq_printf(p, " %s", action->name);
+ /* Remaining actions sharing this IRQ are appended comma-separated. */
+ for (action=action->next; action; action = action->next)
+ seq_printf(p, ", %s", action->name);
+
+ seq_putc(p, '\n');
+skip:
+ spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+ } else if (i == NR_IRQS)
+ seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+ return 0;
+}
+
+#ifdef CONFIG_SMP
+/*
+ * This is updated when the user sets irq affinity via /proc
+ */
+static cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS];
+static unsigned long pending_irq_redir[BITS_TO_LONGS(NR_IRQS)];
+
+static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 };
+
+/*
+ * Arch specific routine for deferred write to iosapic rte to reprogram
+ * intr destination: only records the mask; move_irq() applies it later.
+ */
+void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
+{
+ pending_irq_cpumask[irq] = mask_val; /* NOTE(review): irq is not range-checked here */
+}
+
+void set_irq_affinity_info (unsigned int irq, int hwid, int redir)
+{
+ cpumask_t mask = CPU_MASK_NONE;
+ /* Build a one-CPU mask from the physical id, then record it and the redirect flag. */
+ cpu_set(cpu_logical_id(hwid), mask);
+
+ if (irq < NR_IRQS) {
+ irq_affinity[irq] = mask;
+ irq_redir[irq] = (char) (redir & 0xff);
+ }
+}
+
+
+void move_irq(int irq)
+{
+	/* note - we hold desc->lock */
+	cpumask_t tmp;
+	irq_desc_t *desc = irq_descp(irq);
+	int redir = test_bit(irq, pending_irq_redir);
+
+	if (unlikely(!desc->handler->set_affinity))
+		return;
+	/* Apply a deferred affinity request, restricted to CPUs that are online. */
+	if (!cpus_empty(pending_irq_cpumask[irq])) {
+		cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map);
+		if (unlikely(!cpus_empty(tmp))) {
+			/* Pass tmp: iosapic_set_affinity() programs first_cpu(mask), which must be online. */
+			desc->handler->set_affinity(irq | (redir ? IA64_IRQ_REDIRECTED : 0), tmp);
+		}
+		cpus_clear(pending_irq_cpumask[irq]);
+	}
+}
+
+
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_HOTPLUG_CPU
+unsigned int vectors_in_migration[NR_IRQS];
+
+/*
+ * Since cpu_online_map is already updated, we just need to check for
+ * affinity that has zeros
+ */
+static void migrate_irqs(void)
+{
+	cpumask_t mask;
+	irq_desc_t *desc;
+	int irq, new_cpu;
+
+	for (irq=0; irq < NR_IRQS; irq++) {
+		desc = irq_descp(irq);
+
+		/*
+		 * No handling for now.
+		 * TBD: Implement a disable function so we can now
+		 * tell CPU not to respond to these local intr sources.
+		 * such as ITV,CPEI,MCA etc.
+		 */
+		if (desc->status & IRQ_PER_CPU)	/* test the flag, not the whole status word */
+			continue;
+
+		cpus_and(mask, irq_affinity[irq], cpu_online_map);
+		if (any_online_cpu(mask) == NR_CPUS) {
+			/*
+			 * Save it for phase 2 processing
+			 */
+			vectors_in_migration[irq] = irq;
+
+			new_cpu = any_online_cpu(cpu_online_map);
+			mask = cpumask_of_cpu(new_cpu);
+
+			/*
+			 * All four are essential, currently WARN_ON.. maybe panic?
+			 */
+			if (desc->handler && desc->handler->disable &&
+				desc->handler->enable && desc->handler->set_affinity) {
+				desc->handler->disable(irq);
+				desc->handler->set_affinity(irq, mask);
+				desc->handler->enable(irq);
+			} else {
+				WARN_ON((!(desc->handler) || !(desc->handler->disable) ||
+						!(desc->handler->enable) ||
+						!(desc->handler->set_affinity)));
+			}
+		}
+	}
+}
+
+void fixup_irqs(void)
+{
+ unsigned int irq;
+ extern void ia64_process_pending_intr(void);
+
+ ia64_set_itv(1<<16); /* NOTE(review): presumably sets the ITV mask bit to silence the local timer - confirm */
+ /*
+ * Phase 1: Locate irq's bound to this cpu and
+ * relocate them for cpu removal.
+ */
+ migrate_irqs();
+
+ /*
+ * Phase 2: Perform interrupt processing for all entries reported in
+ * local APIC.
+ */
+ ia64_process_pending_intr();
+
+ /*
+ * Phase 3: Now handle any interrupts not captured in local APIC.
+ * This is to account for cases that device interrupted during the time the
+ * rte was being disabled and re-programmed.
+ */
+ for (irq=0; irq < NR_IRQS; irq++) {
+ if (vectors_in_migration[irq]) { /* flagged in phase 1 and not cleared in phase 2 */
+ vectors_in_migration[irq]=0;
+ __do_IRQ(irq, NULL);
+ }
+ }
+
+ /*
+ * Now let processor die. We do irq disable and max_xtp() to
+ * ensure there is no more interrupts routed to this processor.
+ * But the local timer interrupt can have 1 pending which we
+ * take care in timer_interrupt().
+ */
+ max_xtp();
+ local_irq_disable();
+}
+#endif
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
new file mode 100644
index 00000000000..5ba06ebe355
--- /dev/null
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -0,0 +1,278 @@
+/*
+ * linux/arch/ia64/kernel/irq_ia64.c
+ *
+ * Copyright (C) 1998-2001 Hewlett-Packard Co
+ * Stephane Eranian <eranian@hpl.hp.com>
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 6/10/99: Updated to bring in sync with x86 version to facilitate
+ * support for SMP and different interrupt controllers.
+ *
+ * 09/15/00 Goutham Rao <goutham.rao@intel.com> Implemented pci_irq_to_vector
+ * PCI to vector allocation routine.
+ * 04/14/2004 Ashok Raj <ashok.raj@intel.com>
+ * Added CPU Hotplug handling for IPF.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/jiffies.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/kernel_stat.h>
+#include <linux/slab.h>
+#include <linux/ptrace.h>
+#include <linux/random.h> /* for rand_initialize_irq() */
+#include <linux/signal.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/threads.h>
+#include <linux/bitops.h>
+
+#include <asm/delay.h>
+#include <asm/intrinsics.h>
+#include <asm/io.h>
+#include <asm/hw_irq.h>
+#include <asm/machvec.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+
+#ifdef CONFIG_PERFMON
+# include <asm/perfmon.h>
+#endif
+
+#define IRQ_DEBUG 0
+
+/* default base addr of IPI table */
+void __iomem *ipi_base_addr = ((void __iomem *)
+ (__IA64_UNCACHED_OFFSET | IA64_IPI_DEFAULT_BASE_ADDR));
+
+/*
+ * Legacy IRQ to IA-64 vector translation table.
+ */
+__u8 isa_irq_to_vector_map[16] = {
+ /* 8259 IRQ translation, first 16 entries */
+ 0x2f, 0x20, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29,
+ 0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21
+};
+EXPORT_SYMBOL(isa_irq_to_vector_map);
+
+static unsigned long ia64_vector_mask[BITS_TO_LONGS(IA64_NUM_DEVICE_VECTORS)];
+
+int
+assign_irq_vector (int irq)
+{
+	int pos, vector;
+	/* Retry if the bit turned out to be set already by the time we tried to claim it. */
+	do {
+		pos = find_first_zero_bit(ia64_vector_mask, IA64_NUM_DEVICE_VECTORS);
+		vector = IA64_FIRST_DEVICE_VECTOR + pos;
+		if (vector > IA64_LAST_DEVICE_VECTOR)
+			/* XXX could look for sharable vectors instead of panic'ing... */
+			panic("assign_irq_vector: out of interrupt vectors!");
+	} while (test_and_set_bit(pos, ia64_vector_mask));
+	return vector;
+}
+
+void
+free_irq_vector (int vector)
+{
+	int bit;
+
+	/* Only vectors in the device range are tracked in ia64_vector_mask. */
+	if (vector >= IA64_FIRST_DEVICE_VECTOR && vector <= IA64_LAST_DEVICE_VECTOR) {
+		bit = vector - IA64_FIRST_DEVICE_VECTOR;
+		if (test_and_clear_bit(bit, ia64_vector_mask) == 0)
+			printk(KERN_WARNING "%s: double free!\n", __FUNCTION__);
+	}
+}
+
+#ifdef CONFIG_SMP
+# define IS_RESCHEDULE(vec) (vec == IA64_IPI_RESCHEDULE)
+#else
+# define IS_RESCHEDULE(vec) (0)
+#endif
+/*
+ * That's where the IVT branches when we get an external
+ * interrupt. This branches to the correct hardware IRQ handler via
+ * function ptr.
+ */
+void
+ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
+{
+ unsigned long saved_tpr;
+
+#if IRQ_DEBUG
+ {
+ unsigned long bsp, sp;
+
+ /*
+ * Note: if the interrupt happened while executing in
+ * the context switch routine (ia64_switch_to), we may
+ * get a spurious stack overflow here. This is
+ * because the register and the memory stack are not
+ * switched atomically.
+ */
+ bsp = ia64_getreg(_IA64_REG_AR_BSP);
+ sp = ia64_getreg(_IA64_REG_SP);
+
+ if ((sp - bsp) < 1024) {
+ static unsigned char count;
+ static long last_time;
+ /* Rate-limit the warning: reset the counter after 5 seconds of quiet. */
+ if (jiffies - last_time > 5*HZ)
+ count = 0;
+ if (++count < 5) {
+ last_time = jiffies;
+ printk("ia64_handle_irq: DANGER: less than "
+ "1KB of free stack space!!\n"
+ "(bsp=0x%lx, sp=%lx)\n", bsp, sp);
+ }
+ }
+ }
+#endif /* IRQ_DEBUG */
+
+ /*
+ * Always set TPR to limit maximum interrupt nesting depth to
+ * 16 (without this, it would be ~240, which could easily lead
+ * to kernel stack overflows).
+ */
+ irq_enter();
+ saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
+ ia64_srlz_d();
+ while (vector != IA64_SPURIOUS_INT_VECTOR) { /* drain every vector the CPU reports */
+ if (!IS_RESCHEDULE(vector)) { /* reschedule IPIs need only the EOI below */
+ ia64_setreg(_IA64_REG_CR_TPR, vector);
+ ia64_srlz_d();
+
+ __do_IRQ(local_vector_to_irq(vector), regs);
+
+ /*
+ * Disable interrupts and send EOI:
+ */
+ local_irq_disable();
+ ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);
+ }
+ ia64_eoi();
+ vector = ia64_get_ivr(); /* fetch the next pending vector, if any */
+ }
+ /*
+ * This must be done *after* the ia64_eoi(). For example, the keyboard softirq
+ * handler needs to be able to wait for further keyboard interrupts, which can't
+ * come through until ia64_eoi() has been done.
+ */
+ irq_exit();
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * This function emulates interrupt processing when a cpu is about to be
+ * brought down.
+ */
+void ia64_process_pending_intr(void)
+{
+ ia64_vector vector;
+ unsigned long saved_tpr;
+ extern unsigned int vectors_in_migration[NR_IRQS];
+
+ vector = ia64_get_ivr(); /* first pending vector, read directly instead of via the IVT */
+
+ irq_enter();
+ saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
+ ia64_srlz_d();
+
+ /*
+ * Perform normal interrupt style processing
+ */
+ while (vector != IA64_SPURIOUS_INT_VECTOR) {
+ if (!IS_RESCHEDULE(vector)) {
+ ia64_setreg(_IA64_REG_CR_TPR, vector);
+ ia64_srlz_d();
+
+ /*
+ * Call __do_IRQ() the way ia64_handle_irq() would for a real
+ * interrupt, but pass NULL for pt_regs. The vector is handled
+ * here, so clear its migration flag first.
+ * This could probably share code with ia64_handle_irq().
+ */
+ vectors_in_migration[local_vector_to_irq(vector)]=0;
+ __do_IRQ(local_vector_to_irq(vector), NULL);
+
+ /*
+ * Disable interrupts and send EOI
+ */
+ local_irq_disable();
+ ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);
+ }
+ ia64_eoi();
+ vector = ia64_get_ivr();
+ }
+ irq_exit();
+}
+#endif
+
+
+#ifdef CONFIG_SMP
+extern irqreturn_t handle_IPI (int irq, void *dev_id, struct pt_regs *regs);
+
+static struct irqaction ipi_irqaction = {
+ .handler = handle_IPI,
+ .flags = SA_INTERRUPT,
+ .name = "IPI"
+};
+#endif
+
+void
+register_percpu_irq (ia64_vector vec, struct irqaction *action)
+{
+	irq_desc_t *desc;
+	unsigned int irq;
+	for (irq = 0; irq < NR_IRQS; ++irq) {
+		if (irq_to_vector(irq) != vec)
+			continue;	/* not an IRQ that maps to this vector */
+		desc = irq_descp(irq);
+		desc->status |= IRQ_PER_CPU;
+		desc->handler = &irq_type_ia64_lsapic;
+		if (action)
+			setup_irq(irq, action);
+	}
+}
+
+void __init
+init_IRQ (void)
+{
+ register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL); /* per-CPU, no action attached */
+#ifdef CONFIG_SMP
+ register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction); /* IPIs are handled by handle_IPI */
+#endif
+#ifdef CONFIG_PERFMON
+ pfm_init_percpu();
+#endif
+ platform_irq_init();
+}
+
+void
+ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect)
+{
+ void __iomem *ipi_addr;
+ unsigned long ipi_data;
+ unsigned long phys_cpu_id;
+
+#ifdef CONFIG_SMP
+ phys_cpu_id = cpu_physical_id(cpu);
+#else
+ phys_cpu_id = (ia64_getreg(_IA64_REG_CR_LID) >> 16) & 0xffff; /* UP: 'cpu' is ignored, use our own LID */
+#endif
+
+ /*
+ * cpu number is in 8bit ID and 8bit EID
+ */
+ /* Data word: delivery mode above bit 8, vector in the low byte. */
+ ipi_data = (delivery_mode << 8) | (vector & 0xff);
+ ipi_addr = ipi_base_addr + ((phys_cpu_id << 4) | ((redirect & 1) << 3));
+
+ writeq(ipi_data, ipi_addr); /* the store to the IPI block is what sends the interrupt */
+}
diff --git a/arch/ia64/kernel/irq_lsapic.c b/arch/ia64/kernel/irq_lsapic.c
new file mode 100644
index 00000000000..ea14e6a0440
--- /dev/null
+++ b/arch/ia64/kernel/irq_lsapic.c
@@ -0,0 +1,37 @@
+/*
+ * LSAPIC Interrupt Controller
+ *
+ * This takes care of interrupts that are generated by the CPU's
+ * internal Local Streamlined Advanced Programmable Interrupt Controller
+ * (LSAPIC), such as the ITC and IPI interrupts.
+ *
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 2000 Hewlett-Packard Co
+ * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <linux/sched.h>
+#include <linux/irq.h>
+
+static unsigned int
+lsapic_noop_startup (unsigned int irq)
+{
+ return 0; /* nothing to start; always returns 0 */
+}
+
+static void
+lsapic_noop (unsigned int irq)
+{
+ /* nothing to do: shared by every LSAPIC handler slot except startup */
+}
+
+struct hw_interrupt_type irq_type_ia64_lsapic = {
+ .typename = "LSAPIC",
+ .startup = lsapic_noop_startup,
+ .shutdown = lsapic_noop,
+ .enable = lsapic_noop,
+ .disable = lsapic_noop,
+ .ack = lsapic_noop,
+ .end = lsapic_noop /* no .set_affinity member is initialised for this type */
+};
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
new file mode 100644
index 00000000000..d9c05d53435
--- /dev/null
+++ b/arch/ia64/kernel/ivt.S
@@ -0,0 +1,1619 @@
+/*
+ * arch/ia64/kernel/ivt.S
+ *
+ * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
+ * Stephane Eranian <eranian@hpl.hp.com>
+ * David Mosberger <davidm@hpl.hp.com>
+ * Copyright (C) 2000, 2002-2003 Intel Co
+ * Asit Mallick <asit.k.mallick@intel.com>
+ * Suresh Siddha <suresh.b.siddha@intel.com>
+ * Kenneth Chen <kenneth.w.chen@intel.com>
+ * Fenghua Yu <fenghua.yu@intel.com>
+ *
+ * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling for SMP
+ * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB handler now uses virtual PT.
+ */
+/*
+ * This file defines the interruption vector table used by the CPU.
+ * It does not include one entry per possible cause of interruption.
+ *
+ * The first 20 entries of the table contain 64 bundles each while the
+ * remaining 48 entries contain only 16 bundles each.
+ *
+ * The 64 bundles are used to allow inlining the whole handler for critical
+ * interruptions like TLB misses.
+ *
+ * For each entry, the comment is as follows:
+ *
+ * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
+ * entry offset ----/ / / / /
+ * entry number ---------/ / / /
+ * size of the entry -------------/ / /
+ * vector name -------------------------------------/ /
+ * interruptions triggering this vector ----------------------/
+ *
+ * The table is 32KB in size and must be aligned on 32KB boundary.
+ * (The CPU ignores the 15 lower bits of the address)
+ *
+ * Table is based upon EAS2.6 (Oct 1999)
+ */
+
+#include <linux/config.h>
+
+#include <asm/asmmacro.h>
+#include <asm/break.h>
+#include <asm/ia32.h>
+#include <asm/kregs.h>
+#include <asm/offsets.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/thread_info.h>
+#include <asm/unistd.h>
+#include <asm/errno.h>
+
+#if 1
+# define PSR_DEFAULT_BITS psr.ac
+#else
+# define PSR_DEFAULT_BITS 0
+#endif
+
+#if 0
+ /*
+ * This lets you track the last eight faults that occurred on the CPU. Make sure ar.k2 isn't
+ * needed for something else before enabling this...
+ */
+# define DBG_FAULT(i) mov r16=ar.k2;; shl r16=r16,8;; add r16=(i),r16;;mov ar.k2=r16
+#else
+# define DBG_FAULT(i)
+#endif
+
+#define MINSTATE_VIRT /* needed by minstate.h */
+#include "minstate.h"
+
+#define FAULT(n) \
+ mov r31=pr; /* prepare to save predicates */ \
+ mov r19=n;; /* r19 = fault vector number for dispatch_to_fault_handler */ \
+ br.sptk.many dispatch_to_fault_handler
+
+ .section .text.ivt,"ax"
+
+ .align 32768 // align on 32KB boundary
+ .global ia64_ivt
+ia64_ivt:
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
+ENTRY(vhpt_miss)
+ DBG_FAULT(0)
+ /*
+ * The VHPT vector is invoked when the TLB entry for the virtual page table
+ * is missing. This happens only as a result of a previous
+ * (the "original") TLB miss, which may either be caused by an instruction
+ * fetch or a data access (or non-access).
+ *
+ * What we do here is normal TLB miss handling for the _original_ miss, followed
+ * by inserting the TLB entry for the virtual page table page that the VHPT
+ * walker was attempting to access. The latter gets inserted as long
+ * as both L1 and L2 have valid mappings for the faulting address.
+ * The TLB entry for the original miss gets inserted only if
+ * the L3 entry indicates that the page is present.
+ *
+ * do_page_fault gets invoked in the following cases:
+ * - the faulting virtual address uses unimplemented address bits
+ * - the faulting virtual address has no L1, L2, or L3 mapping
+ */
+ mov r16=cr.ifa // get address that caused the TLB miss
+#ifdef CONFIG_HUGETLB_PAGE
+ movl r18=PAGE_SHIFT
+ mov r25=cr.itir
+#endif
+ ;;
+ rsm psr.dt // use physical addressing for data
+ mov r31=pr // save the predicate registers
+ mov r19=IA64_KR(PT_BASE) // get page table base address
+ shl r21=r16,3 // shift bit 60 into sign bit
+ shr.u r17=r16,61 // get the region number into r17
+ ;;
+ shr r22=r21,3
+#ifdef CONFIG_HUGETLB_PAGE
+ extr.u r26=r25,2,6
+ ;;
+ cmp.ne p8,p0=r18,r26
+ sub r27=r26,r18
+ ;;
+(p8) dep r25=r18,r25,2,6
+(p8) shr r22=r22,r27
+#endif
+ ;;
+ cmp.eq p6,p7=5,r17 // is IFA pointing into region 5?
+ shr.u r18=r22,PGDIR_SHIFT // get bits 33-63 of the faulting address
+ ;;
+(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
+
+ srlz.d
+ LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir
+
+ .pred.rel "mutex", p6, p7
+(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
+(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
+ ;;
+(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
+(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
+ cmp.eq p7,p6=0,r21 // unused address bits all zeroes?
+ shr.u r18=r22,PMD_SHIFT // shift L2 index into position
+ ;;
+ ld8 r17=[r17] // fetch the L1 entry (may be 0)
+ ;;
+(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL?
+ dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry
+ ;;
+(p7) ld8 r20=[r17] // fetch the L2 entry (may be 0)
+ shr.u r19=r22,PAGE_SHIFT // shift L3 index into position
+ ;;
+(p7) cmp.eq.or.andcm p6,p7=r20,r0 // was L2 entry NULL?
+ dep r21=r19,r20,3,(PAGE_SHIFT-3) // compute address of L3 page table entry
+ ;;
+(p7) ld8 r18=[r21] // read the L3 PTE
+ mov r19=cr.isr // cr.isr bit 32 (tested below) tells us if this is an insn miss
+ ;;
+(p7) tbit.z p6,p7=r18,_PAGE_P_BIT // page present bit cleared?
+ mov r22=cr.iha // get the VHPT address that caused the TLB miss
+ ;; // avoid RAW on p7
+(p7) tbit.nz.unc p10,p11=r19,32 // is it an instruction TLB miss?
+ dep r23=0,r20,0,PAGE_SHIFT // clear low bits to get page address
+ ;;
+(p10) itc.i r18 // insert the instruction TLB entry
+(p11) itc.d r18 // insert the data TLB entry
+(p6) br.cond.spnt.many page_fault // handle bad address/page not present (page fault)
+ mov cr.ifa=r22
+
+#ifdef CONFIG_HUGETLB_PAGE
+(p8) mov cr.itir=r25 // change to default page-size for VHPT
+#endif
+
+ /*
+ * Now compute and insert the TLB entry for the virtual page table. We never
+ * execute in a page table page so there is no need to set the exception deferral
+ * bit.
+ */
+ adds r24=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r23
+ ;;
+(p7) itc.d r24
+ ;;
+#ifdef CONFIG_SMP
+ /*
+ * Tell the assembler's dependency-violation checker that the above "itc" instructions
+ * cannot possibly affect the following loads:
+ */
+ dv_serialize_data
+
+ /*
+ * Re-check L2 and L3 pagetable. If they changed, we may have received a ptc.g
+ * between reading the pagetable and the "itc". If so, flush the entry we
+ * inserted and retry.
+ */
+ ld8 r25=[r21] // read L3 PTE again
+ ld8 r26=[r17] // read L2 entry again
+ ;;
+ cmp.ne p6,p7=r26,r20 // did L2 entry change
+ mov r27=PAGE_SHIFT<<2
+ ;;
+(p6) ptc.l r22,r27 // purge PTE page translation
+(p7) cmp.ne.or.andcm p6,p7=r25,r18 // did L3 PTE change
+ ;;
+(p6) ptc.l r16,r27 // purge translation
+#endif
+
+ mov pr=r31,-1 // restore predicate registers
+ rfi
+END(vhpt_miss)
+
+ .org ia64_ivt+0x400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
+ENTRY(itlb_miss)
+ DBG_FAULT(1)
+ /*
+ * The ITLB handler accesses the L3 PTE via the virtually mapped linear
+ * page table. If a nested TLB miss occurs, we switch into physical
+ * mode, walk the page table, and then re-execute the L3 PTE read
+ * and go on normally after that.
+ */
+ mov r16=cr.ifa // get virtual address
+ mov r29=b0 // save b0
+ mov r31=pr // save predicates
+.itlb_fault:
+ mov r17=cr.iha // get virtual address of L3 PTE
+ movl r30=1f // load nested fault continuation point
+ ;;
+1: ld8 r18=[r17] // read L3 PTE
+ ;;
+ mov b0=r29
+ tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared?
+(p6) br.cond.spnt page_fault
+ ;;
+ itc.i r18
+ ;;
+#ifdef CONFIG_SMP
+ /*
+ * Tell the assembler's dependency-violation checker that the above "itc" instruction
+ * cannot possibly affect the following loads:
+ */
+ dv_serialize_data
+
+ ld8 r19=[r17] // read L3 PTE again and see if same
+ mov r20=PAGE_SHIFT<<2 // setup page size for purge
+ ;;
+ cmp.ne p7,p0=r18,r19
+ ;;
+(p7) ptc.l r16,r20
+#endif
+ mov pr=r31,-1
+ rfi
+END(itlb_miss)
+
+ .org ia64_ivt+0x0800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
+ENTRY(dtlb_miss)
+ DBG_FAULT(2)
+ /*
+ * The DTLB handler accesses the L3 PTE via the virtually mapped linear
+ * page table. If a nested TLB miss occurs, we switch into physical
+ * mode, walk the page table, and then re-execute the L3 PTE read
+ * and go on normally after that.
+ */
+ mov r16=cr.ifa // get virtual address
+ mov r29=b0 // save b0
+ mov r31=pr // save predicates
+dtlb_fault:
+ mov r17=cr.iha // get virtual address of L3 PTE
+ movl r30=1f // load nested fault continuation point
+ ;;
+1: ld8 r18=[r17] // read L3 PTE
+ ;;
+ mov b0=r29
+ tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared?
+(p6) br.cond.spnt page_fault
+ ;;
+ itc.d r18
+ ;;
+#ifdef CONFIG_SMP
+ /*
+ * Tell the assembler's dependency-violation checker that the above "itc" instruction
+ * cannot possibly affect the following loads:
+ */
+ dv_serialize_data
+
+ ld8 r19=[r17] // read L3 PTE again and see if same
+ mov r20=PAGE_SHIFT<<2 // setup page size for purge
+ ;;
+ cmp.ne p7,p0=r18,r19
+ ;;
+(p7) ptc.l r16,r20
+#endif
+ mov pr=r31,-1
+ rfi
+END(dtlb_miss)
+
+ .org ia64_ivt+0x0c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
+ENTRY(alt_itlb_miss)
+ DBG_FAULT(3)
+ mov r16=cr.ifa // get address that caused the TLB miss
+ movl r17=PAGE_KERNEL
+ mov r21=cr.ipsr
+ movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+ mov r31=pr
+ ;;
+#ifdef CONFIG_DISABLE_VHPT
+ shr.u r22=r16,61 // get the region number into r22
+ ;;
+ cmp.gt p8,p0=6,r22 // access to region 0-5
+ ;;
+(p8) thash r17=r16
+ ;;
+(p8) mov cr.iha=r17
+(p8) mov r29=b0 // save b0
+(p8) br.cond.dptk .itlb_fault
+#endif
+ extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl
+ and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
+ shr.u r18=r16,57 // move address bit 61 to bit 4
+ ;;
+ andcm r18=0x10,r18 // bit 4=~address-bit(61)
+ cmp.ne p8,p0=r0,r23 // psr.cpl != 0?
+ or r19=r17,r19 // insert PTE control bits into r19
+ ;;
+ or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6
+(p8) br.cond.spnt page_fault
+ ;;
+ itc.i r19 // insert the TLB entry
+ mov pr=r31,-1
+ rfi
+END(alt_itlb_miss)
+
+ .org ia64_ivt+0x1000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
+ENTRY(alt_dtlb_miss)
+ DBG_FAULT(4)
+ mov r16=cr.ifa // get address that caused the TLB miss
+ movl r17=PAGE_KERNEL
+ mov r20=cr.isr
+ movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+ mov r21=cr.ipsr
+ mov r31=pr
+ ;;
+#ifdef CONFIG_DISABLE_VHPT
+ shr.u r22=r16,61 // get the region number into r22
+ ;;
+ cmp.gt p8,p0=6,r22 // access to region 0-5
+ ;;
+(p8) thash r17=r16
+ ;;
+(p8) mov cr.iha=r17
+(p8) mov r29=b0 // save b0
+(p8) br.cond.dptk dtlb_fault
+#endif
+ extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl
+ and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field
+ tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on?
+ shr.u r18=r16,57 // move address bit 61 to bit 4
+ and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
+ tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on?
+ ;;
+ andcm r18=0x10,r18 // bit 4=~address-bit(61)
+ cmp.ne p8,p0=r0,r23 // psr.cpl != 0?
+(p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field
+(p8) br.cond.spnt page_fault
+
+ dep r21=-1,r21,IA64_PSR_ED_BIT,1
+ or r19=r19,r17 // insert PTE control bits into r19
+ ;;
+ or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6
+(p6) mov cr.ipsr=r21
+ ;;
+(p7) itc.d r19 // insert the TLB entry
+ mov pr=r31,-1
+ rfi
+END(alt_dtlb_miss)
+
+ .org ia64_ivt+0x1400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
+ENTRY(nested_dtlb_miss)
+ /*
+ * In the absence of kernel bugs, we get here when the virtually mapped linear
+ * page table is accessed non-speculatively (e.g., in the Dirty-bit, Instruction
+ * Access-bit, or Data Access-bit faults). If the DTLB entry for the virtual page
+ * table is missing, a nested TLB miss fault is triggered and control is
+ * transferred to this point. When this happens, we lookup the pte for the
+ * faulting address by walking the page table in physical mode and return to the
+ * continuation point passed in register r30 (or call page_fault if the address is
+ * not mapped).
+ *
+ * Input: r16: faulting address
+ * r29: saved b0
+ * r30: continuation address
+ * r31: saved pr
+ *
+ * Output: r17: physical address of L3 PTE of faulting address
+ * r29: saved b0
+ * r30: continuation address
+ * r31: saved pr
+ *
+ * Clobbered: b0, r18, r19, r21, psr.dt (cleared)
+ */
+ rsm psr.dt // switch to using physical data addressing
+ mov r19=IA64_KR(PT_BASE) // get the page table base address
+ shl r21=r16,3 // shift bit 60 into sign bit
+ ;;
+ shr.u r17=r16,61 // get the region number into r17
+ ;;
+ cmp.eq p6,p7=5,r17 // is faulting address in region 5?
+ shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of faulting address
+ ;;
+(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
+
+ srlz.d
+ LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir
+
+ .pred.rel "mutex", p6, p7
+(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
+(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
+ ;;
+(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
+(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
+ cmp.eq p7,p6=0,r21 // unused address bits all zeroes?
+ shr.u r18=r16,PMD_SHIFT // shift L2 index into position
+ ;;
+ ld8 r17=[r17] // fetch the L1 entry (may be 0)
+ ;;
+(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL?
+ dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry
+ ;;
+(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0)
+ shr.u r19=r16,PAGE_SHIFT // shift L3 index into position
+ ;;
+(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL?
+ dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry
+(p6) br.cond.spnt page_fault
+ mov b0=r30
+ br.sptk.many b0 // return to continuation point
+END(nested_dtlb_miss)
+
+ .org ia64_ivt+0x1800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
+ENTRY(ikey_miss)
+ DBG_FAULT(6)
+ FAULT(6)
+END(ikey_miss)
+
+ //-----------------------------------------------------------------------------------
+ // call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address)
+ENTRY(page_fault)
+ ssm psr.dt
+ ;;
+ srlz.i
+ ;;
+ SAVE_MIN_WITH_COVER
+ alloc r15=ar.pfs,0,0,3,0
+ mov out0=cr.ifa
+ mov out1=cr.isr
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+(p15) ssm psr.i // restore psr.i
+ movl r14=ia64_leave_kernel
+ ;;
+ SAVE_REST
+ mov rp=r14
+ ;;
+ adds out2=16,r12 // out2 = pointer to pt_regs
+ br.call.sptk.many b6=ia64_do_page_fault // ignore return address
+END(page_fault)
+
+ .org ia64_ivt+0x1c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
+ENTRY(dkey_miss)
+ DBG_FAULT(7)
+ FAULT(7)
+END(dkey_miss)
+
+ .org ia64_ivt+0x2000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
+ENTRY(dirty_bit)
+ DBG_FAULT(8)
+ /*
+ * What we do here is to simply turn on the dirty bit in the PTE. We need to
+ * update both the page-table and the TLB entry. To efficiently access the PTE,
+ * we address it through the virtual page table. Most likely, the TLB entry for
+ * the relevant virtual page table page is still present in the TLB so we can
+ * normally do this without additional TLB misses. In case the necessary virtual
+ * page table TLB entry isn't present, we take a nested TLB miss hit where we look
+ * up the physical address of the L3 PTE and then continue at label 1 below.
+ */
+ mov r16=cr.ifa // get the address that caused the fault
+ movl r30=1f // load continuation point in case of nested fault
+ ;;
+ thash r17=r16 // compute virtual address of L3 PTE
+ mov r29=b0 // save b0 in case of nested fault
+ mov r31=pr // save pr
+#ifdef CONFIG_SMP
+ mov r28=ar.ccv // save ar.ccv
+ ;;
+1: ld8 r18=[r17]
+ ;; // avoid RAW on r18
+ mov ar.ccv=r18 // set compare value for cmpxchg
+ or r25=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits
+ ;;
+ cmpxchg8.acq r26=[r17],r25,ar.ccv
+ mov r24=PAGE_SHIFT<<2
+ ;;
+ cmp.eq p6,p7=r26,r18
+ ;;
+(p6) itc.d r25 // install updated PTE
+ ;;
+ /*
+ * Tell the assembler's dependency-violation checker that the above "itc" instruction
+ * cannot possibly affect the following loads:
+ */
+ dv_serialize_data
+
+ ld8 r18=[r17] // read PTE again
+ ;;
+ cmp.eq p6,p7=r18,r25 // is it same as the newly installed
+ ;;
+(p7) ptc.l r16,r24
+ mov b0=r29 // restore b0
+ mov ar.ccv=r28
+#else
+ ;;
+1: ld8 r18=[r17]
+ ;; // avoid RAW on r18
+ or r18=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits
+ mov b0=r29 // restore b0
+ ;;
+ st8 [r17]=r18 // store back updated PTE
+ itc.d r18 // install updated PTE
+#endif
+ mov pr=r31,-1 // restore pr
+ rfi
+END(dirty_bit)
+
+ .org ia64_ivt+0x2400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
+ENTRY(iaccess_bit)
+ DBG_FAULT(9)
+ // Like Entry 8, except for instruction access
+ mov r16=cr.ifa // get the address that caused the fault
+ movl r30=1f // load continuation point in case of nested fault
+ mov r31=pr // save predicates
+#ifdef CONFIG_ITANIUM
+ /*
+ * Erratum 10 (IFA may contain incorrect address) has "NoFix" status.
+ */
+ mov r17=cr.ipsr
+ ;;
+ mov r18=cr.iip
+ tbit.z p6,p0=r17,IA64_PSR_IS_BIT // IA64 instruction set?
+ ;;
+(p6) mov r16=r18 // if so, use cr.iip instead of cr.ifa
+#endif /* CONFIG_ITANIUM */
+ ;;
+ thash r17=r16 // compute virtual address of L3 PTE
+ mov r29=b0 // save b0 in case of nested fault
+#ifdef CONFIG_SMP
+ mov r28=ar.ccv // save ar.ccv
+ ;;
+1: ld8 r18=[r17]
+ ;;
+ mov ar.ccv=r18 // set compare value for cmpxchg
+ or r25=_PAGE_A,r18 // set the accessed bit
+ ;;
+ cmpxchg8.acq r26=[r17],r25,ar.ccv
+ mov r24=PAGE_SHIFT<<2
+ ;;
+ cmp.eq p6,p7=r26,r18
+ ;;
+(p6) itc.i r25 // install updated PTE
+ ;;
+ /*
+ * Tell the assembler's dependency-violation checker that the above "itc" instruction
+ * cannot possibly affect the following loads:
+ */
+ dv_serialize_data
+
+ ld8 r18=[r17] // read PTE again
+ ;;
+ cmp.eq p6,p7=r18,r25 // is it same as the newly installed
+ ;;
+(p7) ptc.l r16,r24
+ mov b0=r29 // restore b0
+ mov ar.ccv=r28
+#else /* !CONFIG_SMP */
+ ;;
+1: ld8 r18=[r17]
+ ;;
+ or r18=_PAGE_A,r18 // set the accessed bit
+ mov b0=r29 // restore b0
+ ;;
+ st8 [r17]=r18 // store back updated PTE
+ itc.i r18 // install updated PTE
+#endif /* !CONFIG_SMP */
+ mov pr=r31,-1
+ rfi
+END(iaccess_bit)
+
+ .org ia64_ivt+0x2800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
+ENTRY(daccess_bit)
+ DBG_FAULT(10)
+ // Like Entry 8, except for data access
+ mov r16=cr.ifa // get the address that caused the fault
+ movl r30=1f // load continuation point in case of nested fault
+ ;;
+ thash r17=r16 // compute virtual address of L3 PTE
+ mov r31=pr
+ mov r29=b0 // save b0 in case of nested fault
+#ifdef CONFIG_SMP
+ mov r28=ar.ccv // save ar.ccv
+ ;;
+1: ld8 r18=[r17]
+ ;; // avoid RAW on r18
+ mov ar.ccv=r18 // set compare value for cmpxchg
+ or r25=_PAGE_A,r18 // set the accessed bit
+ ;;
+ cmpxchg8.acq r26=[r17],r25,ar.ccv
+ mov r24=PAGE_SHIFT<<2
+ ;;
+ cmp.eq p6,p7=r26,r18
+ ;;
+(p6) itc.d r25 // install updated PTE
+ /*
+ * Tell the assembler's dependency-violation checker that the above "itc" instruction
+ * cannot possibly affect the following loads:
+ */
+ dv_serialize_data
+ ;;
+ ld8 r18=[r17] // read PTE again
+ ;;
+ cmp.eq p6,p7=r18,r25 // is it same as the newly installed
+ ;;
+(p7) ptc.l r16,r24
+ mov ar.ccv=r28
+#else
+ ;;
+1: ld8 r18=[r17]
+ ;; // avoid RAW on r18
+ or r18=_PAGE_A,r18 // set the accessed bit
+ ;;
+ st8 [r17]=r18 // store back updated PTE
+ itc.d r18 // install updated PTE
+#endif
+ mov b0=r29 // restore b0
+ mov pr=r31,-1
+ rfi
+END(daccess_bit)
+
+ .org ia64_ivt+0x2c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
+ENTRY(break_fault)
+ /*
+ * The streamlined system call entry/exit paths only save/restore the initial part
+ * of pt_regs. This implies that the callers of system-calls must adhere to the
+ * normal procedure calling conventions.
+ *
+ * Registers to be saved & restored:
+ * CR registers: cr.ipsr, cr.iip, cr.ifs
+ * AR registers: ar.unat, ar.pfs, ar.rsc, ar.rnat, ar.bspstore, ar.fpsr
+ * others: pr, b0, b6, loadrs, r1, r11, r12, r13, r15
+ * Registers to be restored only:
+ * r8-r11: output value from the system call.
+ *
+ * During system call exit, scratch registers (including r15) are modified/cleared
+ * to prevent leaking bits from kernel to user level.
+ */
+ DBG_FAULT(11)
+ mov r16=IA64_KR(CURRENT) // r16 = current task; 12 cycle read lat.
+ mov r17=cr.iim
+ mov r18=__IA64_BREAK_SYSCALL
+ mov r21=ar.fpsr
+ mov r29=cr.ipsr
+ mov r19=b6
+ mov r25=ar.unat
+ mov r27=ar.rsc
+ mov r26=ar.pfs
+ mov r28=cr.iip
+ mov r31=pr // prepare to save predicates
+ mov r20=r1
+ ;;
+ adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16
+ cmp.eq p0,p7=r18,r17 // is this a system call? (p7 <- false, if so)
+(p7) br.cond.spnt non_syscall
+ ;;
+ ld1 r17=[r16] // load current->thread.on_ustack flag
+ st1 [r16]=r0 // clear current->thread.on_ustack flag
+ add r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // set r1 for MINSTATE_START_SAVE_MIN_VIRT
+ ;;
+ invala
+
+ /* adjust return address so we skip over the break instruction: */
+
+ extr.u r8=r29,41,2 // extract ei field from cr.ipsr
+ ;;
+ cmp.eq p6,p7=2,r8 // ipsr.ei==2?
+ mov r2=r1 // setup r2 for ia64_syscall_setup
+ ;;
+(p6) mov r8=0 // clear ei to 0
+(p6) adds r28=16,r28 // switch cr.iip to next bundle since cr.ipsr.ei wrapped
+(p7) adds r8=1,r8 // increment ei to next slot
+ ;;
+ cmp.eq pKStk,pUStk=r0,r17 // are we in kernel mode already?
+ dep r29=r8,r29,41,2 // insert new ei into cr.ipsr
+ ;;
+
+ // switch from user to kernel RBS:
+ MINSTATE_START_SAVE_MIN_VIRT
+ br.call.sptk.many b7=ia64_syscall_setup
+ ;;
+ MINSTATE_END_SAVE_MIN_VIRT // switch to bank 1
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ mov r3=NR_syscalls - 1
+ ;;
+(p15) ssm psr.i // restore psr.i
+ // p10==true means out registers are more than 8 or r15's Nat is true
+(p10) br.cond.spnt.many ia64_ret_from_syscall
+ ;;
+ movl r16=sys_call_table
+
+ adds r15=-1024,r15 // r15 contains the syscall number---subtract 1024
+ movl r2=ia64_ret_from_syscall
+ ;;
+ shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024)
+ cmp.leu p6,p7=r15,r3 // (syscall >= 1024 && syscall < 1024 + NR_syscalls) ?
+ mov rp=r2 // set the real return addr
+ ;;
+(p6) ld8 r20=[r20] // load address of syscall entry point
+(p7) movl r20=sys_ni_syscall
+
+ add r2=TI_FLAGS+IA64_TASK_SIZE,r13
+ ;;
+ ld4 r2=[r2] // r2 = current_thread_info()->flags
+ ;;
+ and r2=_TIF_SYSCALL_TRACEAUDIT,r2 // mask trace or audit
+ ;;
+ cmp.eq p8,p0=r2,r0
+ mov b6=r20
+ ;;
+(p8) br.call.sptk.many b6=b6 // ignore this return addr
+ br.cond.sptk ia64_trace_syscall
+ // NOT REACHED
+END(break_fault)
+
+ .org ia64_ivt+0x3000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
+ENTRY(interrupt)
+ DBG_FAULT(12)
+ mov r31=pr // prepare to save predicates
+ ;;
+ SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+ srlz.i // ensure everybody knows psr.ic is back on
+ ;;
+ SAVE_REST
+ ;;
+ alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
+ mov out0=cr.ivr // pass cr.ivr as first arg
+ add out1=16,sp // pass pointer to pt_regs as second arg
+ ;;
+ srlz.d // make sure we see the effect of cr.ivr
+ movl r14=ia64_leave_kernel
+ ;;
+ mov rp=r14
+ br.call.sptk.many b6=ia64_handle_irq
+END(interrupt)
+
+ .org ia64_ivt+0x3400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3400 Entry 13 (size 64 bundles) Reserved
+ DBG_FAULT(13)
+ FAULT(13)
+
+ .org ia64_ivt+0x3800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3800 Entry 14 (size 64 bundles) Reserved
+ DBG_FAULT(14)
+ FAULT(14)
+
+ /*
+ * There is no particular reason for this code to be here, other than that
+ * there happens to be space here that would go unused otherwise. If this
+ * fault ever gets "unreserved", simply move the following code to a more
+ * suitable spot...
+ *
+ * ia64_syscall_setup() is a separate subroutine so that it can
+ * allocate stacked registers so it can safely demine any
+ * potential NaT values from the input registers.
+ *
+ * On entry:
+ * - executing on bank 0 or bank 1 register set (doesn't matter)
+ * - r1: stack pointer
+ * - r2: current task pointer
+ * - r3: preserved
+ * - r11: original contents (saved ar.pfs to be saved)
+ * - r12: original contents (sp to be saved)
+ * - r13: original contents (tp to be saved)
+ * - r15: original contents (syscall # to be saved)
+ * - r18: saved bsp (after switching to kernel stack)
+ * - r19: saved b6
+ * - r20: saved r1 (gp)
+ * - r21: saved ar.fpsr
+ * - r22: kernel's register backing store base (krbs_base)
+ * - r23: saved ar.bspstore
+ * - r24: saved ar.rnat
+ * - r25: saved ar.unat
+ * - r26: saved ar.pfs
+ * - r27: saved ar.rsc
+ * - r28: saved cr.iip
+ * - r29: saved cr.ipsr
+ * - r31: saved pr
+ * - b0: original contents (to be saved)
+ * On exit:
+ * - executing on bank 1 registers
+ * - psr.ic enabled, interrupts restored
+ * - p10: TRUE if syscall is invoked with more than 8 out
+ * registers or r15's Nat is true
+ * - r1: kernel's gp
+ * - r3: preserved (same as on entry)
+ * - r8: -EINVAL if p10 is true
+ * - r12: points to kernel stack
+ * - r13: points to current task
+ * - p15: TRUE if interrupts need to be re-enabled
+ * - ar.fpsr: set to kernel settings
+ */
+GLOBAL_ENTRY(ia64_syscall_setup)
+#if PT(B6) != 0
+# error This code assumes that b6 is the first field in pt_regs.
+#endif
+ st8 [r1]=r19 // save b6
+ add r16=PT(CR_IPSR),r1 // initialize first base pointer
+ add r17=PT(R11),r1 // initialize second base pointer
+ ;;
+ alloc r19=ar.pfs,8,0,0,0 // ensure in0-in7 are writable
+ st8 [r16]=r29,PT(AR_PFS)-PT(CR_IPSR) // save cr.ipsr
+ tnat.nz p8,p0=in0
+
+ st8.spill [r17]=r11,PT(CR_IIP)-PT(R11) // save r11
+ tnat.nz p9,p0=in1
+(pKStk) mov r18=r0 // make sure r18 isn't NaT
+ ;;
+
+ st8 [r16]=r26,PT(CR_IFS)-PT(AR_PFS) // save ar.pfs
+ st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP) // save cr.iip
+ mov r28=b0 // save b0 (2 cyc)
+ ;;
+
+ st8 [r17]=r25,PT(AR_RSC)-PT(AR_UNAT) // save ar.unat
+ dep r19=0,r19,38,26 // clear all bits but 0..37 [I0]
+(p8) mov in0=-1
+ ;;
+
+ st8 [r16]=r19,PT(AR_RNAT)-PT(CR_IFS) // store ar.pfs.pfm in cr.ifs
+ extr.u r11=r19,7,7 // I0 // get sol of ar.pfs
+ and r8=0x7f,r19 // A // get sof of ar.pfs
+
+ st8 [r17]=r27,PT(AR_BSPSTORE)-PT(AR_RSC)// save ar.rsc
+ tbit.nz p15,p0=r29,IA64_PSR_I_BIT // I0
+(p9) mov in1=-1
+ ;;
+
+(pUStk) sub r18=r18,r22 // r18=RSE.ndirty*8
+ tnat.nz p10,p0=in2
+ add r11=8,r11
+ ;;
+(pKStk) adds r16=PT(PR)-PT(AR_RNAT),r16 // skip over ar_rnat field
+(pKStk) adds r17=PT(B0)-PT(AR_BSPSTORE),r17 // skip over ar_bspstore field
+ tnat.nz p11,p0=in3
+ ;;
+(p10) mov in2=-1
+ tnat.nz p12,p0=in4 // [I0]
+(p11) mov in3=-1
+ ;;
+(pUStk) st8 [r16]=r24,PT(PR)-PT(AR_RNAT) // save ar.rnat
+(pUStk) st8 [r17]=r23,PT(B0)-PT(AR_BSPSTORE) // save ar.bspstore
+ shl r18=r18,16 // compute ar.rsc to be used for "loadrs"
+ ;;
+ st8 [r16]=r31,PT(LOADRS)-PT(PR) // save predicates
+ st8 [r17]=r28,PT(R1)-PT(B0) // save b0
+ tnat.nz p13,p0=in5 // [I0]
+ ;;
+ st8 [r16]=r18,PT(R12)-PT(LOADRS) // save ar.rsc value for "loadrs"
+ st8.spill [r17]=r20,PT(R13)-PT(R1) // save original r1
+(p12) mov in4=-1
+ ;;
+
+.mem.offset 0,0; st8.spill [r16]=r12,PT(AR_FPSR)-PT(R12) // save r12
+.mem.offset 8,0; st8.spill [r17]=r13,PT(R15)-PT(R13) // save r13
+(p13) mov in5=-1
+ ;;
+ st8 [r16]=r21,PT(R8)-PT(AR_FPSR) // save ar.fpsr
+ tnat.nz p14,p0=in6
+ cmp.lt p10,p9=r11,r8 // frame size can't be more than local+8
+ ;;
+ stf8 [r16]=f1 // ensure pt_regs.r8 != 0 (see handle_syscall_error)
+(p9) tnat.nz p10,p0=r15
+ adds r12=-16,r1 // switch to kernel memory stack (with 16 bytes of scratch)
+
+ st8.spill [r17]=r15 // save r15
+ tnat.nz p8,p0=in7
+ nop.i 0
+
+ mov r13=r2 // establish `current'
+ movl r1=__gp // establish kernel global pointer
+ ;;
+(p14) mov in6=-1
+(p8) mov in7=-1
+ nop.i 0
+
+ cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
+ movl r17=FPSR_DEFAULT
+ ;;
+ mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value
+(p10) mov r8=-EINVAL
+ br.ret.sptk.many b7
+END(ia64_syscall_setup)
+
+ .org ia64_ivt+0x3c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3c00 Entry 15 (size 64 bundles) Reserved
+ DBG_FAULT(15)
+ FAULT(15)
+
+ /*
+ * Squatting in this space ...
+ *
+ * This special case dispatcher for illegal operation faults allows preserved
+ * registers to be modified through a callback function (asm only) that is handed
+ * back from the fault handler in r8. Up to three arguments can be passed to the
+ * callback function by returning an aggregate with the callback as its first
+ * element, followed by the arguments.
+ */
+ENTRY(dispatch_illegal_op_fault)
+ .prologue
+ .body
+ SAVE_MIN_WITH_COVER
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+(p15) ssm psr.i // restore psr.i
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+ ;;
+ alloc r14=ar.pfs,0,0,1,0 // must be first in insn group
+ mov out0=ar.ec
+ ;;
+ SAVE_REST
+ PT_REGS_UNWIND_INFO(0)
+ ;;
+ br.call.sptk.many rp=ia64_illegal_op_fault
+.ret0: ;;
+ alloc r14=ar.pfs,0,0,3,0 // must be first in insn group
+ mov out0=r9
+ mov out1=r10
+ mov out2=r11
+ movl r15=ia64_leave_kernel
+ ;;
+ mov rp=r15
+ mov b6=r8
+ ;;
+ cmp.ne p6,p0=0,r8
+(p6) br.call.dpnt.many b6=b6 // call returns to ia64_leave_kernel
+ br.sptk.many ia64_leave_kernel
+END(dispatch_illegal_op_fault)
+
+ .org ia64_ivt+0x4000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4000 Entry 16 (size 64 bundles) Reserved
+ DBG_FAULT(16)
+ FAULT(16)
+
+ .org ia64_ivt+0x4400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4400 Entry 17 (size 64 bundles) Reserved
+ DBG_FAULT(17)
+ FAULT(17)
+
+ENTRY(non_syscall)
+ SAVE_MIN_WITH_COVER
+
+ // There is no particular reason for this code to be here, other than that
+ // there happens to be space here that would go unused otherwise. If this
+ // fault ever gets "unreserved", simply move the following code to a more
+ // suitable spot...
+
+ alloc r14=ar.pfs,0,0,2,0
+ mov out0=cr.iim
+ add out1=16,sp
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+(p15) ssm psr.i // restore psr.i
+ movl r15=ia64_leave_kernel
+ ;;
+ SAVE_REST
+ mov rp=r15
+ ;;
+ br.call.sptk.many b6=ia64_bad_break // avoid WAW on CFM and ignore return addr
+END(non_syscall)
+
+ .org ia64_ivt+0x4800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4800 Entry 18 (size 64 bundles) Reserved
+ DBG_FAULT(18)
+ FAULT(18)
+
+ /*
+ * There is no particular reason for this code to be here, other than that
+ * there happens to be space here that would go unused otherwise. If this
+ * fault ever gets "unreserved", simply move the following code to a more
+ * suitable spot...
+ */
+
+ENTRY(dispatch_unaligned_handler)
+ // Branches to ia64_prepare_handle_unaligned with cr.ifa and sp+16 as
+ // arguments and rp preset to ia64_leave_kernel.
+ SAVE_MIN_WITH_COVER
+ ;;
+ alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!)
+ mov out0=cr.ifa // first argument: contents of cr.ifa
+ adds out1=16,sp // second argument: sp+16 (labelled "pointer to pt_regs" elsewhere in this file)
+
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+(p15) ssm psr.i // restore psr.i
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ SAVE_REST
+ movl r14=ia64_leave_kernel
+ ;;
+ mov rp=r14 // return address seen by the handler
+ br.sptk.many ia64_prepare_handle_unaligned // plain branch (not br.call): rp stays as set above
+END(dispatch_unaligned_handler)
+
+ .org ia64_ivt+0x4c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4c00 Entry 19 (size 64 bundles) Reserved
+ DBG_FAULT(19)
+ FAULT(19)
+
+ /*
+ * There is no particular reason for this code to be here, other than that
+ * there happens to be space here that would go unused otherwise. If this
+ * fault ever gets "unreserved", simply move the following code to a more
+ * suitable spot...
+ */
+
+ENTRY(dispatch_to_fault_handler)
+ /*
+ * Common tail for vectors without a dedicated handler: calls ia64_fault
+ * with five arguments (r15, cr.isr, cr.ifa, cr.iim, cr.itir) and rp preset
+ * to ia64_leave_kernel.
+ *
+ * Input:
+ * psr.ic: off
+ * r19: fault vector number (e.g., 24 for General Exception)
+ * r31: contains saved predicates (pr)
+ */
+ SAVE_MIN_WITH_COVER_R19
+ alloc r14=ar.pfs,0,0,5,0
+ mov out0=r15 // NOTE(review): presumably the vector number moved from r19 by SAVE_MIN_WITH_COVER_R19 -- confirm
+ mov out1=cr.isr
+ mov out2=cr.ifa
+ mov out3=cr.iim
+ mov out4=cr.itir
+ ;;
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+(p15) ssm psr.i // restore psr.i
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+ ;;
+ SAVE_REST
+ movl r14=ia64_leave_kernel
+ ;;
+ mov rp=r14 // ia64_fault returns to ia64_leave_kernel
+ br.call.sptk.many b6=ia64_fault // return link goes to b6, so rp stays as set above
+END(dispatch_to_fault_handler)
+
+//
+// --- End of long entries, Beginning of short entries
+//
+
+ .org ia64_ivt+0x5000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49)
+ENTRY(page_not_present)
+ // Purges the local TLB entry for the faulting address, then continues in
+ // page_fault with r16 = cr.ifa and r31 = saved predicates.
+ DBG_FAULT(20)
+ mov r16=cr.ifa // r16 = contents of cr.ifa
+ rsm psr.dt // clear psr.dt
+ /*
+ * The Linux page fault handler doesn't expect non-present pages to be in
+ * the TLB. Flush the existing entry now, so we meet that expectation.
+ */
+ mov r17=PAGE_SHIFT<<2 // second operand of ptc.l below, built from PAGE_SHIFT
+ ;;
+ ptc.l r16,r17 // purge the local translation for the address in r16
+ ;;
+ mov r31=pr // keep the predicates in r31
+ srlz.d // data serialization after the rsm/ptc.l above
+ br.sptk.many page_fault
+END(page_not_present)
+
+ .org ia64_ivt+0x5100
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52)
+ENTRY(key_permission)
+ // Same hand-off to page_fault as the neighbouring access-rights vectors.
+ DBG_FAULT(21)
+ mov r16=cr.ifa // r16 = contents of cr.ifa
+ rsm psr.dt // clear psr.dt
+ mov r31=pr // keep the predicates in r31
+ ;;
+ srlz.d // data serialization after the rsm above
+ br.sptk.many page_fault
+END(key_permission)
+
+ .org ia64_ivt+0x5200
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
+ENTRY(iaccess_rights)
+ // Same hand-off to page_fault as key_permission and daccess_rights.
+ DBG_FAULT(22)
+ mov r16=cr.ifa // r16 = contents of cr.ifa
+ rsm psr.dt // clear psr.dt
+ mov r31=pr // keep the predicates in r31
+ ;;
+ srlz.d // data serialization after the rsm above
+ br.sptk.many page_fault
+END(iaccess_rights)
+
+ .org ia64_ivt+0x5300
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
+ENTRY(daccess_rights)
+ // Same hand-off to page_fault as key_permission and iaccess_rights.
+ DBG_FAULT(23)
+ mov r16=cr.ifa // r16 = contents of cr.ifa
+ rsm psr.dt // clear psr.dt
+ mov r31=pr // keep the predicates in r31
+ ;;
+ srlz.d // data serialization after the rsm above
+ br.sptk.many page_fault
+END(daccess_rights)
+
+ .org ia64_ivt+0x5400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
+ENTRY(general_exception)
+ // Routes to dispatch_illegal_op_fault when the low 32 bits of cr.isr are
+ // zero, otherwise to dispatch_to_fault_handler with vector number 24.
+ DBG_FAULT(24)
+ mov r16=cr.isr
+ mov r31=pr // saved predicates, as dispatch_to_fault_handler expects in r31
+ ;;
+ cmp4.eq p6,p0=0,r16 // 4-byte compare: p6 = (low 32 bits of cr.isr == 0)
+(p6) br.sptk.many dispatch_illegal_op_fault
+ ;;
+ mov r19=24 // fault number
+ br.sptk.many dispatch_to_fault_handler
+END(general_exception)
+
+ .org ia64_ivt+0x5500
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
+ENTRY(disabled_fp_reg)
+ // Clears psr.dfh, then goes to dispatch_to_fault_handler with vector number 25.
+ DBG_FAULT(25)
+ rsm psr.dfh // ensure we can access fph
+ ;;
+ srlz.d // data serialization after the rsm above
+ mov r31=pr // saved predicates, as dispatch_to_fault_handler expects in r31
+ mov r19=25 // fault vector number, as dispatch_to_fault_handler expects in r19
+ br.sptk.many dispatch_to_fault_handler
+END(disabled_fp_reg)
+
+ .org ia64_ivt+0x5600
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
+ENTRY(nat_consumption)
+ DBG_FAULT(26)
+ FAULT(26) // no vector-specific code; FAULT is defined outside this chunk (presumably hands vector 26 to dispatch_to_fault_handler -- confirm)
+END(nat_consumption)
+
+ .org ia64_ivt+0x5700
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
+ENTRY(speculation_vector)
+ DBG_FAULT(27)
+ /*
+ * A [f]chk.[as] instruction needs to take the branch to the recovery code but
+ * this part of the architecture is not implemented in hardware on some CPUs, such
+ * as Itanium. Thus, in general we need to emulate the behavior. IIM contains
+ * the relative target (not yet sign extended). So after sign extending it we
+ * simply add it to IIP. We also need to reset the EI field of the IPSR to zero,
+ * i.e., the slot to restart into.
+ *
+ * cr.iim contains zero_ext(imm21)
+ */
+ mov r18=cr.iim
+ ;;
+ mov r17=cr.iip
+ shl r18=r18,43 // put sign bit in position (43=64-21)
+ ;;
+
+ mov r16=cr.ipsr
+ shr r18=r18,39 // sign extend (39=43-4); net effect: sign-extended imm21 shifted left by 4
+ ;;
+
+ add r17=r17,r18 // now add the offset
+ ;;
+ mov cr.iip=r17 // resume at the computed target
+ dep r16=0,r16,41,2 // clear EI
+ ;;
+
+ mov cr.ipsr=r16
+ ;;
+
+ rfi // and go back
+END(speculation_vector)
+
+ .org ia64_ivt+0x5800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5800 Entry 28 (size 16 bundles) Reserved
+ DBG_FAULT(28)
+ FAULT(28)
+
+ .org ia64_ivt+0x5900
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
+ENTRY(debug_vector)
+ DBG_FAULT(29)
+ FAULT(29) // no vector-specific code; FAULT is defined outside this chunk (presumably hands vector 29 to dispatch_to_fault_handler -- confirm)
+END(debug_vector)
+
+ .org ia64_ivt+0x5a00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
+ENTRY(unaligned_access)
+ // Saves the predicates and continues in dispatch_unaligned_handler.
+ DBG_FAULT(30)
+ mov r16=cr.ipsr // NOTE(review): r16 is not read in the code visible here; presumably consumed by SAVE_MIN_WITH_COVER -- confirm
+ mov r31=pr // prepare to save predicates
+ ;;
+ br.sptk.many dispatch_unaligned_handler
+END(unaligned_access)
+
+ .org ia64_ivt+0x5b00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
+ENTRY(unsupported_data_reference)
+ DBG_FAULT(31)
+ FAULT(31) // no vector-specific code; FAULT is defined outside this chunk (presumably hands vector 31 to dispatch_to_fault_handler -- confirm)
+END(unsupported_data_reference)
+
+ .org ia64_ivt+0x5c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64)
+ENTRY(floating_point_fault)
+ DBG_FAULT(32)
+ FAULT(32) // no vector-specific code; FAULT is defined outside this chunk (presumably hands vector 32 to dispatch_to_fault_handler -- confirm)
+END(floating_point_fault)
+
+ .org ia64_ivt+0x5d00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
+ENTRY(floating_point_trap)
+ DBG_FAULT(33)
+ FAULT(33) // no vector-specific code; FAULT is defined outside this chunk (presumably hands vector 33 to dispatch_to_fault_handler -- confirm)
+END(floating_point_trap)
+
+ .org ia64_ivt+0x5e00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
+ENTRY(lower_privilege_trap)
+ DBG_FAULT(34)
+ FAULT(34) // no vector-specific code; FAULT is defined outside this chunk (presumably hands vector 34 to dispatch_to_fault_handler -- confirm)
+END(lower_privilege_trap)
+
+ .org ia64_ivt+0x5f00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
+ENTRY(taken_branch_trap)
+ DBG_FAULT(35)
+ FAULT(35) // no vector-specific code; FAULT is defined outside this chunk (presumably hands vector 35 to dispatch_to_fault_handler -- confirm)
+END(taken_branch_trap)
+
+ .org ia64_ivt+0x6000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
+ENTRY(single_step_trap)
+ DBG_FAULT(36)
+ FAULT(36) // no vector-specific code; FAULT is defined outside this chunk (presumably hands vector 36 to dispatch_to_fault_handler -- confirm)
+END(single_step_trap)
+
+ .org ia64_ivt+0x6100
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6100 Entry 37 (size 16 bundles) Reserved
+ DBG_FAULT(37)
+ FAULT(37)
+
+ .org ia64_ivt+0x6200
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6200 Entry 38 (size 16 bundles) Reserved
+ DBG_FAULT(38)
+ FAULT(38)
+
+ .org ia64_ivt+0x6300
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6300 Entry 39 (size 16 bundles) Reserved
+ DBG_FAULT(39)
+ FAULT(39)
+
+ .org ia64_ivt+0x6400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6400 Entry 40 (size 16 bundles) Reserved
+ DBG_FAULT(40)
+ FAULT(40)
+
+ .org ia64_ivt+0x6500
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6500 Entry 41 (size 16 bundles) Reserved
+ DBG_FAULT(41)
+ FAULT(41)
+
+ .org ia64_ivt+0x6600
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6600 Entry 42 (size 16 bundles) Reserved
+ DBG_FAULT(42)
+ FAULT(42)
+
+ .org ia64_ivt+0x6700
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6700 Entry 43 (size 16 bundles) Reserved
+ DBG_FAULT(43)
+ FAULT(43)
+
+ .org ia64_ivt+0x6800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6800 Entry 44 (size 16 bundles) Reserved
+ DBG_FAULT(44)
+ FAULT(44)
+
+ .org ia64_ivt+0x6900
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6900 Entry 45 (size 16 bundles) IA-32 Exception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
+ENTRY(ia32_exception)
+ DBG_FAULT(45)
+ FAULT(45) // no vector-specific code; FAULT is defined outside this chunk (presumably hands vector 45 to dispatch_to_fault_handler -- confirm)
+END(ia32_exception)
+
+ .org ia64_ivt+0x6a00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71)
+ENTRY(ia32_intercept)
+ // With CONFIG_IA32_SUPPORT: if ISR.code is 2 and the eflags.ac bit differs
+ // between ar.eflag and cr.iim, restore the predicates and rfi; every other
+ // case falls through to FAULT(46).  Without it, only FAULT(46) is emitted.
+ DBG_FAULT(46)
+#ifdef CONFIG_IA32_SUPPORT
+ mov r31=pr // save predicates so they can be restored before rfi
+ mov r16=cr.isr
+ ;;
+ extr.u r17=r16,16,8 // get ISR.code
+ mov r18=ar.eflag // current eflag value
+ mov r19=cr.iim // old eflag value
+ ;;
+ cmp.ne p6,p0=2,r17 // p6 = (ISR.code != 2)
+(p6) br.cond.spnt 1f // not a system flag fault
+ xor r16=r18,r19 // bits that differ between current and old eflag
+ ;;
+ extr.u r17=r16,18,1 // get the eflags.ac bit
+ ;;
+ cmp.eq p6,p0=0,r17
+(p6) br.cond.spnt 1f // eflags.ac bit didn't change
+ ;;
+ mov pr=r31,-1 // restore predicate registers
+ rfi // eflags.ac changed: return from the interruption without further handling
+
+1:
+#endif // CONFIG_IA32_SUPPORT
+ FAULT(46)
+END(ia32_intercept)
+
+ .org ia64_ivt+0x6b00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74)
+ENTRY(ia32_interrupt)
+ // With CONFIG_IA32_SUPPORT the vector saves the predicates and continues
+ // in dispatch_to_ia32_handler; otherwise it only emits FAULT(47).
+ DBG_FAULT(47)
+#ifdef CONFIG_IA32_SUPPORT
+ mov r31=pr // keep the predicates in r31
+ br.sptk.many dispatch_to_ia32_handler
+#else
+ FAULT(47)
+#endif
+END(ia32_interrupt)
+
+ .org ia64_ivt+0x6c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6c00 Entry 48 (size 16 bundles) Reserved
+ DBG_FAULT(48)
+ FAULT(48)
+
+ .org ia64_ivt+0x6d00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6d00 Entry 49 (size 16 bundles) Reserved
+ DBG_FAULT(49)
+ FAULT(49)
+
+ .org ia64_ivt+0x6e00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6e00 Entry 50 (size 16 bundles) Reserved
+ DBG_FAULT(50)
+ FAULT(50)
+
+ .org ia64_ivt+0x6f00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6f00 Entry 51 (size 16 bundles) Reserved
+ DBG_FAULT(51)
+ FAULT(51)
+
+ .org ia64_ivt+0x7000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7000 Entry 52 (size 16 bundles) Reserved
+ DBG_FAULT(52)
+ FAULT(52)
+
+ .org ia64_ivt+0x7100
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7100 Entry 53 (size 16 bundles) Reserved
+ DBG_FAULT(53)
+ FAULT(53)
+
+ .org ia64_ivt+0x7200
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7200 Entry 54 (size 16 bundles) Reserved
+ DBG_FAULT(54)
+ FAULT(54)
+
+ .org ia64_ivt+0x7300
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7300 Entry 55 (size 16 bundles) Reserved
+ DBG_FAULT(55)
+ FAULT(55)
+
+ .org ia64_ivt+0x7400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7400 Entry 56 (size 16 bundles) Reserved
+ DBG_FAULT(56)
+ FAULT(56)
+
+ .org ia64_ivt+0x7500
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7500 Entry 57 (size 16 bundles) Reserved
+ DBG_FAULT(57)
+ FAULT(57)
+
+ .org ia64_ivt+0x7600
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7600 Entry 58 (size 16 bundles) Reserved
+ DBG_FAULT(58)
+ FAULT(58)
+
+ .org ia64_ivt+0x7700
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7700 Entry 59 (size 16 bundles) Reserved
+ DBG_FAULT(59)
+ FAULT(59)
+
+ .org ia64_ivt+0x7800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7800 Entry 60 (size 16 bundles) Reserved
+ DBG_FAULT(60)
+ FAULT(60)
+
+ .org ia64_ivt+0x7900
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7900 Entry 61 (size 16 bundles) Reserved
+ DBG_FAULT(61)
+ FAULT(61)
+
+ .org ia64_ivt+0x7a00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7a00 Entry 62 (size 16 bundles) Reserved
+ DBG_FAULT(62)
+ FAULT(62)
+
+ .org ia64_ivt+0x7b00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7b00 Entry 63 (size 16 bundles) Reserved
+ DBG_FAULT(63)
+ FAULT(63)
+
+ .org ia64_ivt+0x7c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7c00 Entry 64 (size 16 bundles) Reserved
+ DBG_FAULT(64)
+ FAULT(64)
+
+ .org ia64_ivt+0x7d00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7d00 Entry 65 (size 16 bundles) Reserved
+ DBG_FAULT(65)
+ FAULT(65)
+
+ .org ia64_ivt+0x7e00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7e00 Entry 66 (size 16 bundles) Reserved
+ DBG_FAULT(66)
+ FAULT(66)
+
+ .org ia64_ivt+0x7f00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7f00 Entry 67 (size 16 bundles) Reserved
+ DBG_FAULT(67)
+ FAULT(67)
+
+#ifdef CONFIG_IA32_SUPPORT
+
+ /*
+ * There is no particular reason for this code to be here, other than that
+ * there happens to be space here that would go unused otherwise. If this
+ * fault ever gets "unreserved", simply move the following code to a more
+ * suitable spot...
+ */
+
+ // IA32 interrupt entry point
+
+ENTRY(dispatch_to_ia32_handler)
+ // Interrupt number 0x80 (taken from cr.isr) is treated as an IA-32 system
+ // call: the arguments are loaded from the saved register slots and the
+ // handler is fetched from ia32_syscall_table.  Any other interrupt number
+ // goes to non_ia32_syscall, which calls ia32_bad_interrupt.
+ SAVE_MIN
+ ;;
+ mov r14=cr.isr
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+(p15) ssm psr.i
+ adds r3=8,r2 // Base pointer for SAVE_REST
+ ;;
+ SAVE_REST
+ ;;
+ mov r15=0x80
+ shr r14=r14,16 // Get interrupt number
+ ;;
+ cmp.ne p6,p0=r14,r15 // p6 = (interrupt number != 0x80)
+(p6) br.call.dpnt.many b6=non_ia32_syscall
+
+ adds r14=IA64_PT_REGS_R8_OFFSET + 16,sp // 16 byte hole per SW conventions
+ adds r15=IA64_PT_REGS_R1_OFFSET + 16,sp
+ ;;
+ cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
+ ld8 r8=[r14] // get r8
+ ;;
+ st8 [r15]=r8 // save original EAX in r1 (IA32 procs don't use the GP)
+ ;;
+ alloc r15=ar.pfs,0,0,6,0 // must be first in an insn group
+ ;;
+ ld4 r8=[r14],8 // r8 == eax (syscall number)
+ mov r15=IA32_NR_syscalls
+ ;;
+ cmp.ltu.unc p6,p7=r8,r15 // p6 = (syscall number < IA32_NR_syscalls)
+ ld4 out1=[r14],8 // r9 == ecx
+ ;;
+ ld4 out2=[r14],8 // r10 == edx
+ ;;
+ ld4 out0=[r14] // r11 == ebx
+ adds r14=(IA64_PT_REGS_R13_OFFSET) + 16,sp
+ ;;
+ ld4 out5=[r14],PT(R14)-PT(R13) // r13 == ebp
+ ;;
+ ld4 out3=[r14],PT(R15)-PT(R14) // r14 == esi
+ adds r2=TI_FLAGS+IA64_TASK_SIZE,r13 // address of the flags word loaded below
+ ;;
+ ld4 out4=[r14] // r15 == edi
+ movl r16=ia32_syscall_table
+ ;;
+(p6) shladd r16=r8,3,r16 // force ni_syscall if not valid syscall number
+ ld4 r2=[r2] // r2 = current_thread_info()->flags
+ ;;
+ ld8 r16=[r16] // r16 = handler address read from the table
+ and r2=_TIF_SYSCALL_TRACEAUDIT,r2 // mask trace or audit
+ ;;
+ mov b6=r16
+ movl r15=ia32_ret_from_syscall
+ cmp.eq p8,p0=r2,r0 // p8 = neither trace nor audit flag is set
+ ;;
+ mov rp=r15 // the handler returns to ia32_ret_from_syscall
+(p8) br.call.sptk.many b6=b6 // untraced: call the handler directly
+ br.cond.sptk ia32_trace_syscall // traced/audited path
+
+non_ia32_syscall:
+ alloc r15=ar.pfs,0,0,2,0
+ mov out0=r14 // interrupt #
+ add out1=16,sp // pointer to pt_regs
+ ;; // avoid WAW on CFM
+ br.call.sptk.many rp=ia32_bad_interrupt
+.ret1: movl r15=ia64_leave_kernel
+ ;;
+ mov rp=r15
+ br.ret.sptk.many rp // "return" into ia64_leave_kernel
+END(dispatch_to_ia32_handler)
+
+#endif /* CONFIG_IA32_SUPPORT */
diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c
new file mode 100644
index 00000000000..c3a04ee7f4f
--- /dev/null
+++ b/arch/ia64/kernel/machvec.c
@@ -0,0 +1,70 @@
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <asm/machvec.h>
+#include <asm/system.h>
+
+#ifdef CONFIG_IA64_GENERIC
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+#include <asm/page.h>
+
+struct ia64_machine_vector ia64_mv;
+EXPORT_SYMBOL(ia64_mv);
+
+/*
+ * lookup_machvec
+ *
+ *	Linear search of the machine-vector table delimited by machvec_start
+ *	and machvec_end for the entry whose name compares equal to @name.
+ *
+ *  Inputs  : name (platform name to look for)
+ *  Outputs : pointer to the matching table entry, or NULL if none matches
+ */
+static struct ia64_machine_vector *
+lookup_machvec (const char *name)
+{
+	extern struct ia64_machine_vector machvec_start[];
+	extern struct ia64_machine_vector machvec_end[];
+	struct ia64_machine_vector *mv;
+
+	for (mv = machvec_start; mv < machvec_end; ++mv)
+		if (strcmp (mv->name, name) == 0)
+			return mv;
+
+	/* pointer-returning function: say NULL, not a bare integer 0 */
+	return NULL;
+}
+
+/*
+ * machvec_init
+ *
+ *	Copy the machine vector registered under @name into the global
+ *	ia64_mv and log the platform name.  Panics when lookup_machvec()
+ *	finds no entry for @name.
+ */
+void
+machvec_init (const char *name)
+{
+	struct ia64_machine_vector *found = lookup_machvec(name);
+
+	if (!found)
+		panic("generic kernel failed to find machine vector for platform %s!", name);
+
+	ia64_mv = *found;
+	printk(KERN_INFO "booting generic kernel on platform %s\n", name);
+}
+
+#endif /* CONFIG_IA64_GENERIC */
+
+/*
+ * machvec_setup - intentionally empty; @arg is ignored.
+ * NOTE(review): presumably the default setup hook for platforms that need
+ * no platform-specific setup -- confirm against asm/machvec.h.
+ */
+void
+machvec_setup (char **arg)
+{
+}
+EXPORT_SYMBOL(machvec_setup);
+
+/*
+ * machvec_timer_interrupt - intentionally empty; all arguments are ignored.
+ * NOTE(review): presumably the default timer-interrupt hook for platforms
+ * that need none -- confirm against asm/machvec.h.
+ */
+void
+machvec_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
+{
+}
+EXPORT_SYMBOL(machvec_timer_interrupt);
+
+/*
+ * machvec_dma_sync_single - only issues a memory barrier (mb()); hwdev,
+ * dma_handle, size and dir are not used.
+ */
+void
+machvec_dma_sync_single (struct device *hwdev, dma_addr_t dma_handle, size_t size, int dir)
+{
+ mb();
+}
+EXPORT_SYMBOL(machvec_dma_sync_single);
+
+/*
+ * machvec_dma_sync_sg - only issues a memory barrier (mb()); hwdev, sg, n
+ * and dir are not used.
+ */
+void
+machvec_dma_sync_sg (struct device *hwdev, struct scatterlist *sg, int n, int dir)
+{
+ mb();
+}
+EXPORT_SYMBOL(machvec_dma_sync_sg);
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
new file mode 100644
index 00000000000..4d6c7b8f667
--- /dev/null
+++ b/arch/ia64/kernel/mca.c
@@ -0,0 +1,1470 @@
+/*
+ * File: mca.c
+ * Purpose: Generic MCA handling layer
+ *
+ * Updated for latest kernel
+ * Copyright (C) 2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Copyright (C) 2002 Dell Inc.
+ * Copyright (C) Matt Domsch (Matt_Domsch@dell.com)
+ *
+ * Copyright (C) 2002 Intel
+ * Copyright (C) Jenna Hall (jenna.s.hall@intel.com)
+ *
+ * Copyright (C) 2001 Intel
+ * Copyright (C) Fred Lewis (frederick.v.lewis@intel.com)
+ *
+ * Copyright (C) 2000 Intel
+ * Copyright (C) Chuck Fleckenstein (cfleck@co.intel.com)
+ *
+ * Copyright (C) 1999, 2004 Silicon Graphics, Inc.
+ * Copyright (C) Vijay Chander(vijay@engr.sgi.com)
+ *
+ * 03/04/15 D. Mosberger Added INIT backtrace support.
+ * 02/03/25 M. Domsch GUID cleanups
+ *
+ * 02/01/04 J. Hall Aligned MCA stack to 16 bytes, added platform vs. CPU
+ * error flag, set SAL default return values, changed
+ * error record structure to linked list, added init call
+ * to sal_get_state_info_size().
+ *
+ * 01/01/03 F. Lewis Added setup of CMCI and CPEI IRQs, logging of corrected
+ * platform errors, completed code for logging of
+ * corrected & uncorrected machine check errors, and
+ * updated for conformance with Nov. 2000 revision of the
+ * SAL 3.0 spec.
+ * 00/03/29 C. Fleckenstein Fixed PAL/SAL update issues, began MCA bug fixes, logging issues,
+ * added min save state dump, added INIT handler.
+ *
+ * 2003-12-08 Keith Owens <kaos@sgi.com>
+ * smp_call_function() must not be called from interrupt context (can
+ * deadlock on tasklist_lock). Use keventd to call smp_call_function().
+ *
+ * 2004-02-01 Keith Owens <kaos@sgi.com>
+ * Avoid deadlock when using printk() for MCA and INIT records.
+ * Delete all record printing code, moved to salinfo_decode in user space.
+ * Mark variables and functions static where possible.
+ * Delete dead variables and functions.
+ * Reorder to remove the need for forward declarations and to consolidate
+ * related code.
+ */
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kallsyms.h>
+#include <linux/smp_lock.h>
+#include <linux/bootmem.h>
+#include <linux/acpi.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/workqueue.h>
+
+#include <asm/delay.h>
+#include <asm/machvec.h>
+#include <asm/meminit.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/sal.h>
+#include <asm/mca.h>
+
+#include <asm/irq.h>
+#include <asm/hw_irq.h>
+
+/*
+ * IA64_MCA_DEBUG() forwards its arguments to printk() when
+ * IA64_MCA_DEBUG_INFO is defined and expands to nothing otherwise, so its
+ * arguments are not evaluated (or even compiled) in non-debug builds.
+ */
+#if defined(IA64_MCA_DEBUG_INFO)
+# define IA64_MCA_DEBUG(fmt...) printk(fmt)
+#else
+# define IA64_MCA_DEBUG(fmt...)
+#endif
+
+/* Used by mca_asm.S */
+ia64_mca_sal_to_os_state_t ia64_sal_to_os_handoff_state;
+ia64_mca_os_to_sal_state_t ia64_os_to_sal_handoff_state;
+u64 ia64_mca_serialize;
+DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */
+DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */
+DEFINE_PER_CPU(u64, ia64_mca_pal_pte); /* PTE to map PAL code */
+DEFINE_PER_CPU(u64, ia64_mca_pal_base); /* vaddr PAL code granule */
+
+unsigned long __per_cpu_mca[NR_CPUS];
+
+/* In mca_asm.S */
+extern void ia64_monarch_init_handler (void);
+extern void ia64_slave_init_handler (void);
+
+static ia64_mc_info_t ia64_mc_info;
+
+#define MAX_CPE_POLL_INTERVAL (15*60*HZ) /* 15 minutes */
+#define MIN_CPE_POLL_INTERVAL (2*60*HZ) /* 2 minutes */
+#define CMC_POLL_INTERVAL (1*60*HZ) /* 1 minute */
+#define CPE_HISTORY_LENGTH 5
+#define CMC_HISTORY_LENGTH 5
+
+static struct timer_list cpe_poll_timer;
+static struct timer_list cmc_poll_timer;
+/*
+ * This variable tells whether we are currently in polling mode.
+ * Start with this in the wrong state so we won't play w/ timers
+ * before the system is ready.
+ */
+static int cmc_polling_enabled = 1;
+
+/*
+ * Clearing this variable prevents CPE polling from getting activated
+ * in mca_late_init. Use it if your system doesn't provide a CPEI,
+ * but encounters problems retrieving CPE logs. This should only be
+ * necessary for debugging.
+ */
+static int cpe_poll_enabled = 1;
+
+extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe);
+
+static int mca_init;
+
+/*
+ * IA64_MCA log support
+ */
+#define IA64_MAX_LOGS 2 /* Double-buffering for nested MCAs */
+#define IA64_MAX_LOG_TYPES 4 /* MCA, INIT, CMC, CPE */
+
+/* Per-record-type log state; one element of ia64_state_log[] per SAL info type. */
+typedef struct ia64_state_log_s
+{
+ spinlock_t isl_lock; /* taken by IA64_LOG_LOCK(), released by IA64_LOG_UNLOCK() */
+ int isl_index; /* 0 or 1: the "next" slot in isl_log[]; "current" is the other one */
+ unsigned long isl_count; /* incremented by IA64_LOG_INDEX_INC() */
+ ia64_err_rec_t *isl_log[IA64_MAX_LOGS]; /* need space to store header + error log */
+} ia64_state_log_t;
+
+static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES];
+
+/*
+ * Accessors for ia64_state_log[]; `it' is the SAL info type used as the array
+ * index.  Each type has two buffers: "next" (slot isl_index) and "current"
+ * (the other slot).
+ */
+/* Allocate both buffers of `size' bytes from bootmem. */
+#define IA64_LOG_ALLOCATE(it, size) \
+	{ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)] = \
+		(ia64_err_rec_t *)alloc_bootmem(size); \
+	ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)] = \
+		(ia64_err_rec_t *)alloc_bootmem(size);}
+#define IA64_LOG_LOCK_INIT(it) spin_lock_init(&ia64_state_log[it].isl_lock)
+/* LOCK/UNLOCK expect the caller to declare a local flags variable named `s'. */
+#define IA64_LOG_LOCK(it) spin_lock_irqsave(&ia64_state_log[it].isl_lock, s)
+#define IA64_LOG_UNLOCK(it) spin_unlock_irqrestore(&ia64_state_log[it].isl_lock,s)
+/* Left unparenthesized on purpose: it is also used as an assignment target. */
+#define IA64_LOG_NEXT_INDEX(it) ia64_state_log[it].isl_index
+/* Parenthesized so the subtraction cannot be split by a neighbouring operator. */
+#define IA64_LOG_CURR_INDEX(it) (1 - ia64_state_log[it].isl_index)
+/* Swap the roles of the two buffers and count one more record. */
+#define IA64_LOG_INDEX_INC(it) \
+	{ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index; \
+	ia64_state_log[it].isl_count++;}
+/* Swap the roles of the two buffers back without touching the count. */
+#define IA64_LOG_INDEX_DEC(it) \
+	ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index
+#define IA64_LOG_NEXT_BUFFER(it) (void *)((ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)]))
+#define IA64_LOG_CURR_BUFFER(it) (void *)((ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)]))
+#define IA64_LOG_COUNT(it) ia64_state_log[it].isl_count
+
+/*
+ * ia64_log_init
+ *	Reset the log index and lock for one record type and, when SAL
+ *	reports a non-zero maximum record size, allocate and zero both log
+ *	buffers for that type.
+ *  Inputs  : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE})
+ *  Outputs : None
+ */
+static void
+ia64_log_init(int sal_info_type)
+{
+	u64 record_max;
+
+	IA64_LOG_NEXT_INDEX(sal_info_type) = 0;
+	IA64_LOG_LOCK_INIT(sal_info_type);
+
+	/* SAL reports the largest record it can produce for this type */
+	record_max = ia64_sal_get_state_info_size(sal_info_type);
+	if (record_max == 0) {
+		/* nothing to allocate: alloc_bootmem() doesn't like zero-sized allocations */
+		return;
+	}
+
+	/* two buffers per type, both cleared before first use */
+	IA64_LOG_ALLOCATE(sal_info_type, record_max);
+	memset(IA64_LOG_CURR_BUFFER(sal_info_type), 0, record_max);
+	memset(IA64_LOG_NEXT_BUFFER(sal_info_type), 0, record_max);
+}
+
+/*
+ * ia64_log_get
+ *
+ *	Ask SAL for the pending error record of the given type, reading it
+ *	into the "next" OS log buffer for that type.  On success the two
+ *	buffers swap roles, so the record just read is not the next one to
+ *	be overwritten.
+ *
+ *  Inputs  :   info_type   (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE})
+ *              irq_safe    whether you can use printk at this point
+ *  Outputs :   size        (total record length, 0 if SAL returned nothing)
+ *              *buffer     (ptr to error record; not written when 0 is returned)
+ *
+ */
+static u64
+ia64_log_get(int sal_info_type, u8 **buffer, int irq_safe)
+{
+	sal_log_record_header_t *log_buffer;
+	u64 total_len = 0;
+	/*
+	 * Used implicitly by IA64_LOG_LOCK()/IA64_LOG_UNLOCK().  The saved
+	 * interrupt flags of spin_lock_irqsave() must be an unsigned long;
+	 * an int can truncate them.
+	 */
+	unsigned long s;
+
+	IA64_LOG_LOCK(sal_info_type);
+
+	/* Get the process state information */
+	log_buffer = IA64_LOG_NEXT_BUFFER(sal_info_type);
+
+	total_len = ia64_sal_get_state_info(sal_info_type, (u64 *)log_buffer);
+
+	if (!total_len) {
+		IA64_LOG_UNLOCK(sal_info_type);
+		return 0;
+	}
+
+	IA64_LOG_INDEX_INC(sal_info_type);
+	IA64_LOG_UNLOCK(sal_info_type);
+	if (irq_safe) {
+		IA64_MCA_DEBUG("%s: SAL error record type %d retrieved. "
+			       "Record length = %ld\n", __FUNCTION__, sal_info_type, total_len);
+	}
+	*buffer = (u8 *) log_buffer;
+	return total_len;
+}
+
+/*
+ * ia64_mca_log_sal_error_record
+ *
+ *	Fetch one error record of the requested type from SAL, hand it to
+ *	salinfo_log_wakeup() and, if the record's severity is "corrected",
+ *	clear it in SAL.  Does nothing when SAL has no record.
+ *
+ *  Inputs  :   sal_info_type   (Type of error record MCA/CMC/CPE/INIT)
+ */
+static void
+ia64_mca_log_sal_error_record(int sal_info_type)
+{
+#ifdef IA64_MCA_DEBUG_INFO
+	static const char * const type_names[] = { "MCA", "INIT", "CMC", "CPE" };
+#endif
+	/* MCA and INIT records are fetched in a context where printk is not safe */
+	int irq_safe = !(sal_info_type == SAL_INFO_TYPE_MCA ||
+			 sal_info_type == SAL_INFO_TYPE_INIT);
+	u8 *record;
+	u64 record_len;
+
+	record_len = ia64_log_get(sal_info_type, &record, irq_safe);
+	if (record_len == 0)
+		return;
+
+	salinfo_log_wakeup(sal_info_type, record, record_len, irq_safe);
+
+	if (irq_safe) {
+		IA64_MCA_DEBUG("CPU %d: SAL log contains %s error record\n",
+			       smp_processor_id(),
+			       sal_info_type < ARRAY_SIZE(type_names) ? type_names[sal_info_type] : "UNKNOWN");
+	}
+
+	/* Clear logs from corrected errors in case there's no user-level logger */
+	if (((sal_log_record_header_t *)record)->severity == sal_log_severity_corrected)
+		ia64_sal_clear_state_info(sal_info_type);
+}
+
+/*
+ * platform dependent error handling
+ */
+#ifndef PLATFORM_MCA_HANDLERS
+
+#ifdef CONFIG_ACPI
+
+static int cpe_vector = -1;
+
+/*
+ * ia64_mca_cpe_int_handler
+ *
+ * Interrupt handler for corrected platform errors: logs the pending CPE
+ * record and, while interrupt mode is active (cpe_poll_enabled clear and
+ * cpe_vector >= 0), rate-limits the interrupt.  When the current event plus
+ * the remembered events from the last HZ jiffies reach CPE_HISTORY_LENGTH,
+ * the CPE interrupt is disabled and cpe_poll_timer is armed instead.
+ *
+ * Inputs  : cpe_irq (only used in the debug message), arg and ptregs (unused)
+ * Outputs : always IRQ_HANDLED
+ */
+static irqreturn_t
+ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
+{
+ /* timestamps (jiffies) of recent CPE interrupts, used as a ring buffer */
+ static unsigned long cpe_history[CPE_HISTORY_LENGTH];
+ static int index; /* next slot of cpe_history[] to overwrite */
+ static DEFINE_SPINLOCK(cpe_history_lock);
+
+ IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n",
+ __FUNCTION__, cpe_irq, smp_processor_id());
+
+ /* SAL spec states this should run w/ interrupts enabled */
+ local_irq_enable();
+
+ /* Get the CPE error record and log it */
+ ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE);
+
+ spin_lock(&cpe_history_lock);
+ if (!cpe_poll_enabled && cpe_vector >= 0) {
+
+ int i, count = 1; /* we know 1 happened now */
+ unsigned long now = jiffies;
+
+ /* count the remembered events that are at most HZ jiffies old */
+ for (i = 0; i < CPE_HISTORY_LENGTH; i++) {
+ if (now - cpe_history[i] <= HZ)
+ count++;
+ }
+
+ IA64_MCA_DEBUG(KERN_INFO "CPE threshold %d/%d\n", count, CPE_HISTORY_LENGTH);
+ if (count >= CPE_HISTORY_LENGTH) {
+
+ /* too many interrupts: switch to polling mode */
+ cpe_poll_enabled = 1;
+ spin_unlock(&cpe_history_lock);
+ disable_irq_nosync(local_vector_to_irq(IA64_CPE_VECTOR));
+
+ /*
+ * Corrected errors will still be corrected, but
+ * make sure there's a log somewhere that indicates
+ * something is generating more than we can handle.
+ */
+ printk(KERN_WARNING "WARNING: Switching to polling CPE handler; error records may be lost\n");
+
+ mod_timer(&cpe_poll_timer, jiffies + MIN_CPE_POLL_INTERVAL);
+
+ /* lock already released, get out now */
+ return IRQ_HANDLED;
+ } else {
+ /* below the threshold: remember this event, wrapping at the end */
+ cpe_history[index++] = now;
+ if (index == CPE_HISTORY_LENGTH)
+ index = 0;
+ }
+ }
+ spin_unlock(&cpe_history_lock);
+ return IRQ_HANDLED;
+}
+
+#endif /* CONFIG_ACPI */
+
+/*
+ * show_min_state
+ *
+ * Print the contents of a PAL min-state save area with printk(): NaT bits,
+ * predicates, b0/b1, the interruption registers (iip/ipsr/ifs and
+ * xip/xpsr/xfs), the static registers r0-r15 and both banks of r16-r31.
+ * Code addresses are also passed to print_symbol().
+ *
+ * Inputs  : minstate (min-state save area to dump)
+ * Outputs : None
+ */
+static void
+show_min_state (pal_min_state_area_t *minstate)
+{
+ /* saved instruction pointers plus the ri field of the matching psr image */
+ u64 iip = minstate->pmsa_iip + ((struct ia64_psr *)(&minstate->pmsa_ipsr))->ri;
+ u64 xip = minstate->pmsa_xip + ((struct ia64_psr *)(&minstate->pmsa_xpsr))->ri;
+
+ printk("NaT bits\t%016lx\n", minstate->pmsa_nat_bits);
+ printk("pr\t\t%016lx\n", minstate->pmsa_pr);
+ printk("b0\t\t%016lx ", minstate->pmsa_br0); print_symbol("%s\n", minstate->pmsa_br0);
+ printk("ar.rsc\t\t%016lx\n", minstate->pmsa_rsc);
+ printk("cr.iip\t\t%016lx ", iip); print_symbol("%s\n", iip);
+ printk("cr.ipsr\t\t%016lx\n", minstate->pmsa_ipsr);
+ printk("cr.ifs\t\t%016lx\n", minstate->pmsa_ifs);
+ printk("xip\t\t%016lx ", xip); print_symbol("%s\n", xip);
+ printk("xpsr\t\t%016lx\n", minstate->pmsa_xpsr);
+ printk("xfs\t\t%016lx\n", minstate->pmsa_xfs);
+ printk("b1\t\t%016lx ", minstate->pmsa_br1);
+ print_symbol("%s\n", minstate->pmsa_br1);
+
+ /* pmsa_gr[] starts at r1 (pmsa_gr[n] is r(n+1)); r0 is printed as constant 0 */
+ printk("\nstatic registers r0-r15:\n");
+ printk(" r0- 3 %016lx %016lx %016lx %016lx\n",
+ 0UL, minstate->pmsa_gr[0], minstate->pmsa_gr[1], minstate->pmsa_gr[2]);
+ printk(" r4- 7 %016lx %016lx %016lx %016lx\n",
+ minstate->pmsa_gr[3], minstate->pmsa_gr[4],
+ minstate->pmsa_gr[5], minstate->pmsa_gr[6]);
+ printk(" r8-11 %016lx %016lx %016lx %016lx\n",
+ minstate->pmsa_gr[7], minstate->pmsa_gr[8],
+ minstate->pmsa_gr[9], minstate->pmsa_gr[10]);
+ printk("r12-15 %016lx %016lx %016lx %016lx\n",
+ minstate->pmsa_gr[11], minstate->pmsa_gr[12],
+ minstate->pmsa_gr[13], minstate->pmsa_gr[14]);
+
+ /* banked registers: index n of each bank array is r(16+n) */
+ printk("\nbank 0:\n");
+ printk("r16-19 %016lx %016lx %016lx %016lx\n",
+ minstate->pmsa_bank0_gr[0], minstate->pmsa_bank0_gr[1],
+ minstate->pmsa_bank0_gr[2], minstate->pmsa_bank0_gr[3]);
+ printk("r20-23 %016lx %016lx %016lx %016lx\n",
+ minstate->pmsa_bank0_gr[4], minstate->pmsa_bank0_gr[5],
+ minstate->pmsa_bank0_gr[6], minstate->pmsa_bank0_gr[7]);
+ printk("r24-27 %016lx %016lx %016lx %016lx\n",
+ minstate->pmsa_bank0_gr[8], minstate->pmsa_bank0_gr[9],
+ minstate->pmsa_bank0_gr[10], minstate->pmsa_bank0_gr[11]);
+ printk("r28-31 %016lx %016lx %016lx %016lx\n",
+ minstate->pmsa_bank0_gr[12], minstate->pmsa_bank0_gr[13],
+ minstate->pmsa_bank0_gr[14], minstate->pmsa_bank0_gr[15]);
+
+ printk("\nbank 1:\n");
+ printk("r16-19 %016lx %016lx %016lx %016lx\n",
+ minstate->pmsa_bank1_gr[0], minstate->pmsa_bank1_gr[1],
+ minstate->pmsa_bank1_gr[2], minstate->pmsa_bank1_gr[3]);
+ printk("r20-23 %016lx %016lx %016lx %016lx\n",
+ minstate->pmsa_bank1_gr[4], minstate->pmsa_bank1_gr[5],
+ minstate->pmsa_bank1_gr[6], minstate->pmsa_bank1_gr[7]);
+ printk("r24-27 %016lx %016lx %016lx %016lx\n",
+ minstate->pmsa_bank1_gr[8], minstate->pmsa_bank1_gr[9],
+ minstate->pmsa_bank1_gr[10], minstate->pmsa_bank1_gr[11]);
+ printk("r28-31 %016lx %016lx %016lx %016lx\n",
+ minstate->pmsa_bank1_gr[12], minstate->pmsa_bank1_gr[13],
+ minstate->pmsa_bank1_gr[14], minstate->pmsa_bank1_gr[15]);
+}
+
+/*
+ * fetch_min_state
+ *
+ * Rebuild a pt_regs/switch_stack pair from the PAL min-state save area:
+ * control registers, static registers r1-r15, the bank 1 copies of r16-r31,
+ * b0/b1 and the NaT bits belonging to those general registers.
+ *
+ * Inputs
+ *	ms	min-state save area to read from
+ *	pt	pt_regs structure to fill in
+ *	sw	switch_stack structure to fill in
+ *
+ * Outputs
+ *	None (pt and sw are updated in place)
+ */
+static void
+fetch_min_state (pal_min_state_area_t *ms, struct pt_regs *pt, struct switch_stack *sw)
+{
+	u64 *dst_banked, *src_banked, bit, shift, nat_bits;
+	int i;
+
+	/*
+	 * First, update the pt-regs and switch-stack structures with the contents stored
+	 * in the min-state area.  When psr.ic was clear in the saved ipsr, the
+	 * xpsr/xip/xfs copies are used instead of ipsr/iip/ifs.
+	 */
+	if (((struct ia64_psr *) &ms->pmsa_ipsr)->ic == 0) {
+		pt->cr_ipsr = ms->pmsa_xpsr;
+		pt->cr_iip = ms->pmsa_xip;
+		pt->cr_ifs = ms->pmsa_xfs;
+	} else {
+		pt->cr_ipsr = ms->pmsa_ipsr;
+		pt->cr_iip = ms->pmsa_iip;
+		pt->cr_ifs = ms->pmsa_ifs;
+	}
+	pt->ar_rsc = ms->pmsa_rsc;
+	pt->pr = ms->pmsa_pr;
+	/* pmsa_gr[n] holds r(n+1); r4-r7 live in the switch stack, the rest in pt_regs */
+	pt->r1 = ms->pmsa_gr[0];
+	pt->r2 = ms->pmsa_gr[1];
+	pt->r3 = ms->pmsa_gr[2];
+	sw->r4 = ms->pmsa_gr[3];
+	sw->r5 = ms->pmsa_gr[4];
+	sw->r6 = ms->pmsa_gr[5];
+	sw->r7 = ms->pmsa_gr[6];
+	pt->r8 = ms->pmsa_gr[7];
+	pt->r9 = ms->pmsa_gr[8];
+	pt->r10 = ms->pmsa_gr[9];
+	pt->r11 = ms->pmsa_gr[10];
+	pt->r12 = ms->pmsa_gr[11];
+	pt->r13 = ms->pmsa_gr[12];
+	pt->r14 = ms->pmsa_gr[13];
+	pt->r15 = ms->pmsa_gr[14];
+	dst_banked = &pt->r16;		/* r16-r31 are contiguous in struct pt_regs */
+	src_banked = ms->pmsa_bank1_gr;
+	for (i = 0; i < 16; ++i)
+		dst_banked[i] = src_banked[i];
+	pt->b0 = ms->pmsa_br0;
+	sw->b1 = ms->pmsa_br1;
+
+	/*
+	 * Construct the NaT bits for the pt-regs structure.  PUT_NAT_BIT takes
+	 * the low bit of nat_bits (shifting nat_bits right by one) and stores
+	 * it in 'dst' at the bit position given by bits 3..8 of 'addr'.
+	 */
+#	define PUT_NAT_BIT(dst, addr)					\
+	do {								\
+		bit = nat_bits & 1; nat_bits >>= 1;			\
+		shift = ((unsigned long) (addr) >> 3) & 0x3f;		\
+		(dst) = ((dst) & ~(1UL << shift)) | (bit << shift);	\
+	} while (0)
+
+	/*
+	 * Rotate the saved NaT bits such that bit 0 corresponds to pmsa_gr[0].
+	 * A rotation by 0 must be skipped: it would need a left shift by 64,
+	 * which is undefined for a 64-bit operand.
+	 */
+	shift = ((unsigned long) &ms->pmsa_gr[0] >> 3) & 0x3f;
+	nat_bits = ms->pmsa_nat_bits;
+	if (shift != 0)
+		nat_bits = (nat_bits >> shift) | (nat_bits << (64 - shift));
+
+	PUT_NAT_BIT(sw->caller_unat, &pt->r1);
+	PUT_NAT_BIT(sw->caller_unat, &pt->r2);
+	PUT_NAT_BIT(sw->caller_unat, &pt->r3);
+	PUT_NAT_BIT(sw->ar_unat, &sw->r4);
+	PUT_NAT_BIT(sw->ar_unat, &sw->r5);
+	PUT_NAT_BIT(sw->ar_unat, &sw->r6);
+	PUT_NAT_BIT(sw->ar_unat, &sw->r7);
+	PUT_NAT_BIT(sw->caller_unat, &pt->r8);	PUT_NAT_BIT(sw->caller_unat, &pt->r9);
+	PUT_NAT_BIT(sw->caller_unat, &pt->r10);	PUT_NAT_BIT(sw->caller_unat, &pt->r11);
+	PUT_NAT_BIT(sw->caller_unat, &pt->r12);	PUT_NAT_BIT(sw->caller_unat, &pt->r13);
+	PUT_NAT_BIT(sw->caller_unat, &pt->r14);	PUT_NAT_BIT(sw->caller_unat, &pt->r15);
+	nat_bits >>= 16;	/* skip over bank0 NaT bits */
+	PUT_NAT_BIT(sw->caller_unat, &pt->r16);	PUT_NAT_BIT(sw->caller_unat, &pt->r17);
+	PUT_NAT_BIT(sw->caller_unat, &pt->r18);	PUT_NAT_BIT(sw->caller_unat, &pt->r19);
+	PUT_NAT_BIT(sw->caller_unat, &pt->r20);	PUT_NAT_BIT(sw->caller_unat, &pt->r21);
+	PUT_NAT_BIT(sw->caller_unat, &pt->r22);	PUT_NAT_BIT(sw->caller_unat, &pt->r23);
+	PUT_NAT_BIT(sw->caller_unat, &pt->r24);	PUT_NAT_BIT(sw->caller_unat, &pt->r25);
+	PUT_NAT_BIT(sw->caller_unat, &pt->r26);	PUT_NAT_BIT(sw->caller_unat, &pt->r27);
+	PUT_NAT_BIT(sw->caller_unat, &pt->r28);	PUT_NAT_BIT(sw->caller_unat, &pt->r29);
+	PUT_NAT_BIT(sw->caller_unat, &pt->r30);	PUT_NAT_BIT(sw->caller_unat, &pt->r31);
+}
+
+/*
+ * init_handler_platform
+ *
+ * Dump the state captured at INIT time: the min-state area, a backtrace of
+ * the current task, and a backtrace of every other thread.  This function
+ * does not return; it spins forever once the dump is complete.
+ *
+ * Inputs
+ *	ms	min-state save area to dump
+ *	pt	pt_regs, filled in from ms before unwinding
+ *	sw	switch_stack, filled in from ms before unwinding
+ *
+ * Outputs
+ *	None
+ */
+static void
+init_handler_platform (pal_min_state_area_t *ms,
+		       struct pt_regs *pt, struct switch_stack *sw)
+{
+	struct unw_frame_info info;
+	struct task_struct *g, *t;
+#ifdef CONFIG_SMP
+	int tasklist_locked = 0;
+#endif
+
+	/* if a kernel debugger is available call it here else just dump the registers */
+
+	/*
+	 * Wait for a bit.  On some machines (e.g., HP's zx2000 and zx6000), INIT can be
+	 * generated via the BMC's command-line interface, but since the console is on the
+	 * same serial line, the user will need some time to switch out of the BMC before
+	 * the dump begins.
+	 */
+	printk("Delaying for 5 seconds...\n");
+	udelay(5*1000000);
+	show_min_state(ms);
+
+	printk("Backtrace of current task (pid %d, %s)\n", current->pid, current->comm);
+	fetch_min_state(ms, pt, sw);
+	unw_init_from_interruption(&info, current, pt, sw);
+	ia64_do_show_stack(&info, NULL);
+
+#ifdef CONFIG_SMP
+	/*
+	 * read_trylock() would be handy...
+	 *
+	 * Remember whether the lock was taken.  Re-testing write_lock at unlock
+	 * time could give a different answer and unbalance the lock.
+	 */
+	if (!tasklist_lock.write_lock) {
+		read_lock(&tasklist_lock);
+		tasklist_locked = 1;
+	}
+#endif
+	do_each_thread (g, t) {
+		if (t == current)
+			continue;	/* already dumped above */
+
+		printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm);
+		show_stack(t, NULL);
+	} while_each_thread (g, t);
+#ifdef CONFIG_SMP
+	if (tasklist_locked)
+		read_unlock(&tasklist_lock);
+#endif
+
+	printk("\nINIT dump complete.  Please reboot now.\n");
+	while (1);			/* hang city if no debugger */
+}
+
+#ifdef CONFIG_ACPI
+/*
+ * ia64_mca_register_cpev
+ *
+ * Tell SAL which interrupt vector to raise for corrected platform errors.
+ * A failure is reported with printk and is otherwise ignored.
+ *
+ * Inputs
+ *	cpev	Corrected Platform Error Vector number
+ *
+ * Outputs
+ *	None
+ */
+static void
+ia64_mca_register_cpev (int cpev)
+{
+	struct ia64_sal_retval ret;
+
+	ret = ia64_sal_mc_set_params(SAL_MC_PARAM_CPE_INT, SAL_MC_PARAM_MECHANISM_INT, cpev, 0, 0);
+	if (ret.status == 0) {
+		IA64_MCA_DEBUG("%s: corrected platform error "
+			       "vector %#x registered\n", __FUNCTION__, cpev);
+		return;
+	}
+
+	/* a non-zero status means SAL rejected the request */
+	printk(KERN_ERR "Failed to register Corrected Platform "
+	       "Error interrupt vector with SAL (status %ld)\n", ret.status);
+}
+#endif /* CONFIG_ACPI */
+
+#endif /* PLATFORM_MCA_HANDLERS */
+
+/*
+ * ia64_mca_cmc_vector_setup
+ *
+ * Setup the corrected machine check vector register in the processor.
+ * The register is written with IA64_CMC_VECTOR and with the mask bit set,
+ * so the interrupt stays disabled until ia64_mca_cmc_vector_enable clears
+ * the mask (ia64_mca_late_init schedules that through cmc_enable_work).
+ * This function is invoked on a per-processor basis.
+ *
+ * Inputs
+ * None
+ *
+ * Outputs
+ * None
+ */
+void
+ia64_mca_cmc_vector_setup (void)
+{
+ cmcv_reg_t cmcv;
+
+ cmcv.cmcv_regval = 0; /* start from an all-zero register image */
+ cmcv.cmcv_mask = 1; /* Mask/disable interrupt at first */
+ cmcv.cmcv_vector = IA64_CMC_VECTOR;
+ ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval);
+
+ IA64_MCA_DEBUG("%s: CPU %d corrected "
+ "machine check vector %#x registered.\n",
+ __FUNCTION__, smp_processor_id(), IA64_CMC_VECTOR);
+
+ IA64_MCA_DEBUG("%s: CPU %d CMCV = %#016lx\n",
+ __FUNCTION__, smp_processor_id(), ia64_getreg(_IA64_REG_CR_CMCV));
+}
+
+/*
+ * ia64_mca_cmc_vector_disable
+ *
+ * Set the mask bit in this processor's corrected machine check vector
+ * register.  This function is invoked on a per-processor basis.
+ *
+ * Inputs
+ *	dummy	unused
+ *
+ * Outputs
+ *	None
+ */
+static void
+ia64_mca_cmc_vector_disable (void *dummy)
+{
+	cmcv_reg_t reg;
+
+	/* read-modify-write: the only field assigned here is the mask */
+	reg.cmcv_regval = ia64_getreg(_IA64_REG_CR_CMCV);
+	reg.cmcv_mask = 1;
+	ia64_setreg(_IA64_REG_CR_CMCV, reg.cmcv_regval);
+
+	IA64_MCA_DEBUG("%s: CPU %d corrected "
+		       "machine check vector %#x disabled.\n",
+		       __FUNCTION__, smp_processor_id(), reg.cmcv_vector);
+}
+
+/*
+ * ia64_mca_cmc_vector_enable
+ *
+ * Clear the mask bit in this processor's corrected machine check vector
+ * register.  This function is invoked on a per-processor basis.
+ *
+ * Inputs
+ *	dummy	unused
+ *
+ * Outputs
+ *	None
+ */
+static void
+ia64_mca_cmc_vector_enable (void *dummy)
+{
+	cmcv_reg_t reg;
+
+	/* read-modify-write: the only field assigned here is the mask */
+	reg.cmcv_regval = ia64_getreg(_IA64_REG_CR_CMCV);
+	reg.cmcv_mask = 0;
+	ia64_setreg(_IA64_REG_CR_CMCV, reg.cmcv_regval);
+
+	IA64_MCA_DEBUG("%s: CPU %d corrected "
+		       "machine check vector %#x enabled.\n",
+		       __FUNCTION__, smp_processor_id(), reg.cmcv_vector);
+}
+
+/*
+ * ia64_mca_cmc_vector_disable_keventd
+ *
+ * Called via keventd (smp_call_function() is not safe in interrupt context) to
+ * disable the cmc interrupt vector.
+ * It is the work function of cmc_disable_work and runs
+ * ia64_mca_cmc_vector_disable on each cpu.
+ *
+ * Inputs : unused (work item data, ignored)
+ * Outputs : None
+ */
+static void
+ia64_mca_cmc_vector_disable_keventd(void *unused)
+{
+ on_each_cpu(ia64_mca_cmc_vector_disable, NULL, 1, 0);
+}
+
+/*
+ * ia64_mca_cmc_vector_enable_keventd
+ *
+ * Called via keventd (smp_call_function() is not safe in interrupt context) to
+ * enable the cmc interrupt vector.
+ * It is the work function of cmc_enable_work and runs
+ * ia64_mca_cmc_vector_enable on each cpu.
+ *
+ * Inputs : unused (work item data, ignored)
+ * Outputs : None
+ */
+static void
+ia64_mca_cmc_vector_enable_keventd(void *unused)
+{
+ on_each_cpu(ia64_mca_cmc_vector_enable, NULL, 1, 0);
+}
+
+/*
+ * ia64_mca_wakeup_ipi_wait
+ *
+ * Spin until the bit for IA64_MCA_WAKEUP_VECTOR shows up in the interrupt
+ * request register that covers it.  The monarch processor sends that
+ * inter-cpu interrupt once it is done with handling the MCA.
+ *
+ * Inputs  : None
+ * Outputs : None
+ */
+static void
+ia64_mca_wakeup_ipi_wait(void)
+{
+	/* each IRR covers 64 vectors: pick the register, then the bit inside it */
+	const int which_irr = IA64_MCA_WAKEUP_VECTOR >> 6;
+	const int bit_in_irr = IA64_MCA_WAKEUP_VECTOR & 0x3f;
+	u64 pending = 0;
+
+	for (;;) {
+		if (which_irr == 0)
+			pending = ia64_getreg(_IA64_REG_CR_IRR0);
+		else if (which_irr == 1)
+			pending = ia64_getreg(_IA64_REG_CR_IRR1);
+		else if (which_irr == 2)
+			pending = ia64_getreg(_IA64_REG_CR_IRR2);
+		else if (which_irr == 3)
+			pending = ia64_getreg(_IA64_REG_CR_IRR3);
+
+		cpu_relax();
+
+		if (pending & (1UL << bit_in_irr))
+			break;
+	}
+}
+
+/*
+ * ia64_mca_wakeup
+ *
+ * Send an inter-cpu interrupt to wake-up a particular cpu
+ * and mark that cpu to be out of rendez.
+ * The interrupt uses IA64_MCA_WAKEUP_VECTOR, which is the vector that
+ * ia64_mca_wakeup_ipi_wait polls for.
+ *
+ * Inputs : cpuid
+ * Outputs : None
+ */
+static void
+ia64_mca_wakeup(int cpu)
+{
+ platform_send_ipi(cpu, IA64_MCA_WAKEUP_VECTOR, IA64_IPI_DM_INT, 0);
+ ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; /* reset the checkin flag */
+
+}
+
+/*
+ * ia64_mca_wakeup_all
+ *
+ * Wake up every online cpu whose rendezvous checkin flag says it has
+ * rendez'ed previously.
+ *
+ * Inputs  : None
+ * Outputs : None
+ */
+static void
+ia64_mca_wakeup_all(void)
+{
+	int cpu = 0;
+
+	/* ia64_mca_wakeup() resets the checkin flag of each cpu it wakes */
+	while (cpu < NR_CPUS) {
+		if (cpu_online(cpu) &&
+		    ia64_mc_info.imi_rendez_checkin[cpu] == IA64_MCA_RENDEZ_CHECKIN_DONE)
+			ia64_mca_wakeup(cpu);
+		cpu++;
+	}
+}
+
+/*
+ * ia64_mca_rendez_int_handler
+ *
+ * This is the handler used to put slave processors into a spinloop
+ * while the monarch processor does the mca handling; the monarch
+ * wakes each slave up once it is done.
+ *
+ * Inputs : rendez_irq, arg, ptregs (none of them is used)
+ * Outputs : IRQ_HANDLED
+ */
+static irqreturn_t
+ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *ptregs)
+{
+ unsigned long flags;
+ int cpu = smp_processor_id();
+
+ /* Mask all interrupts */
+ local_irq_save(flags);
+
+ ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_DONE;
+ /* Register with the SAL monarch that the slave has
+ * reached SAL
+ */
+ ia64_sal_mc_rendez();
+
+ /* Wait for the wakeup IPI from the monarch
+ * This waiting is done by polling on the wakeup-interrupt
+ * vector bit in the processor's IRRs
+ */
+ ia64_mca_wakeup_ipi_wait();
+
+ /* Restore the interrupt state saved above */
+ local_irq_restore(flags);
+ return IRQ_HANDLED;
+}
+
+/*
+ * ia64_mca_wakeup_int_handler
+ *
+ * The interrupt handler for processing the inter-cpu interrupt to the
+ * slave cpu which was spinning in the rendez loop.
+ * Since this spinning is done by turning off the interrupts and
+ * polling on the wakeup-interrupt bit in the IRR, there is
+ * nothing useful to be done in the handler.
+ *
+ * Inputs : wakeup_irq (Wakeup-interrupt bit)
+ * arg (Interrupt handler specific argument)
+ * ptregs (Exception frame at the time of the interrupt)
+ * Outputs : IRQ_HANDLED, unconditionally
+ *
+ */
+static irqreturn_t
+ia64_mca_wakeup_int_handler(int wakeup_irq, void *arg, struct pt_regs *ptregs)
+{
+ return IRQ_HANDLED;
+}
+
+/*
+ * ia64_return_to_sal_check
+ *
+ * Fill in the OS_MCA to SAL return state (ia64_os_to_sal_handoff_state)
+ * before the OS_MCA handler goes back to the OS_MCA dispatch code, which
+ * finally returns control to SAL.
+ *
+ * Inputs  : recover (non-zero: report the error as corrected;
+ *                    zero: ask for a cold boot)
+ * Outputs : None
+ */
+
+static void
+ia64_return_to_sal_check(int recover)
+{
+	/* Status reported back to SAL depends on whether recovery succeeded */
+	ia64_os_to_sal_handoff_state.imots_os_status =
+		recover ? IA64_MCA_CORRECTED : IA64_MCA_COLD_BOOT;
+
+	/* Default = tell SAL to return to same context */
+	ia64_os_to_sal_handoff_state.imots_context = IA64_MCA_SAME_CONTEXT;
+
+	/*
+	 * Carry over the values received in the sal_to_os_mca_handoff so
+	 * that they are available at the time of os_mca_to_sal_handoff
+	 */
+	ia64_os_to_sal_handoff_state.imots_sal_gp =
+		ia64_sal_to_os_handoff_state.imsto_sal_gp;
+	ia64_os_to_sal_handoff_state.imots_sal_check_ra =
+		ia64_sal_to_os_handoff_state.imsto_sal_check_ra;
+	ia64_os_to_sal_handoff_state.imots_new_min_state =
+		(u64 *)ia64_sal_to_os_handoff_state.pal_min_state;
+}
+
+/* Function pointer for extra MCA recovery.
+ * NULL until a hook is installed with ia64_reg_MCA_extension().
+ * ia64_mca_ucmc_handler() calls it with the current MCA log buffer and the
+ * two SAL handoff states; a non-zero return counts as "recovered".
+ */
+int (*ia64_mca_ucmc_extension)
+ (void*,ia64_mca_sal_to_os_state_t*,ia64_mca_os_to_sal_state_t*)
+ = NULL;
+
+/*
+ * ia64_reg_MCA_extension
+ *
+ * Install fn as the extra MCA recovery hook.
+ *
+ * Inputs  : fn (hook to store in ia64_mca_ucmc_extension)
+ * Outputs : 0 if the hook was installed,
+ *           1 if a hook is already registered (nothing is changed)
+ */
+int
+ia64_reg_MCA_extension(void *fn)
+{
+	if (!ia64_mca_ucmc_extension) {
+		ia64_mca_ucmc_extension = fn;
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * ia64_unreg_MCA_extension
+ *
+ * Remove the extra MCA recovery hook, if one is registered.
+ *
+ * Inputs  : None
+ * Outputs : None
+ */
+void
+ia64_unreg_MCA_extension(void)
+{
+	if (!ia64_mca_ucmc_extension)
+		return;		/* nothing registered */
+
+	ia64_mca_ucmc_extension = NULL;
+}
+
+EXPORT_SYMBOL(ia64_reg_MCA_extension); /* hook registration entry point */
+EXPORT_SYMBOL(ia64_unreg_MCA_extension); /* hook removal entry point */
+
+/*
+ * ia64_mca_ucmc_handler
+ *
+ * This is the uncorrectable machine check handler called from the OS_MCA
+ * dispatch code, which is in turn called from SAL_CHECK().
+ * It logs the MCA error record, decides whether the error can be treated
+ * as recovered, wakes up the processors that are spinning in the
+ * rendezvous loop and prepares the state handed back to SAL.
+ * This handler code is supposed to be run only on the monarch processor.
+ *
+ * Inputs  : None
+ * Outputs : None
+ */
+void
+ia64_mca_ucmc_handler(void)
+{
+	pal_processor_state_info_t *psp = (pal_processor_state_info_t *)
+		&ia64_sal_to_os_handoff_state.proc_state_param;
+	int recover;
+
+	/* Get the MCA error record and log it */
+	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA);
+
+	/* Recoverable when a TLB error is the only error in this record ... */
+	recover = psp->tc && !psp->cc && !psp->bc && !psp->rc && !psp->uc;
+
+	/* ... or when the registered extension reports that it recovered */
+	if (!recover && ia64_mca_ucmc_extension)
+		recover = ia64_mca_ucmc_extension(
+				IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA),
+				&ia64_sal_to_os_handoff_state,
+				&ia64_os_to_sal_handoff_state) != 0;
+
+	if (recover) {
+		sal_log_record_header_t *hdr = IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA);
+
+		/* downgrade the logged record and clear the SAL state info */
+		hdr->severity = sal_log_severity_corrected;
+		ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA);
+	}
+
+	/*
+	 * Wakeup all the processors which are spinning in the rendezvous
+	 * loop.
+	 */
+	ia64_mca_wakeup_all();
+
+	/* Return to SAL */
+	ia64_return_to_sal_check(recover);
+}
+
+static DECLARE_WORK(cmc_disable_work, ia64_mca_cmc_vector_disable_keventd, NULL); /* scheduled by ia64_mca_cmc_int_handler */
+static DECLARE_WORK(cmc_enable_work, ia64_mca_cmc_vector_enable_keventd, NULL); /* scheduled by ia64_mca_cmc_int_caller and ia64_mca_late_init */
+
+/*
+ * ia64_mca_cmc_int_handler
+ *
+ * This is the corrected machine check interrupt handler.
+ * Right now the logs are extracted and displayed in a well-defined
+ * format.
+ *
+ * While polling is not enabled, the handler also rate-limits CMCs:
+ * cmc_history holds the jiffies values of recent interrupts, and when the
+ * current one plus the entries from the last HZ jiffies reach
+ * CMC_HISTORY_LENGTH, it sets cmc_polling_enabled, schedules
+ * cmc_disable_work and arms cmc_poll_timer.
+ *
+ * Inputs
+ * interrupt number
+ * client data arg ptr
+ * saved registers ptr
+ *
+ * Outputs
+ * IRQ_HANDLED
+ */
+static irqreturn_t
+ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs)
+{
+ static unsigned long cmc_history[CMC_HISTORY_LENGTH];
+ static int index; /* next cmc_history slot to overwrite */
+ static DEFINE_SPINLOCK(cmc_history_lock);
+
+ IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n",
+ __FUNCTION__, cmc_irq, smp_processor_id());
+
+ /* SAL spec states this should run w/ interrupts enabled */
+ local_irq_enable();
+
+ /* Get the CMC error record and log it */
+ ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC);
+
+ spin_lock(&cmc_history_lock);
+ if (!cmc_polling_enabled) {
+ int i, count = 1; /* we know 1 happened now */
+ unsigned long now = jiffies;
+
+ for (i = 0; i < CMC_HISTORY_LENGTH; i++) {
+ if (now - cmc_history[i] <= HZ)
+ count++;
+ }
+
+ IA64_MCA_DEBUG(KERN_INFO "CMC threshold %d/%d\n", count, CMC_HISTORY_LENGTH);
+ if (count >= CMC_HISTORY_LENGTH) {
+
+ cmc_polling_enabled = 1;
+ spin_unlock(&cmc_history_lock);
+ schedule_work(&cmc_disable_work);
+
+ /*
+ * Corrected errors will still be corrected, but
+ * make sure there's a log somewhere that indicates
+ * something is generating more than we can handle.
+ */
+ printk(KERN_WARNING "WARNING: Switching to polling CMC handler; error records may be lost\n");
+
+ mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL);
+
+ /* lock already released, get out now */
+ return IRQ_HANDLED;
+ } else {
+ cmc_history[index++] = now; /* cmc_history is used as a ring buffer */
+ if (index == CMC_HISTORY_LENGTH)
+ index = 0;
+ }
+ }
+ spin_unlock(&cmc_history_lock);
+ return IRQ_HANDLED;
+}
+
+/*
+ * ia64_mca_cmc_int_caller
+ *
+ * Triggered by the sw interrupt from the CMC polling routine.  Runs the
+ * real interrupt handler on this cpu, then either passes the sw interrupt
+ * on to the next online cpu or, on the last one, decides whether to keep
+ * polling or to go back to interrupt mode.
+ *
+ * Inputs
+ *	interrupt number
+ *	client data arg ptr
+ *	saved registers ptr
+ * Outputs
+ *	handled
+ */
+static irqreturn_t
+ia64_mca_cmc_int_caller(int cmc_irq, void *arg, struct pt_regs *ptregs)
+{
+	static int start_count = -1;
+	unsigned int next_cpu = smp_processor_id();
+
+	/* If first cpu, update count */
+	if (start_count == -1)
+		start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CMC);
+
+	ia64_mca_cmc_int_handler(cmc_irq, arg, ptregs);
+
+	/* find the next online cpu after this one */
+	do {
+		next_cpu++;
+	} while (next_cpu < NR_CPUS && !cpu_online(next_cpu));
+
+	if (next_cpu < NR_CPUS) {
+		/* not the last cpu: keep the cascade going */
+		platform_send_ipi(next_cpu, IA64_CMCP_VECTOR, IA64_IPI_DM_INT, 0);
+		return IRQ_HANDLED;
+	}
+
+	if (start_count != IA64_LOG_COUNT(SAL_INFO_TYPE_CMC)) {
+		/* a record was logged during this pass: poll again later */
+		mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL);
+	} else {
+		/* If no log record, switch out of polling mode */
+		printk(KERN_WARNING "Returning to interrupt driven CMC handler\n");
+		schedule_work(&cmc_enable_work);
+		cmc_polling_enabled = 0;
+	}
+
+	start_count = -1;
+	return IRQ_HANDLED;
+}
+
+/*
+ * ia64_mca_cmc_poll
+ *
+ * Poll for Corrected Machine Checks (CMCs)
+ * This is the function of cmc_poll_timer.  It sends IA64_CMCP_VECTOR to the
+ * first online cpu; ia64_mca_cmc_int_caller then passes it from cpu to cpu.
+ *
+ * Inputs : dummy(unused)
+ * Outputs : None
+ *
+ */
+static void
+ia64_mca_cmc_poll (unsigned long dummy)
+{
+ /* Trigger a CMC interrupt cascade */
+ platform_send_ipi(first_cpu(cpu_online_map), IA64_CMCP_VECTOR, IA64_IPI_DM_INT, 0);
+}
+
+/*
+ * ia64_mca_cpe_int_caller
+ *
+ * Triggered by sw interrupt from CPE polling routine. Calls
+ * real interrupt handler and either triggers a sw interrupt
+ * on the next cpu or does cleanup at the end.
+ *
+ * On the last cpu the polling interval is adapted: it is halved (but not
+ * below MIN_CPE_POLL_INTERVAL) when a record was logged during the pass,
+ * doubled (but not above MAX_CPE_POLL_INTERVAL) when nothing was logged and
+ * there is no CPE interrupt vector, and otherwise polling stops and the
+ * CPE interrupt is enabled again.
+ *
+ * Inputs
+ * interrupt number
+ * client data arg ptr
+ * saved registers ptr
+ * Outputs
+ * handled
+ */
+#ifdef CONFIG_ACPI
+
+static irqreturn_t
+ia64_mca_cpe_int_caller(int cpe_irq, void *arg, struct pt_regs *ptregs)
+{
+ static int start_count = -1; /* log count at the start of a pass, -1 between passes */
+ static int poll_time = MIN_CPE_POLL_INTERVAL;
+ unsigned int cpuid;
+
+ cpuid = smp_processor_id();
+
+ /* If first cpu, update count */
+ if (start_count == -1)
+ start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CPE);
+
+ ia64_mca_cpe_int_handler(cpe_irq, arg, ptregs);
+
+ for (++cpuid ; cpuid < NR_CPUS && !cpu_online(cpuid) ; cpuid++); /* find the next online cpu */
+
+ if (cpuid < NR_CPUS) {
+ platform_send_ipi(cpuid, IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0);
+ } else {
+ /*
+ * If a log was recorded, increase our polling frequency,
+ * otherwise, backoff or return to interrupt mode.
+ */
+ if (start_count != IA64_LOG_COUNT(SAL_INFO_TYPE_CPE)) {
+ poll_time = max(MIN_CPE_POLL_INTERVAL, poll_time / 2);
+ } else if (cpe_vector < 0) {
+ poll_time = min(MAX_CPE_POLL_INTERVAL, poll_time * 2);
+ } else {
+ poll_time = MIN_CPE_POLL_INTERVAL;
+
+ printk(KERN_WARNING "Returning to interrupt driven CPE handler\n");
+ enable_irq(local_vector_to_irq(IA64_CPE_VECTOR));
+ cpe_poll_enabled = 0;
+ }
+
+ if (cpe_poll_enabled)
+ mod_timer(&cpe_poll_timer, jiffies + poll_time);
+ start_count = -1;
+ }
+
+ return IRQ_HANDLED;
+}
+
+#endif /* CONFIG_ACPI */
+
+/*
+ * ia64_mca_cpe_poll
+ *
+ * Poll for Corrected Platform Errors (CPEs), trigger interrupt
+ * on first cpu, from there it will trickle through all the cpus.
+ * This is the function of cpe_poll_timer; ia64_mca_late_init also calls it
+ * directly to start polling when there is no CPE interrupt vector.
+ *
+ * Inputs : dummy(unused)
+ * Outputs : None
+ *
+ */
+static void
+ia64_mca_cpe_poll (unsigned long dummy)
+{
+ /* Trigger a CPE interrupt cascade */
+ platform_send_ipi(first_cpu(cpu_online_map), IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0);
+}
+
+/*
+ * C portion of the OS INIT handler
+ *
+ * Called from ia64_monarch_init_handler
+ *
+ * Inputs: pointer to pt_regs where processor info was saved,
+ * pointer to the matching switch_stack.
+ *
+ * Returns:
+ * Nothing. The function is void, and init_handler_platform, which it
+ * calls last, ends in an endless loop, so control does not come back
+ * to the caller.
+ *
+ */
+void
+ia64_init_handler (struct pt_regs *pt, struct switch_stack *sw)
+{
+ pal_min_state_area_t *ms;
+
+ oops_in_progress = 1; /* avoid deadlock in printk, but it makes recovery dodgy */
+ console_loglevel = 15; /* make sure printks make it to console */
+
+ printk(KERN_INFO "Entered OS INIT handler. PSP=%lx\n",
+ ia64_sal_to_os_handoff_state.proc_state_param);
+
+ /*
+ * Address of minstate area provided by PAL is physical,
+ * uncacheable (bit 63 set). Convert to Linux virtual
+ * address in region 6.
+ */
+ ms = (pal_min_state_area_t *)(ia64_sal_to_os_handoff_state.pal_min_state | (6ul<<61));
+
+ init_handler_platform(ms, pt, sw); /* call platform specific routines */
+}
+
+static int __init
+ia64_mca_disable_cpe_polling(char *str)
+{
+ cpe_poll_enabled = 0; /* ia64_mca_late_init then does not start the CPE poll cascade */
+ return 1;
+}
+
+__setup("disable_cpe_poll", ia64_mca_disable_cpe_polling); /* handler for the "disable_cpe_poll" boot option */
+
+/*
+ * irqaction descriptors for the MCA related interrupt vectors.
+ * Every one of them is registered with SA_INTERRUPT set.
+ */
+
+/* corrected machine check interrupt */
+static struct irqaction cmci_irqaction = {
+	.name =		"cmc_hndlr",
+	.flags =	SA_INTERRUPT,
+	.handler =	ia64_mca_cmc_int_handler,
+};
+
+/* sw interrupt used by the CMC polling cascade */
+static struct irqaction cmcp_irqaction = {
+	.name =		"cmc_poll",
+	.flags =	SA_INTERRUPT,
+	.handler =	ia64_mca_cmc_int_caller,
+};
+
+/* MCA rendezvous request */
+static struct irqaction mca_rdzv_irqaction = {
+	.name =		"mca_rdzv",
+	.flags =	SA_INTERRUPT,
+	.handler =	ia64_mca_rendez_int_handler,
+};
+
+/* MCA wakeup request */
+static struct irqaction mca_wkup_irqaction = {
+	.name =		"mca_wkup",
+	.flags =	SA_INTERRUPT,
+	.handler =	ia64_mca_wakeup_int_handler,
+};
+
+#ifdef CONFIG_ACPI
+/* corrected platform error interrupt */
+static struct irqaction mca_cpe_irqaction = {
+	.name =		"cpe_hndlr",
+	.flags =	SA_INTERRUPT,
+	.handler =	ia64_mca_cpe_int_handler,
+};
+
+/* sw interrupt used by the CPE polling cascade */
+static struct irqaction mca_cpep_irqaction = {
+	.name =		"cpe_poll",
+	.flags =	SA_INTERRUPT,
+	.handler =	ia64_mca_cpe_int_caller,
+};
+#endif /* CONFIG_ACPI */
+
+/* Do per-CPU MCA-related initialization.
+ *
+ * On cpu 0 this first allocates one struct ia64_mca_cpu per possible cpu
+ * and records the physical address of each in __per_cpu_mca[]. On every
+ * cpu it then fills in the per-CPU variables ia64_mca_data,
+ * ia64_mca_per_cpu_pte, ia64_mca_pal_base and ia64_mca_pal_pte.
+ *
+ * Inputs : cpu_data (address of this cpu's per-CPU page)
+ * Outputs : None
+ */
+
+void __devinit
+ia64_mca_cpu_init(void *cpu_data)
+{
+ void *pal_vaddr;
+
+ if (smp_processor_id() == 0) {
+ void *mca_data;
+ int cpu;
+
+ mca_data = alloc_bootmem(sizeof(struct ia64_mca_cpu)
+ * NR_CPUS);
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ __per_cpu_mca[cpu] = __pa(mca_data);
+ mca_data += sizeof(struct ia64_mca_cpu);
+ }
+ }
+
+ /*
+ * The MCA info structure was allocated earlier and its
+ * physical address saved in __per_cpu_mca[cpu]. Copy that
+ * address to ia64_mca_data so we can access it as a per-CPU
+ * variable.
+ */
+ __get_cpu_var(ia64_mca_data) = __per_cpu_mca[smp_processor_id()];
+
+ /*
+ * Stash away a copy of the PTE needed to map the per-CPU page.
+ * We may need it during MCA recovery.
+ */
+ __get_cpu_var(ia64_mca_per_cpu_pte) =
+ pte_val(mk_pte_phys(__pa(cpu_data), PAGE_KERNEL));
+
+ /*
+ * Also, stash away a copy of the PAL address and the PTE
+ * needed to map it.
+ */
+ pal_vaddr = efi_get_pal_addr();
+ if (!pal_vaddr)
+ return; /* no PAL address: the two PAL variables are left untouched */
+ __get_cpu_var(ia64_mca_pal_base) =
+ GRANULEROUNDDOWN((unsigned long) pal_vaddr);
+ __get_cpu_var(ia64_mca_pal_pte) = pte_val(mk_pte_phys(__pa(pal_vaddr),
+ PAGE_KERNEL));
+}
+
+/*
+ * ia64_mca_init
+ *
+ * Do all the system level mca specific initialization.
+ *
+ * 1. Register spinloop and wakeup request interrupt vectors
+ *
+ * 2. Register OS_MCA handler entry point
+ *
+ * 3. Register OS_INIT handler entry point
+ *
+ * 4. Initialize MCA/CMC/INIT related log buffers maintained by the OS.
+ *
+ * Note that this initialization is done very early before some kernel
+ * services are available.
+ *
+ * If any of the SAL registrations fails, the function prints an error and
+ * returns at once. mca_init then stays unset, and ia64_mca_late_init does
+ * nothing.
+ *
+ * Inputs : None
+ *
+ * Outputs : None
+ */
+void __init
+ia64_mca_init(void)
+{
+ ia64_fptr_t *mon_init_ptr = (ia64_fptr_t *)ia64_monarch_init_handler;
+ ia64_fptr_t *slave_init_ptr = (ia64_fptr_t *)ia64_slave_init_handler;
+ ia64_fptr_t *mca_hldlr_ptr = (ia64_fptr_t *)ia64_os_mca_dispatch;
+ int i;
+ s64 rc;
+ struct ia64_sal_retval isrv;
+ u64 timeout = IA64_MCA_RENDEZ_TIMEOUT; /* platform specific */
+
+ IA64_MCA_DEBUG("%s: begin\n", __FUNCTION__);
+
+ /* Clear the Rendez checkin flag for all cpus */
+ for(i = 0 ; i < NR_CPUS; i++)
+ ia64_mc_info.imi_rendez_checkin[i] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
+
+ /*
+ * Register the rendezvous spinloop and wakeup mechanism with SAL
+ */
+
+ /* Register the rendezvous interrupt vector with SAL.
+ * On status -2 the call is repeated with the timeout that SAL
+ * returned in v0; any other non-zero status aborts the init.
+ */
+ while (1) {
+ isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_INT,
+ SAL_MC_PARAM_MECHANISM_INT,
+ IA64_MCA_RENDEZ_VECTOR,
+ timeout,
+ SAL_MC_PARAM_RZ_ALWAYS);
+ rc = isrv.status;
+ if (rc == 0)
+ break;
+ if (rc == -2) {
+ printk(KERN_INFO "Increasing MCA rendezvous timeout from "
+ "%ld to %ld milliseconds\n", timeout, isrv.v0);
+ timeout = isrv.v0;
+ continue;
+ }
+ printk(KERN_ERR "Failed to register rendezvous interrupt "
+ "with SAL (status %ld)\n", rc);
+ return;
+ }
+
+ /* Register the wakeup interrupt vector with SAL */
+ isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_WAKEUP,
+ SAL_MC_PARAM_MECHANISM_INT,
+ IA64_MCA_WAKEUP_VECTOR,
+ 0, 0);
+ rc = isrv.status;
+ if (rc) {
+ printk(KERN_ERR "Failed to register wakeup interrupt with SAL "
+ "(status %ld)\n", rc);
+ return;
+ }
+
+ IA64_MCA_DEBUG("%s: registered MCA rendezvous spinloop and wakeup mech.\n", __FUNCTION__);
+
+ ia64_mc_info.imi_mca_handler = ia64_tpa(mca_hldlr_ptr->fp);
+ /*
+ * XXX - disable SAL checksum by setting size to 0; should be
+ * ia64_tpa(ia64_os_mca_dispatch_end) - ia64_tpa(ia64_os_mca_dispatch);
+ */
+ ia64_mc_info.imi_mca_handler_size = 0;
+
+ /* Register the os mca handler with SAL */
+ if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_MCA,
+ ia64_mc_info.imi_mca_handler,
+ ia64_tpa(mca_hldlr_ptr->gp),
+ ia64_mc_info.imi_mca_handler_size,
+ 0, 0, 0)))
+ {
+ printk(KERN_ERR "Failed to register OS MCA handler with SAL "
+ "(status %ld)\n", rc);
+ return;
+ }
+
+ IA64_MCA_DEBUG("%s: registered OS MCA handler with SAL at 0x%lx, gp = 0x%lx\n", __FUNCTION__,
+ ia64_mc_info.imi_mca_handler, ia64_tpa(mca_hldlr_ptr->gp));
+
+ /*
+ * XXX - disable SAL checksum by setting size to 0, should be
+ * size of the actual init handler in mca_asm.S.
+ */
+ ia64_mc_info.imi_monarch_init_handler = ia64_tpa(mon_init_ptr->fp);
+ ia64_mc_info.imi_monarch_init_handler_size = 0;
+ ia64_mc_info.imi_slave_init_handler = ia64_tpa(slave_init_ptr->fp);
+ ia64_mc_info.imi_slave_init_handler_size = 0;
+
+ IA64_MCA_DEBUG("%s: OS INIT handler at %lx\n", __FUNCTION__,
+ ia64_mc_info.imi_monarch_init_handler);
+
+ /* Register the os init handler with SAL (monarch and slave entry points) */
+ if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_INIT,
+ ia64_mc_info.imi_monarch_init_handler,
+ ia64_tpa(ia64_getreg(_IA64_REG_GP)),
+ ia64_mc_info.imi_monarch_init_handler_size,
+ ia64_mc_info.imi_slave_init_handler,
+ ia64_tpa(ia64_getreg(_IA64_REG_GP)),
+ ia64_mc_info.imi_slave_init_handler_size)))
+ {
+ printk(KERN_ERR "Failed to register m/s INIT handlers with SAL "
+ "(status %ld)\n", rc);
+ return;
+ }
+
+ IA64_MCA_DEBUG("%s: registered OS INIT handler with SAL\n", __FUNCTION__);
+
+ /*
+ * Configure the CMCI/P vector and handler. Interrupts for CMC are
+ * per-processor, so AP CMC interrupts are setup in smp_callin() (smpboot.c).
+ */
+ register_percpu_irq(IA64_CMC_VECTOR, &cmci_irqaction);
+ register_percpu_irq(IA64_CMCP_VECTOR, &cmcp_irqaction);
+ ia64_mca_cmc_vector_setup(); /* Setup vector on BSP */
+
+ /* Setup the MCA rendezvous interrupt vector */
+ register_percpu_irq(IA64_MCA_RENDEZ_VECTOR, &mca_rdzv_irqaction);
+
+ /* Setup the MCA wakeup interrupt vector */
+ register_percpu_irq(IA64_MCA_WAKEUP_VECTOR, &mca_wkup_irqaction);
+
+#ifdef CONFIG_ACPI
+ /* Setup the CPEI/P vector and handler */
+ cpe_vector = acpi_request_vector(ACPI_INTERRUPT_CPEI);
+ register_percpu_irq(IA64_CPEP_VECTOR, &mca_cpep_irqaction);
+#endif
+
+ /* Initialize the areas set aside by the OS to buffer the
+ * platform/processor error states for MCA/INIT/CMC
+ * handling.
+ */
+ ia64_log_init(SAL_INFO_TYPE_MCA);
+ ia64_log_init(SAL_INFO_TYPE_INIT);
+ ia64_log_init(SAL_INFO_TYPE_CMC);
+ ia64_log_init(SAL_INFO_TYPE_CPE);
+
+ mca_init = 1; /* tells ia64_mca_late_init that this function ran to completion */
+ printk(KERN_INFO "MCA related initialization done\n");
+}
+
+/*
+ * ia64_mca_late_init
+ *
+ * Opportunity to setup things that require initialization later
+ * than ia64_mca_init. Setup a timer to poll for CPEs if the
+ * platform doesn't support an interrupt driven mechanism.
+ * Does nothing when ia64_mca_init did not complete (mca_init unset).
+ *
+ * Inputs : None
+ * Outputs : Status (always 0)
+ */
+static int __init
+ia64_mca_late_init(void)
+{
+ if (!mca_init)
+ return 0;
+
+ /* Setup the CMCI/P vector and handler */
+ init_timer(&cmc_poll_timer);
+ cmc_poll_timer.function = ia64_mca_cmc_poll;
+
+ /* Unmask/enable the vector */
+ cmc_polling_enabled = 0;
+ schedule_work(&cmc_enable_work);
+
+ IA64_MCA_DEBUG("%s: CMCI/P setup and enabled.\n", __FUNCTION__);
+
+#ifdef CONFIG_ACPI
+ /* Setup the CPEI/P vector and handler */
+ init_timer(&cpe_poll_timer);
+ cpe_poll_timer.function = ia64_mca_cpe_poll;
+
+ {
+ irq_desc_t *desc;
+ unsigned int irq;
+
+ if (cpe_vector >= 0) {
+ /* If platform supports CPEI, enable the irq. */
+ cpe_poll_enabled = 0;
+ for (irq = 0; irq < NR_IRQS; ++irq)
+ if (irq_to_vector(irq) == cpe_vector) {
+ desc = irq_descp(irq);
+ desc->status |= IRQ_PER_CPU;
+ setup_irq(irq, &mca_cpe_irqaction);
+ }
+ ia64_mca_register_cpev(cpe_vector);
+ IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", __FUNCTION__);
+ } else {
+ /* If platform doesn't support CPEI, get the timer going. */
+ if (cpe_poll_enabled) {
+ ia64_mca_cpe_poll(0UL);
+ IA64_MCA_DEBUG("%s: CPEP setup and enabled.\n", __FUNCTION__);
+ }
+ }
+ }
+#endif
+
+ return 0;
+}
+
+device_initcall(ia64_mca_late_init);
diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S
new file mode 100644
index 00000000000..cf3f8014f9a
--- /dev/null
+++ b/arch/ia64/kernel/mca_asm.S
@@ -0,0 +1,928 @@
+//
+// assembly portion of the IA64 MCA handling
+//
+// Mods by cfleck to integrate into kernel build
+// 00/03/15 davidm Added various stop bits to get a clean compile
+//
+// 00/03/29 cfleck Added code to save INIT handoff state in pt_regs format, switch to temp
+// kstack, switch modes, jump to C INIT handler
+//
+// 02/01/04 J.Hall <jenna.s.hall@intel.com>
+// Before entering virtual mode code:
+// 1. Check for TLB CPU error
+// 2. Restore current thread pointer to kr6
+// 3. Move stack ptr 16 bytes to conform to C calling convention
+//
+// 04/11/12 Russ Anderson <rja@sgi.com>
+// Added per cpu MCA/INIT stack save areas.
+//
+#include <linux/config.h>
+#include <linux/threads.h>
+
+#include <asm/asmmacro.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/mca_asm.h>
+#include <asm/mca.h>
+
+/*
+ * When we get a machine check, the kernel stack pointer is no longer
+ * valid, so we need to set a new stack pointer.
+ */
+#define MINSTATE_PHYS /* Make sure stack access is physical for MINSTATE */
+
+/*
+ * Needed for return context to SAL
+ */
+#define IA64_MCA_SAME_CONTEXT 0
+#define IA64_MCA_COLD_BOOT -2
+
+#include "minstate.h"
+
+/*
+ * SAL_TO_OS_MCA_HANDOFF_STATE (SAL 3.0 spec): stores the registers below, 8 bytes each and in this order, starting at the address LOAD_PHYSICAL yields for ia64_sal_to_os_handoff_state; _tmp is the store cursor.
+ * 1. GR1 = OS GP
+ * 2. GR8 = PAL_PROC physical address
+ * 3. GR9 = SAL_PROC physical address
+ * 4. GR10 = SAL GP (physical)
+ * 5. GR11 = Rendez state
+ * 6. GR12 = Return address to location within SAL_CHECK (the macro then also stores r17 and r18, at offsets 48 and 56)
+ */
+#define SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(_tmp) \
+ LOAD_PHYSICAL(p0, _tmp, ia64_sal_to_os_handoff_state);; \
+ st8 [_tmp]=r1,0x08;; \
+ st8 [_tmp]=r8,0x08;; \
+ st8 [_tmp]=r9,0x08;; \
+ st8 [_tmp]=r10,0x08;; \
+ st8 [_tmp]=r11,0x08;; \
+ st8 [_tmp]=r12,0x08;; \
+ st8 [_tmp]=r17,0x08;; \
+ st8 [_tmp]=r18,0x08
+
+/*
+ * OS_MCA_TO_SAL_HANDOFF_STATE (SAL 3.0 spec): loads r8, r9, r10 and r22 from four consecutive 8-byte slots of ia64_os_to_sal_handoff_state.
+ * (p6) is executed if we never entered virtual mode (TLB error)
+ * (p7) is executed if we entered virtual mode as expected (normal case) -- NOTE(review): the macro body below is unpredicated, so these two remarks look stale
+ * 1. GR8 = OS_MCA return status
+ * 2. GR9 = SAL GP (physical)
+ * 3. GR10 = 0/1 returning same/new context
+ * 4. GR22 = New min state save area pointer
+ * returns ptr to SAL rtn save loc in _tmp (i.e. _tmp ends up 32 bytes past the start of the structure)
+ */
+#define OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(_tmp) \
+ movl _tmp=ia64_os_to_sal_handoff_state;; \
+ DATA_VA_TO_PA(_tmp);; \
+ ld8 r8=[_tmp],0x08;; \
+ ld8 r9=[_tmp],0x08;; \
+ ld8 r10=[_tmp],0x08;; \
+ ld8 r22=[_tmp],0x08;;
+ // now _tmp is pointing to SAL rtn save location
+
+/*
+ * COLD_BOOT_HANDOFF_STATE() fills ia64_os_to_sal_handoff_state, slot by slot, partly from ia64_sal_to_os_handoff_state
+ * imots_os_status=IA64_MCA_COLD_BOOT
+ * imots_sal_gp=SAL GP -- NOTE(review): the ld8 for this slot reads offset 0 of the sal_to_os area, where SAL_TO_OS_MCA_HANDOFF_STATE_SAVE stores r1 (OS GP), not r10; confirm
+ * imots_context=IA64_MCA_SAME_CONTEXT
+ * imots_new_min_state=Min state save area pointer (read from offset 48 of the sal_to_os area)
+ * imots_sal_check_ra=Return address to location within SAL_CHECK (read from offset 40 of the sal_to_os area)
+ *
+ * All three macro arguments are scratch registers: two address cursors and one data temporary.
+ */
+#define COLD_BOOT_HANDOFF_STATE(sal_to_os_handoff,os_to_sal_handoff,tmp)\
+ movl tmp=IA64_MCA_COLD_BOOT; \
+ movl sal_to_os_handoff=__pa(ia64_sal_to_os_handoff_state); \
+ movl os_to_sal_handoff=__pa(ia64_os_to_sal_handoff_state);; \
+ st8 [os_to_sal_handoff]=tmp,8;; \
+ ld8 tmp=[sal_to_os_handoff],48;; \
+ st8 [os_to_sal_handoff]=tmp,8;; \
+ movl tmp=IA64_MCA_SAME_CONTEXT;; \
+ st8 [os_to_sal_handoff]=tmp,8;; \
+ ld8 tmp=[sal_to_os_handoff],-8;; \
+ st8 [os_to_sal_handoff]=tmp,8;; \
+ ld8 tmp=[sal_to_os_handoff];; \
+ st8 [os_to_sal_handoff]=tmp;;
+
+#define GET_IA64_MCA_DATA(reg) \
+ GET_THIS_PADDR(reg, ia64_mca_data) \
+ ;; \
+ ld8 reg=[reg] /* reg = 8-byte value read from the address GET_THIS_PADDR produced for ia64_mca_data */
+
+ .global ia64_os_mca_dispatch
+ .global ia64_os_mca_dispatch_end
+ .global ia64_sal_to_os_handoff_state
+ .global ia64_os_to_sal_handoff_state
+
+ .text
+ .align 16
+
+ia64_os_mca_dispatch:
+
+ // Serialize all MCA processing: spin on ia64_mca_serialize until it is acquired
+ mov r3=1;; // r3 = value swapped into the lock word
+ LOAD_PHYSICAL(p0,r2,ia64_mca_serialize);;
+ia64_os_mca_spin:
+ xchg8 r4=[r2],r3;; // r4 = previous content of the lock word
+ cmp.ne p6,p0=r4,r0 // p6 = lock word was already non-zero
+(p6) br ia64_os_mca_spin // retry until the previous value was 0
+
+ // Save the SAL to OS MCA handoff state as defined
+ // by SAL SPEC 3.0
+ // NOTE : The order in which the state gets saved
+ // is dependent on the way the C-structure
+ // for ia64_mca_sal_to_os_state_t has been
+ // defined in include/asm/mca.h
+ SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2)
+ ;;
+
+ // LOG PROCESSOR STATE INFO FROM HERE ON..
+begin_os_mca_dump:
+ br ia64_os_mca_proc_state_dump;; // the dump stub branches back to ia64_os_mca_done_dump
+
+ia64_os_mca_done_dump:
+
+ LOAD_PHYSICAL(p0,r16,ia64_sal_to_os_handoff_state+56) // +56 = eighth 8-byte slot, where the save macro stored r18
+ ;;
+ ld8 r18=[r16] // Get processor state parameter on existing PALE_CHECK.
+ ;;
+ tbit.nz p6,p7=r18,60 // test bit 60 of the processor state parameter
+(p7) br.spnt done_tlb_purge_and_reload // bit clear: skip the purge/reload sequence
+
+ // The following code purges TC and TR entries. Then it reloads the TR entries.
+ // Purge percpu data TC entries.
+begin_tlb_purge_and_reload:
+
+#define O(member) IA64_CPUINFO_##member##_OFFSET
+
+ GET_THIS_PADDR(r2, cpu_info) // load phys addr of cpu_info into r2
+ ;;
+ addl r17=O(PTCE_STRIDE),r2
+ addl r2=O(PTCE_BASE),r2
+ ;;
+ ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));; // r18=ptce_base
+ ld4 r19=[r2],4 // r19=ptce_count[0]
+ ld4 r21=[r17],4 // r21=ptce_stride[0]
+ ;;
+ ld4 r20=[r2] // r20=ptce_count[1]
+ ld4 r22=[r17] // r22=ptce_stride[1]
+ mov r24=0 // r24 = outer loop counter
+ ;;
+ adds r20=-1,r20 // ar.lc is loaded with count-1 for the inner loop
+ ;;
+#undef O
+
+2:
+ cmp.ltu p6,p7=r24,r19 // outer loop: while r24 < ptce_count[0]
+(p7) br.cond.dpnt.few 4f
+ mov ar.lc=r20
+3:
+ ptc.e r18
+ ;;
+ add r18=r22,r18 // inner step: advance by ptce_stride[1]
+ br.cloop.sptk.few 3b
+ ;;
+ add r18=r21,r18 // outer step: advance by ptce_stride[0]
+ add r24=1,r24
+ ;;
+ br.sptk.few 2b
+4:
+ srlz.i // srlz.i implies srlz.d
+ ;;
+
+ // Now purge addresses formerly mapped by TR registers
+ // 1. Purge ITR&DTR for kernel.
+ movl r16=KERNEL_START
+ mov r18=KERNEL_TR_PAGE_SHIFT<<2
+ ;;
+ ptr.i r16, r18
+ ptr.d r16, r18
+ ;;
+ srlz.i
+ ;;
+ srlz.d
+ ;;
+ // 2. Purge DTR for PERCPU data.
+ movl r16=PERCPU_ADDR
+ mov r18=PERCPU_PAGE_SHIFT<<2
+ ;;
+ ptr.d r16,r18
+ ;;
+ srlz.d
+ ;;
+ // 3. Purge ITR for PAL code.
+ GET_THIS_PADDR(r2, ia64_mca_pal_base)
+ ;;
+ ld8 r16=[r2]
+ mov r18=IA64_GRANULE_SHIFT<<2
+ ;;
+ ptr.i r16,r18
+ ;;
+ srlz.i
+ ;;
+ // 4. Purge DTR for stack.
+ mov r16=IA64_KR(CURRENT_STACK)
+ ;;
+ shl r16=r16,IA64_GRANULE_SHIFT
+ movl r19=PAGE_OFFSET
+ ;;
+ add r16=r19,r16 // r16 = PAGE_OFFSET + (CURRENT_STACK kr << IA64_GRANULE_SHIFT)
+ mov r18=IA64_GRANULE_SHIFT<<2
+ ;;
+ ptr.d r16,r18
+ ;;
+ srlz.i
+ ;;
+ // Finally reload the TR registers.
+ // 1. Reload DTR/ITR registers for kernel.
+ mov r18=KERNEL_TR_PAGE_SHIFT<<2
+ movl r17=KERNEL_START
+ ;;
+ mov cr.itir=r18
+ mov cr.ifa=r17
+ mov r16=IA64_TR_KERNEL
+ mov r19=ip
+ movl r18=PAGE_KERNEL
+ ;;
+ dep r17=0,r19,0, KERNEL_TR_PAGE_SHIFT // r17 = current ip with its low KERNEL_TR_PAGE_SHIFT bits cleared
+ ;;
+ or r18=r17,r18 // r18 = that address combined with the PAGE_KERNEL bits
+ ;;
+ itr.i itr[r16]=r18
+ ;;
+ itr.d dtr[r16]=r18
+ ;;
+ srlz.i
+ srlz.d
+ ;;
+ // 2. Reload DTR register for PERCPU data.
+ GET_THIS_PADDR(r2, ia64_mca_per_cpu_pte)
+ ;;
+ movl r16=PERCPU_ADDR // vaddr
+ movl r18=PERCPU_PAGE_SHIFT<<2
+ ;;
+ mov cr.itir=r18
+ mov cr.ifa=r16
+ ;;
+ ld8 r18=[r2] // load per-CPU PTE
+ mov r16=IA64_TR_PERCPU_DATA;
+ ;;
+ itr.d dtr[r16]=r18
+ ;;
+ srlz.d
+ ;;
+ // 3. Reload ITR for PAL code.
+ GET_THIS_PADDR(r2, ia64_mca_pal_pte)
+ ;;
+ ld8 r18=[r2] // load PAL PTE
+ ;;
+ GET_THIS_PADDR(r2, ia64_mca_pal_base)
+ ;;
+ ld8 r16=[r2] // load PAL vaddr
+ mov r19=IA64_GRANULE_SHIFT<<2
+ ;;
+ mov cr.itir=r19
+ mov cr.ifa=r16
+ mov r20=IA64_TR_PALCODE
+ ;;
+ itr.i itr[r20]=r18
+ ;;
+ srlz.i
+ ;;
+ // 4. Reload DTR for stack.
+ mov r16=IA64_KR(CURRENT_STACK)
+ ;;
+ shl r16=r16,IA64_GRANULE_SHIFT
+ movl r19=PAGE_OFFSET
+ ;;
+ add r18=r19,r16 // r18 = address placed in cr.ifa below
+ movl r20=PAGE_KERNEL
+ ;;
+ add r16=r20,r16 // r16 = value inserted into the DTR below
+ mov r19=IA64_GRANULE_SHIFT<<2
+ ;;
+ mov cr.itir=r19
+ mov cr.ifa=r18
+ mov r20=IA64_TR_CURRENT_STACK
+ ;;
+ itr.d dtr[r20]=r16
+ ;;
+ srlz.d
+ ;;
+ br.sptk.many done_tlb_purge_and_reload
+err: // NOTE(review): no branch to this label is visible in this part of the file
+ COLD_BOOT_HANDOFF_STATE(r20,r21,r22)
+ br.sptk.many ia64_os_mca_done_restore
+
+done_tlb_purge_and_reload:
+
+ // Setup new stack frame for OS_MCA handling
+ GET_IA64_MCA_DATA(r2)
+ ;;
+ add r3 = IA64_MCA_CPU_STACKFRAME_OFFSET, r2
+ add r2 = IA64_MCA_CPU_RBSTORE_OFFSET, r2
+ ;;
+ rse_switch_context(r6,r3,r2);; // RSC management in this new context
+
+ GET_IA64_MCA_DATA(r2)
+ ;;
+ add r2 = IA64_MCA_CPU_STACK_OFFSET+IA64_MCA_STACK_SIZE-16, r2 // 16 bytes below the end of the stack area
+ ;;
+ mov r12=r2 // establish new stack-pointer
+
+ // Enter virtual mode from physical mode
+ VIRTUAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_begin, r4)
+ia64_os_mca_virtual_begin:
+
+ // Call virtual mode handler
+ movl r2=ia64_mca_ucmc_handler;;
+ mov b6=r2;;
+ br.call.sptk.many b0=b6;;
+.ret0:
+ // Revert back to physical mode before going back to SAL
+ PHYSICAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_end, r4)
+ia64_os_mca_virtual_end:
+
+ // restore the original stack frame here
+ GET_IA64_MCA_DATA(r2)
+ ;;
+ add r2 = IA64_MCA_CPU_STACKFRAME_OFFSET, r2
+ ;;
+ movl r4=IA64_PSR_MC
+ ;;
+ rse_return_context(r4,r3,r2) // switch from interrupt context for RSE
+
+ // let us restore all the registers from our PSI structure
+ mov r8=gp
+ ;;
+begin_os_mca_restore:
+ br ia64_os_mca_proc_state_restore;; // the restore stub branches back to ia64_os_mca_done_restore
+
+ia64_os_mca_done_restore:
+ OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(r2);;
+ // branch back to SAL_CHECK
+ ld8 r3=[r2];;
+ mov b0=r3;; // SAL_CHECK return address
+
+ // release lock
+ movl r3=ia64_mca_serialize;;
+ DATA_VA_TO_PA(r3);;
+ st8.rel [r3]=r0 // store 0 into the word the entry spin loop swaps on
+
+ br b0
+ ;;
+ia64_os_mca_dispatch_end:
+//EndMain//////////////////////////////////////////////////////////////////////
+
+
+//++
+// Name:
+// ia64_os_mca_proc_state_dump()
+//
+// Stub Description:
+//
+// This stub dumps the processor state during MCHK to a data area; it is entered by a plain branch and leaves by branching to ia64_os_mca_done_dump
+//
+//--
+
+ia64_os_mca_proc_state_dump:
+// Save bank 1 GRs 16-31 which will be used by c-language code when we switch
+// to virtual addressing mode.
+ GET_IA64_MCA_DATA(r2)
+ ;;
+ add r2 = IA64_MCA_CPU_PROC_STATE_DUMP_OFFSET, r2 // r2 = store cursor into the dump area
+ ;;
+// save ar.NaT
+ mov r5=ar.unat // ar.unat
+
+// save banked GRs 16-31 along with NaT bits
+ bsw.1;;
+ st8.spill [r2]=r16,8;;
+ st8.spill [r2]=r17,8;;
+ st8.spill [r2]=r18,8;;
+ st8.spill [r2]=r19,8;;
+ st8.spill [r2]=r20,8;;
+ st8.spill [r2]=r21,8;;
+ st8.spill [r2]=r22,8;;
+ st8.spill [r2]=r23,8;;
+ st8.spill [r2]=r24,8;;
+ st8.spill [r2]=r25,8;;
+ st8.spill [r2]=r26,8;;
+ st8.spill [r2]=r27,8;;
+ st8.spill [r2]=r28,8;;
+ st8.spill [r2]=r29,8;;
+ st8.spill [r2]=r30,8;;
+ st8.spill [r2]=r31,8;;
+
+ mov r4=ar.unat;;
+ st8 [r2]=r4,8 // save User NaT bits for r16-r31
+ mov ar.unat=r5 // restore original unat
+ bsw.0;;
+
+//save BRs
+ add r4=8,r2 // r4 = r2 + 8 (second store cursor)
+ add r6=2*8,r2 // r6 = r2 + 16 (third store cursor)
+
+ mov r3=b0
+ mov r5=b1
+ mov r7=b2;;
+ st8 [r2]=r3,3*8
+ st8 [r4]=r5,3*8
+ st8 [r6]=r7,3*8;;
+
+ mov r3=b3
+ mov r5=b4
+ mov r7=b5;;
+ st8 [r2]=r3,3*8
+ st8 [r4]=r5,3*8
+ st8 [r6]=r7,3*8;;
+
+ mov r3=b6
+ mov r5=b7;;
+ st8 [r2]=r3,2*8
+ st8 [r4]=r5,2*8;;
+
+cSaveCRs:
+// save CRs
+ add r4=8,r2 // r4 = r2 + 8
+ add r6=2*8,r2 // r6 = r2 + 16
+
+ mov r3=cr.dcr
+ mov r5=cr.itm
+ mov r7=cr.iva;;
+
+ st8 [r2]=r3,8*8 // r2 advances 64 bytes
+ st8 [r4]=r5,3*8
+ st8 [r6]=r7,3*8;; // r4 and r6 advance 24 bytes each
+
+ mov r3=cr.pta;;
+ st8 [r2]=r3,8*8;; // 64 byte increment
+
+// the interruption registers are skipped when the PSR_IC bit tested below is set (PSR.ic=1); reading them would then cause an illegal operation fault
+ mov r3=psr;;
+ tbit.nz.unc p6,p0=r3,PSR_IC;; // PSI Valid Log bit pos. test
+(p6) st8 [r2]=r0,9*8+160 // increment by 232 byte inc.
+begin_skip_intr_regs:
+(p6) br SkipIntrRegs;;
+
+ add r4=8,r2 // r4 = r2 + 8
+ add r6=2*8,r2 // r6 = r2 + 16
+
+ mov r3=cr.ipsr
+ mov r5=cr.isr
+ mov r7=r0;;
+ st8 [r2]=r3,3*8
+ st8 [r4]=r5,3*8
+ st8 [r6]=r7,3*8;;
+
+ mov r3=cr.iip
+ mov r5=cr.ifa
+ mov r7=cr.itir;;
+ st8 [r2]=r3,3*8
+ st8 [r4]=r5,3*8
+ st8 [r6]=r7,3*8;;
+
+ mov r3=cr.iipa
+ mov r5=cr.ifs
+ mov r7=cr.iim;;
+ st8 [r2]=r3,3*8
+ st8 [r4]=r5,3*8
+ st8 [r6]=r7,3*8;;
+
+ mov r3=cr25;; // cr.iha
+ st8 [r2]=r3,160;; // 160 byte increment
+
+SkipIntrRegs:
+ st8 [r2]=r0,152;; // another 152 byte inc.
+
+ add r4=8,r2 // r4 = r2 + 8
+ add r6=2*8,r2 // r6 = r2 + 16
+
+ mov r3=cr.lid
+// mov r5=cr.ivr // cr.ivr, don't read it
+ mov r7=cr.tpr;;
+ st8 [r2]=r3,3*8
+ st8 [r4]=r5,3*8 // NOTE(review): r5 was not reloaded (the cr.ivr read above is commented out), so a leftover value is stored in this slot
+ st8 [r6]=r7,3*8;;
+
+ mov r3=r0 // cr.eoi => cr67
+ mov r5=r0 // cr.irr0 => cr68
+ mov r7=r0;; // cr.irr1 => cr69
+ st8 [r2]=r3,3*8
+ st8 [r4]=r5,3*8
+ st8 [r6]=r7,3*8;;
+
+ mov r3=r0 // cr.irr2 => cr70
+ mov r5=r0 // cr.irr3 => cr71
+ mov r7=cr.itv;;
+ st8 [r2]=r3,3*8
+ st8 [r4]=r5,3*8
+ st8 [r6]=r7,3*8;;
+
+ mov r3=cr.pmv
+ mov r5=cr.cmcv;;
+ st8 [r2]=r3,7*8
+ st8 [r4]=r5,7*8;;
+
+ mov r3=r0 // cr.lrr0 => cr80
+ mov r5=r0;; // cr.lrr1 => cr81
+ st8 [r2]=r3,23*8
+ st8 [r4]=r5,23*8;;
+
+ adds r2=25*8,r2;; // skip a further 25*8 bytes before the AR slots
+
+cSaveARs:
+// save ARs
+ add r4=8,r2 // r4 = r2 + 8
+ add r6=2*8,r2 // r6 = r2 + 16
+
+ mov r3=ar.k0
+ mov r5=ar.k1
+ mov r7=ar.k2;;
+ st8 [r2]=r3,3*8
+ st8 [r4]=r5,3*8
+ st8 [r6]=r7,3*8;;
+
+ mov r3=ar.k3
+ mov r5=ar.k4
+ mov r7=ar.k5;;
+ st8 [r2]=r3,3*8
+ st8 [r4]=r5,3*8
+ st8 [r6]=r7,3*8;;
+
+ mov r3=ar.k6
+ mov r5=ar.k7
+ mov r7=r0;; // ar.kr8
+ st8 [r2]=r3,10*8
+ st8 [r4]=r5,10*8
+ st8 [r6]=r7,10*8;; // increment by 80 bytes
+
+ mov r3=ar.rsc
+ mov ar.rsc=r0 // put RSE in enforced lazy mode
+ mov r5=ar.bsp
+ ;;
+ mov r7=ar.bspstore;;
+ st8 [r2]=r3,3*8
+ st8 [r4]=r5,3*8
+ st8 [r6]=r7,3*8;;
+
+ mov r3=ar.rnat;;
+ st8 [r2]=r3,8*13 // increment by 13x8 bytes
+
+ mov r3=ar.ccv;;
+ st8 [r2]=r3,8*4
+
+ mov r3=ar.unat;;
+ st8 [r2]=r3,8*4
+
+ mov r3=ar.fpsr;;
+ st8 [r2]=r3,8*4
+
+ mov r3=ar.itc;;
+ st8 [r2]=r3,160 // 160
+
+ mov r3=ar.pfs;;
+ st8 [r2]=r3,8
+
+ mov r3=ar.lc;;
+ st8 [r2]=r3,8
+
+ mov r3=ar.ec;;
+ st8 [r2]=r3
+ add r2=8*62,r2 //padding
+
+// save RRs
+ mov ar.lc=0x08-1 // eight iterations
+ movl r4=0x00;;
+
+cStRR:
+ dep.z r5=r4,61,3;; // r5 = loop index placed in bits 61..63
+ mov r3=rr[r5];;
+ st8 [r2]=r3,8
+ add r4=1,r4
+ br.cloop.sptk.few cStRR
+ ;;
+end_os_mca_dump:
+ br ia64_os_mca_done_dump;;
+
+//EndStub//////////////////////////////////////////////////////////////////////
+
+
+//++
+// Name:
+// ia64_os_mca_proc_state_restore()
+//
+// Stub Description:
+//
+// This is a stub to restore the saved processor state during MCHK; it walks the dump area with the same strides as ia64_os_mca_proc_state_dump and leaves by branching to ia64_os_mca_done_restore
+//
+//--
+
+ia64_os_mca_proc_state_restore:
+
+// Restore bank1 GR16-31
+ GET_IA64_MCA_DATA(r2)
+ ;;
+ add r2 = IA64_MCA_CPU_PROC_STATE_DUMP_OFFSET, r2 // r2 = load cursor into the dump area
+
+restore_GRs: // restore bank-1 GRs 16-31
+ bsw.1;;
+ add r3=16*8,r2;; // to get to NaT of GR 16-31
+ ld8 r3=[r3];;
+ mov ar.unat=r3;; // first restore NaT
+
+ ld8.fill r16=[r2],8;;
+ ld8.fill r17=[r2],8;;
+ ld8.fill r18=[r2],8;;
+ ld8.fill r19=[r2],8;;
+ ld8.fill r20=[r2],8;;
+ ld8.fill r21=[r2],8;;
+ ld8.fill r22=[r2],8;;
+ ld8.fill r23=[r2],8;;
+ ld8.fill r24=[r2],8;;
+ ld8.fill r25=[r2],8;;
+ ld8.fill r26=[r2],8;;
+ ld8.fill r27=[r2],8;;
+ ld8.fill r28=[r2],8;;
+ ld8.fill r29=[r2],8;;
+ ld8.fill r30=[r2],8;;
+ ld8.fill r31=[r2],8;;
+
+ ld8 r3=[r2],8;; // increment to skip NaT
+ bsw.0;;
+
+restore_BRs:
+ add r4=8,r2 // r4 = r2 + 8 (second load cursor)
+ add r6=2*8,r2;; // r6 = r2 + 16 (third load cursor)
+
+ ld8 r3=[r2],3*8
+ ld8 r5=[r4],3*8
+ ld8 r7=[r6],3*8;;
+ mov b0=r3
+ mov b1=r5
+ mov b2=r7;;
+
+ ld8 r3=[r2],3*8
+ ld8 r5=[r4],3*8
+ ld8 r7=[r6],3*8;;
+ mov b3=r3
+ mov b4=r5
+ mov b5=r7;;
+
+ ld8 r3=[r2],2*8
+ ld8 r5=[r4],2*8;;
+ mov b6=r3
+ mov b7=r5;;
+
+restore_CRs:
+ add r4=8,r2 // r4 = r2 + 8
+ add r6=2*8,r2;; // r6 = r2 + 16
+
+ ld8 r3=[r2],8*8 // r2 advances 64 bytes
+ ld8 r5=[r4],3*8
+ ld8 r7=[r6],3*8;; // r4 and r6 advance 24 bytes each
+ mov cr.dcr=r3
+ mov cr.itm=r5
+ mov cr.iva=r7;;
+
+ ld8 r3=[r2],8*8;; // 64 byte increments
+// mov cr.pta=r3
+
+
+// if PSR.ic=1, reading interruption registers causes an illegal operation fault
+ mov r3=psr;;
+ tbit.nz.unc p6,p0=r3,PSR_IC;; // PSI Valid Log bit pos. test
+(p6) st8 [r2]=r0,9*8+160 // increment by 232 byte inc. NOTE(review): this is a store of zero into the saved area on the restore path, same instruction as in the dump stub
+
+begin_rskip_intr_regs:
+(p6) br rSkipIntrRegs;;
+
+ add r4=8,r2 // r4 = r2 + 8
+ add r6=2*8,r2;; // r6 = r2 + 16
+
+ ld8 r3=[r2],3*8
+ ld8 r5=[r4],3*8
+ ld8 r7=[r6],3*8;;
+ mov cr.ipsr=r3
+// mov cr.isr=r5 // cr.isr is read only
+
+ ld8 r3=[r2],3*8
+ ld8 r5=[r4],3*8
+ ld8 r7=[r6],3*8;;
+ mov cr.iip=r3
+ mov cr.ifa=r5
+ mov cr.itir=r7;;
+
+ ld8 r3=[r2],3*8
+ ld8 r5=[r4],3*8
+ ld8 r7=[r6],3*8;;
+ mov cr.iipa=r3
+ mov cr.ifs=r5
+ mov cr.iim=r7
+
+ ld8 r3=[r2],160;; // 160 byte increment
+ mov cr.iha=r3
+
+rSkipIntrRegs:
+ ld8 r3=[r2],152;; // another 152 byte inc.
+
+ add r4=8,r2 // r4 = r2 + 8
+ add r6=2*8,r2;; // r6 = r2 + 16
+
+ ld8 r3=[r2],8*3
+ ld8 r5=[r4],8*3
+ ld8 r7=[r6],8*3;;
+ mov cr.lid=r3
+// mov cr.ivr=r5 // cr.ivr is read only
+ mov cr.tpr=r7;;
+
+ ld8 r3=[r2],8*3
+ ld8 r5=[r4],8*3
+ ld8 r7=[r6],8*3;;
+// mov cr.eoi=r3
+// mov cr.irr0=r5 // cr.irr0 is read only
+// mov cr.irr1=r7;; // cr.irr1 is read only
+
+ ld8 r3=[r2],8*3
+ ld8 r5=[r4],8*3
+ ld8 r7=[r6],8*3;;
+// mov cr.irr2=r3 // cr.irr2 is read only
+// mov cr.irr3=r5 // cr.irr3 is read only
+ mov cr.itv=r7;;
+
+ ld8 r3=[r2],8*7
+ ld8 r5=[r4],8*7;;
+ mov cr.pmv=r3
+ mov cr.cmcv=r5;;
+
+ ld8 r3=[r2],8*23
+ ld8 r5=[r4],8*23;;
+ adds r2=8*23,r2 // skip a further 23*8 bytes
+ adds r4=8*23,r4;;
+// mov cr.lrr0=r3
+// mov cr.lrr1=r5
+
+ adds r2=8*2,r2;; // plus 2*8 more before the AR slots
+
+restore_ARs:
+ add r4=8,r2 // r4 = r2 + 8
+ add r6=2*8,r2;; // r6 = r2 + 16
+
+ ld8 r3=[r2],3*8
+ ld8 r5=[r4],3*8
+ ld8 r7=[r6],3*8;;
+ mov ar.k0=r3
+ mov ar.k1=r5
+ mov ar.k2=r7;;
+
+ ld8 r3=[r2],3*8
+ ld8 r5=[r4],3*8
+ ld8 r7=[r6],3*8;;
+ mov ar.k3=r3
+ mov ar.k4=r5
+ mov ar.k5=r7;;
+
+ ld8 r3=[r2],10*8
+ ld8 r5=[r4],10*8
+ ld8 r7=[r6],10*8;;
+ mov ar.k6=r3
+ mov ar.k7=r5
+ ;;
+
+ ld8 r3=[r2],3*8
+ ld8 r5=[r4],3*8
+ ld8 r7=[r6],3*8;;
+// mov ar.rsc=r3
+// mov ar.bsp=r5 // ar.bsp is read only
+ mov ar.rsc=r0 // make sure that RSE is in enforced lazy mode
+ ;;
+ mov ar.bspstore=r7;;
+
+ ld8 r9=[r2],8*13;; // r9 is used here so that r3 keeps the saved ar.rsc
+ mov ar.rnat=r9
+
+ mov ar.rsc=r3 // r3 still holds the saved ar.rsc loaded above
+ ld8 r3=[r2],8*4;;
+ mov ar.ccv=r3
+
+ ld8 r3=[r2],8*4;;
+ mov ar.unat=r3
+
+ ld8 r3=[r2],8*4;;
+ mov ar.fpsr=r3
+
+ ld8 r3=[r2],160;; // 160
+// mov ar.itc=r3
+
+ ld8 r3=[r2],8;;
+ mov ar.pfs=r3
+
+ ld8 r3=[r2],8;;
+ mov ar.lc=r3
+
+ ld8 r3=[r2];;
+ mov ar.ec=r3
+ add r2=8*62,r2;; // padding
+
+restore_RRs:
+ mov r5=ar.lc // keep the just-restored ar.lc while the loop below uses it
+ mov ar.lc=0x08-1 // eight iterations
+ movl r4=0x00;;
+cStRRr:
+ dep.z r7=r4,61,3 // r7 = loop index placed in bits 61..63
+ ld8 r3=[r2],8;;
+ mov rr[r7]=r3 // what are its access privileges?
+ add r4=1,r4
+ br.cloop.sptk.few cStRRr
+ ;;
+ mov ar.lc=r5 // put the restored ar.lc back
+ ;;
+end_os_mca_restore:
+ br ia64_os_mca_done_restore;;
+
+//EndStub//////////////////////////////////////////////////////////////////////
+
+
+// ok, the issue here is that we need to save state information so
+// it can be usable by the kernel debugger and show regs routines.
+// In order to do this, our best bet is save the current state (plus
+// the state information obtained from the MIN_STATE_AREA) into a pt_regs
+// format. This way we can pass it on in a usable format.
+//
+
+//
+// SAL to OS entry point for INIT on the monarch processor
+// This has been defined for registration purposes with SAL
+// as a part of ia64_mca_init.
+//
+// When we get here, the following registers have been
+// set by the SAL for our use
+//
+// 1. GR1 = OS INIT GP
+// 2. GR8 = PAL_PROC physical address
+// 3. GR9 = SAL_PROC physical address
+// 4. GR10 = SAL GP (physical)
+// 5. GR11 = Init Reason
+// 0 = Received INIT for event other than crash dump switch
+// 1 = Received wakeup at the end of an OS_MCA corrected machine check
+// 2 = Received INIT due to CrashDump switch assertion
+//
+// 6. GR12 = Return address to location within SAL_INIT procedure
+
+
+GLOBAL_ENTRY(ia64_monarch_init_handler)
+ .prologue
+ // stash the information the SAL passed to os
+ SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2)
+ ;;
+ SAVE_MIN_WITH_COVER
+ ;;
+ mov r8=cr.ifa
+ mov r9=cr.isr
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ SAVE_REST
+
+// ok, enough should be saved at this point to be dangerous, and supply
+// information for a dump
+// We need to switch to Virtual mode before hitting the C functions.
+
+ movl r2=IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN
+ mov r3=psr // get the current psr, minimum enabled at this point
+ ;;
+ or r2=r2,r3 // r2 = current psr with the bits above added
+ ;;
+ movl r3=IVirtual_Switch
+ ;;
+ mov cr.iip=r3 // short return to set the appropriate bits
+ mov cr.ipsr=r2 // need to do an rfi to set appropriate bits
+ ;;
+ rfi
+ ;;
+IVirtual_Switch:
+ //
+ // We should now be running virtual
+ //
+ // Let's call the C handler to get the rest of the state info
+ //
+ alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!)
+ ;;
+ adds out0=16,sp // out0 = pointer to pt_regs
+ ;;
+ DO_SAVE_SWITCH_STACK
+ .body
+ adds out1=16,sp // out1 = pointer to switch_stack
+
+ br.call.sptk.many rp=ia64_init_handler
+.ret1:
+
+return_from_init:
+ br.sptk return_from_init // branch-to-self: spins here if ia64_init_handler ever returns
+END(ia64_monarch_init_handler)
+
+//
+// SAL to OS entry point for INIT on the slave processor
+// This has been defined for registration purposes with SAL
+// as a part of ia64_mca_init. The handler body is a single branch-to-self.
+//
+
+GLOBAL_ENTRY(ia64_slave_init_handler)
+1: br.sptk 1b // loop forever; nothing is saved and control never returns
+END(ia64_slave_init_handler)
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
new file mode 100644
index 00000000000..ab478172c34
--- /dev/null
+++ b/arch/ia64/kernel/mca_drv.c
@@ -0,0 +1,639 @@
+/*
+ * File: mca_drv.c
+ * Purpose: Generic MCA handling layer
+ *
+ * Copyright (C) 2004 FUJITSU LIMITED
+ * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com)
+ */
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kallsyms.h>
+#include <linux/smp_lock.h>
+#include <linux/bootmem.h>
+#include <linux/acpi.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/workqueue.h>
+#include <linux/mm.h>
+
+#include <asm/delay.h>
+#include <asm/machvec.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/sal.h>
+#include <asm/mca.h>
+
+#include <asm/irq.h>
+#include <asm/hw_irq.h>
+
+#include "mca_drv.h"
+
+/* max size of SAL error record (default); used to size slidx_pool in init_record_index_pools() */
+static int sal_rec_max = 10000;
+
+/* handoff state pointers; the comment says "from mca.c" but both are file-static here -- NOTE(review): they are not assigned anywhere in this part of the file */
+static ia64_mca_sal_to_os_state_t *sal_to_os_handoff_state;
+static ia64_mca_os_to_sal_state_t *os_to_sal_handoff_state;
+
+/* from mca_drv_asm.S; recover_from_read_error() uses it as the resume target after a recoverable user-mode read error */
+extern void *mca_handler_bhhook(void);
+
+static DEFINE_SPINLOCK(mca_bh_lock); /* taken by mca_handler_bh() around mca_page_isolate() */
+
+typedef enum {
+ MCA_IS_LOCAL = 0,
+ MCA_IS_GLOBAL = 1
+} mca_type_t; /* result of is_mca_global() */
+
+#define MAX_PAGE_ISOLATE 1024 /* capacity of page_isolate[] */
+
+static struct page *page_isolate[MAX_PAGE_ISOLATE]; /* pages already isolated by mca_page_isolate() */
+static int num_page_isolate = 0; /* number of valid entries in page_isolate[] */
+
+typedef enum {
+ ISOLATE_NG = 0,
+ ISOLATE_OK = 1
+} isolate_status_t; /* result of mca_page_isolate() */
+
+/*
+ * This pool keeps pointers to the section part of SAL error record; mca_make_slidx() hands its entries out round-robin
+ */
+static struct {
+ slidx_list_t *buffer; /* section pointer list pool */
+ int cur_idx; /* Current index of section pointer list pool */
+ int max_idx; /* Maximum index of section pointer list pool */
+} slidx_pool;
+
+/**
+ * mca_page_isolate - isolate a poisoned page in order not to use it later
+ * @paddr: poisoned memory location
+ *
+ * Marks the page that contains @paddr as Reserved and records it in
+ * page_isolate[]. A page that is already recorded is reported as success
+ * without being touched again. The request is refused when the address is
+ * not a valid physical address, when its page frame has no struct page,
+ * when page_isolate[] is full, or when the page is a slab or already
+ * Reserved page.
+ *
+ * mca_handler_bh() calls this with mca_bh_lock held; the function itself
+ * takes no lock around page_isolate[] / num_page_isolate.
+ *
+ * Return value:
+ * ISOLATE_OK / ISOLATE_NG
+ */
+
+static isolate_status_t
+mca_page_isolate(unsigned long paddr)
+{
+	int i;
+	unsigned long pfn = paddr >> PAGE_SHIFT;
+	struct page *p;
+
+	/* whether physical address is valid or not */
+	if (!ia64_phys_addr_valid(paddr))
+		return ISOLATE_NG;
+
+	/*
+	 * An addressable pfn does not necessarily have a struct page behind
+	 * it; pfn_to_page() must not be used on such a pfn.
+	 */
+	if (!pfn_valid(pfn))
+		return ISOLATE_NG;
+
+	/* convert physical page number to its struct page */
+	p = pfn_to_page(pfn);
+
+	/* check whether the page has been already registered or not */
+	for (i = 0; i < num_page_isolate; i++)
+		if (page_isolate[i] == p)
+			return ISOLATE_OK; /* already listed */
+
+	/* limitation check */
+	if (num_page_isolate == MAX_PAGE_ISOLATE)
+		return ISOLATE_NG;
+
+	/* kick pages having attribute 'SLAB' or 'Reserved' */
+	if (PageSlab(p) || PageReserved(p))
+		return ISOLATE_NG;
+
+	/* add attribute 'Reserved' and register the page */
+	SetPageReserved(p);
+	page_isolate[num_page_isolate++] = p;
+
+	return ISOLATE_OK;
+}
+
+/**
+ * mca_handler_bh - Kill the process which hit a memory read error
+ * @paddr: poisoned address received from MCA Handler
+ *
+ * Logs the affected task, tries to isolate the page containing @paddr
+ * while holding mca_bh_lock, logs the outcome, then sends SIGKILL to the
+ * current task and calls schedule().
+ */
+
+void
+mca_handler_bh(unsigned long paddr)
+{
+	isolate_status_t outcome;
+
+	printk(KERN_DEBUG "OS_MCA: process [pid: %d](%s) encounters MCA.\n",
+		current->pid, current->comm);
+
+	spin_lock(&mca_bh_lock);
+	outcome = mca_page_isolate(paddr);
+	if (outcome != ISOLATE_OK)
+		printk(KERN_DEBUG "Page isolation: ( %lx ) failure.\n", paddr);
+	else
+		printk(KERN_DEBUG "Page isolation: ( %lx ) success.\n", paddr);
+	spin_unlock(&mca_bh_lock);
+
+	/* The current task is the one being killed; give up the CPU right away. */
+	force_sig(SIGKILL, current);
+	schedule();
+}
+
+/**
+ * mca_make_peidx - Make index of processor error section
+ * @slpi: pointer to record of processor error section
+ * @peidx: pointer to index of processor error section
+ *
+ * Fills the three entries of @peidx: head is @slpi itself, mid is the
+ * "struct sal_cpuid_info" area and bottom is the
+ * "sal_processor_static_info_t" area. The latter two are located by byte
+ * offset from @slpi and are set to NULL when the corresponding valid bit
+ * is clear.
+ */
+
+static void
+mca_make_peidx(sal_log_processor_info_t *slpi, peidx_table_t *peidx)
+{
+	char *base = (char*)slpi;
+	u64 n_checks;		/* number of check structures present */
+	u64 cpuid_off;		/* byte offset of the cpuid info */
+	u64 static_off;		/* byte offset of the static info */
+
+	n_checks = slpi->valid.num_cache_check
+		 + slpi->valid.num_tlb_check
+		 + slpi->valid.num_bus_check
+		 + slpi->valid.num_reg_file_check
+		 + slpi->valid.num_ms_check;
+
+	/* the fixed header is followed by one mod_error_info per check */
+	cpuid_off = sizeof(sal_log_mod_error_info_t) * n_checks
+		  + sizeof(sal_log_processor_info_t);
+	static_off = cpuid_off
+		   + slpi->valid.cpuid_info * sizeof(struct sal_cpuid_info);
+
+	peidx_head(peidx) = slpi;
+
+	if (slpi->valid.cpuid_info)
+		peidx_mid(peidx) = (struct sal_cpuid_info *)(base + cpuid_off);
+	else
+		peidx_mid(peidx) = (struct sal_cpuid_info *)NULL;
+
+	if (slpi->valid.psi_static_struct)
+		peidx_bottom(peidx) = (sal_processor_static_info_t *)(base + static_off);
+	else
+		peidx_bottom(peidx) = (sal_processor_static_info_t *)NULL;
+}
+
+/**
+ * mca_make_slidx - Make index of SAL error record
+ * @buffer: pointer to SAL error record
+ * @slidx: pointer to index of SAL error record
+ *
+ * Return value:
+ * 1 if record has platform error / 0 if not
+ */
+#define LOG_INDEX_ADD_SECT_PTR(sect, ptr) \
+ { slidx_list_t *hl = &slidx_pool.buffer[slidx_pool.cur_idx]; \
+ hl->hdr = ptr; \
+ list_add(&hl->list, &(sect)); \
+ slidx_pool.cur_idx = (slidx_pool.cur_idx + 1)%slidx_pool.max_idx; }
+
+static int
+mca_make_slidx(void *buffer, slidx_table_t *slidx)
+{
+ int platform_err = 0;
+ int record_len = ((sal_log_record_header_t*)buffer)->len;
+ u32 ercd_pos;
+ int sects;
+ sal_log_section_hdr_t *sp;
+
+ /*
+ * Initialize index referring current record
+ */
+ INIT_LIST_HEAD(&(slidx->proc_err));
+ INIT_LIST_HEAD(&(slidx->mem_dev_err));
+ INIT_LIST_HEAD(&(slidx->sel_dev_err));
+ INIT_LIST_HEAD(&(slidx->pci_bus_err));
+ INIT_LIST_HEAD(&(slidx->smbios_dev_err));
+ INIT_LIST_HEAD(&(slidx->pci_comp_err));
+ INIT_LIST_HEAD(&(slidx->plat_specific_err));
+ INIT_LIST_HEAD(&(slidx->host_ctlr_err));
+ INIT_LIST_HEAD(&(slidx->plat_bus_err));
+ INIT_LIST_HEAD(&(slidx->unsupported));
+
+ /*
+ * Extract a Record Header
+ */
+ slidx->header = buffer;
+
+ /*
+ * Extract each section records
+ * (arranged from "int ia64_log_platform_info_print()")
+ */
+ for (ercd_pos = sizeof(sal_log_record_header_t), sects = 0;
+ ercd_pos < record_len; ercd_pos += sp->len, sects++) {
+ sp = (sal_log_section_hdr_t *)((char*)buffer + ercd_pos);
+ if (!efi_guidcmp(sp->guid, SAL_PROC_DEV_ERR_SECT_GUID)) {
+ LOG_INDEX_ADD_SECT_PTR(slidx->proc_err, sp);
+ } else if (!efi_guidcmp(sp->guid, SAL_PLAT_MEM_DEV_ERR_SECT_GUID)) {
+ platform_err = 1;
+ LOG_INDEX_ADD_SECT_PTR(slidx->mem_dev_err, sp);
+ } else if (!efi_guidcmp(sp->guid, SAL_PLAT_SEL_DEV_ERR_SECT_GUID)) {
+ platform_err = 1;
+ LOG_INDEX_ADD_SECT_PTR(slidx->sel_dev_err, sp);
+ } else if (!efi_guidcmp(sp->guid, SAL_PLAT_PCI_BUS_ERR_SECT_GUID)) {
+ platform_err = 1;
+ LOG_INDEX_ADD_SECT_PTR(slidx->pci_bus_err, sp);
+ } else if (!efi_guidcmp(sp->guid, SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID)) {
+ platform_err = 1;
+ LOG_INDEX_ADD_SECT_PTR(slidx->smbios_dev_err, sp);
+ } else if (!efi_guidcmp(sp->guid, SAL_PLAT_PCI_COMP_ERR_SECT_GUID)) {
+ platform_err = 1;
+ LOG_INDEX_ADD_SECT_PTR(slidx->pci_comp_err, sp);
+ } else if (!efi_guidcmp(sp->guid, SAL_PLAT_SPECIFIC_ERR_SECT_GUID)) {
+ platform_err = 1;
+ LOG_INDEX_ADD_SECT_PTR(slidx->plat_specific_err, sp);
+ } else if (!efi_guidcmp(sp->guid, SAL_PLAT_HOST_CTLR_ERR_SECT_GUID)) {
+ platform_err = 1;
+ LOG_INDEX_ADD_SECT_PTR(slidx->host_ctlr_err, sp);
+ } else if (!efi_guidcmp(sp->guid, SAL_PLAT_BUS_ERR_SECT_GUID)) {
+ platform_err = 1;
+ LOG_INDEX_ADD_SECT_PTR(slidx->plat_bus_err, sp);
+ } else {
+ LOG_INDEX_ADD_SECT_PTR(slidx->unsupported, sp);
+ }
+ }
+ slidx->n_sections = sects;
+
+ return platform_err;
+}
+
+/**
+ * init_record_index_pools - Initialize pool of lists for SAL record index
+ *
+ * Sizes slidx_pool for the worst case of two SAL records made entirely of
+ * the smallest known section type, and allocates it with kmalloc().
+ * Only slidx_pool.max_idx and slidx_pool.buffer are written here.
+ *
+ * Return value:
+ * 0 on Success / -ENOMEM on Failure
+ */
+static int
+init_record_index_pools(void)
+{
+	int i;
+	int rec_max_size; /* Maximum size of SAL error records */
+	int sect_min_size; /* Minimum size of SAL error sections */
+	/* minimum size table of each section */
+	static int sal_log_sect_min_sizes[] = {
+		sizeof(sal_log_processor_info_t) + sizeof(sal_processor_static_info_t),
+		sizeof(sal_log_mem_dev_err_info_t),
+		sizeof(sal_log_sel_dev_err_info_t),
+		sizeof(sal_log_pci_bus_err_info_t),
+		sizeof(sal_log_smbios_dev_err_info_t),
+		sizeof(sal_log_pci_comp_err_info_t),
+		sizeof(sal_log_plat_specific_err_info_t),
+		sizeof(sal_log_host_ctlr_err_info_t),
+		sizeof(sal_log_plat_bus_err_info_t),
+	};
+
+	/*
+	 * MCA handler cannot allocate new memory on flight,
+	 * so we preallocate enough memory to handle a SAL record.
+	 *
+	 * Initialize a handling set of slidx_pool:
+	 * 1. Pick up the max size of SAL error records
+	 * 2. Pick up the min size of SAL error sections
+	 * 3. Allocate the pool as enough to 2 SAL records
+	 * (now we can estimate the maximum of section in a record.)
+	 */
+
+	/* - 1 - */
+	rec_max_size = sal_rec_max;
+
+	/*
+	 * - 2 -
+	 * The table holds ints, so the element count must come from the
+	 * table's own element size; dividing by sizeof(size_t) would scan
+	 * only part of it wherever size_t is wider than int.
+	 */
+	sect_min_size = sal_log_sect_min_sizes[0];
+	for (i = 1; i < ARRAY_SIZE(sal_log_sect_min_sizes); i++)
+		if (sect_min_size > sal_log_sect_min_sizes[i])
+			sect_min_size = sal_log_sect_min_sizes[i];
+
+	/* - 3 - */
+	slidx_pool.max_idx = (rec_max_size/sect_min_size) * 2 + 1;
+	slidx_pool.buffer = kmalloc(slidx_pool.max_idx * sizeof(slidx_list_t), GFP_KERNEL);
+
+	return slidx_pool.buffer ? 0 : -ENOMEM;
+}
+
+
+/*****************************************************************************
+ * Recovery functions *
+ *****************************************************************************/
+
+/**
+ * is_mca_global - Check whether this MCA is global or not
+ * @peidx: pointer of index of processor error section
+ * @pbci: pointer to pal_bus_check_info_t (may be NULL)
+ *
+ * Decision order: SAL rendezvous state first, then the processor state
+ * parameter check bits, then the bus check information.
+ *
+ * Return value:
+ * MCA_IS_LOCAL / MCA_IS_GLOBAL
+ */
+
+static mca_type_t
+is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci)
+{
+	pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx);
+
+	/*
+	 * PAL can request a rendezvous, if the MCA has a global scope.
+	 * If "rz_always" flag is set, SAL requests MCA rendezvous
+	 * in spite of global MCA.
+	 * Therefore it is local MCA when rendezvous has not been requested.
+	 * Failed to rendezvous, the system must be down.
+	 *
+	 *  -1: SAL rendezvous unsuccessful
+	 *   0: SAL rendezvous not required
+	 *   1: SAL rendezvous successful int
+	 *   2: SAL rendezvous successful int with init
+	 * Any value other than -1 and 0 is decided by the checks below.
+	 */
+	if (sal_to_os_handoff_state->imsto_rendez_state == -1)
+		return MCA_IS_GLOBAL;
+	if (sal_to_os_handoff_state->imsto_rendez_state == 0)
+		return MCA_IS_LOCAL;
+
+	/*
+	 * If One or more Cache/TLB/Reg_File/Uarch_Check is here,
+	 * it would be a local MCA. (i.e. processor internal error)
+	 */
+	if (psp->tc || psp->cc || psp->rc || psp->uc)
+		return MCA_IS_LOCAL;
+
+	/*
+	 * Bus_Check structure with Bus_Check.ib (internal bus error) flag set
+	 * would be a global MCA. (e.g. a system bus address parity error)
+	 * A missing Bus_Check structure is treated the same way.
+	 */
+	if (!pbci || pbci->ib)
+		return MCA_IS_GLOBAL;
+
+	/*
+	 * Bus_Check structure with Bus_Check.eb (external bus error) flag set
+	 * could be either a local MCA or a global MCA.
+	 *
+	 * Referring Bus_Check.bsi:
+	 * 0: Unknown/unclassified (e.g. a load from poisoned memory) -> local
+	 * 1: BERR#
+	 * 2: BINIT#
+	 * 3: Hard Fail
+	 * (FIXME: Are these SGI specific or generic bsi values?)
+	 * Every bsi value other than 0 ends up global.
+	 */
+	if (pbci->eb && pbci->bsi == 0)
+		return MCA_IS_LOCAL;
+
+	return MCA_IS_GLOBAL;
+}
+
+/**
+ * recover_from_read_error - Try to recover the errors which type are "read"s.
+ * @slidx: pointer of index of SAL error record (not used here)
+ * @peidx: pointer of index of processor error section
+ * @pbci: pointer of pal_bus_check_info
+ *
+ * Recovery is attempted only when the target address is valid, the
+ * interrupted context was not running at privilege level 0, and the first
+ * bus check entry carries a target identifier. In that case the min-state
+ * save area is rewritten so that execution resumes in
+ * "mca_handler_bhhook" at privilege level 0.
+ *
+ * Return value:
+ * 1 on Success / 0 on Failure
+ */
+
+static int
+recover_from_read_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci)
+{
+	ia64_fptr_t *bh_desc = (ia64_fptr_t*)mca_handler_bhhook;
+	struct ia64_psr *intr_psr, *resume_psr;
+	sal_log_mod_error_info_t *bus_info;
+	pal_min_state_area_t *min_state;
+
+	/* Is target address valid? */
+	if (!pbci->tv)
+		return 0;
+
+	/*
+	 * cpu read or memory-mapped io read
+	 *
+	 * offending process affected process OS MCA do
+	 * kernel mode kernel mode down system
+	 * kernel mode user mode kill the process
+	 * user mode kernel mode down system (*)
+	 * user mode user mode kill the process
+	 *
+	 * (*) You could terminate offending user-mode process
+	 * if (pbci->pv && pbci->pl != 0) *and* if you sure
+	 * the process not have any locks of kernel.
+	 */
+
+	/* Only a context interrupted outside privilege level 0 is handled. */
+	intr_psr = (struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr);
+	if (intr_psr->cpl == 0)
+		return 0;
+
+	bus_info = peidx_bus_check(peidx, 0);
+	if (!bus_info->valid.target_identifier)
+		return 0;
+
+	/*
+	 * setup for resume to bottom half of MCA,
+	 * "mca_handler_bhhook"
+	 */
+	min_state = (pal_min_state_area_t *)(sal_to_os_handoff_state->pal_min_state | (6ul<<61));
+	/* pass to bhhook as 1st argument (gr8) */
+	min_state->pmsa_gr[8-1] = bus_info->target_identifier;
+	/* set interrupted return address (but no use) */
+	min_state->pmsa_br0 = min_state->pmsa_iip;
+	/* change resume address to bottom half */
+	min_state->pmsa_iip = bh_desc->fp;
+	min_state->pmsa_gr[1-1] = bh_desc->gp;
+	/* set cpl with kernel mode */
+	resume_psr = (struct ia64_psr *)&min_state->pmsa_ipsr;
+	resume_psr->cpl = 0;
+	resume_psr->ri = 0;
+
+	return 1;
+}
+
+/**
+ * recover_from_platform_error - Recover from platform error.
+ * @slidx: pointer of index of SAL error record
+ * @peidx: pointer of index of processor error section
+ * @pbci: pointer of pal_bus_check_info
+ *
+ * Return value:
+ * 1 on Success / 0 on Failure
+ */
+
+static int
+recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci)
+{
+ int status = 0; /* 0 = not recovered; only the read cases below can change it */
+ pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx);
+
+ if (psp->bc && pbci->eb && pbci->bsi == 0) { /* bus check + external bus error with bsi 0 */
+ switch(pbci->type) {
+ case 1: /* partial read */
+ case 3: /* full line(cpu) read */
+ case 9: /* I/O space read */
+ status = recover_from_read_error(slidx, peidx, pbci);
+ break;
+ case 0: /* unknown */
+ case 2: /* partial write */
+ case 4: /* full line write */
+ case 5: /* implicit or explicit write-back operation */
+ case 6: /* snoop probe */
+ case 7: /* incoming or outgoing ptc.g */
+ case 8: /* write coalescing transactions */
+ case 10: /* I/O space write */
+ case 11: /* inter-processor interrupt message(IPI) */
+ case 12: /* interrupt acknowledge or external task priority cycle */
+ default: /* no recovery is attempted for any non-read transaction type */
+ break;
+ }
+ }
+
+ return status;
+}
+
+/**
+ * recover_from_processor_error - Try to recover from a processor error.
+ * @platform: whether there are some platform error section or not
+ * @slidx: pointer of index of SAL error record
+ * @peidx: pointer of index of processor error section
+ * @pbci: pointer of pal_bus_check_info
+ *
+ * Return value:
+ * 1 on Success / 0 on Failure
+ */
+/*
+ * We try to recover only when all of the following conditions are satisfied:
+ * 1. Only one processor error section exists.
+ * 2. BUS_CHECK exists and the other checks do not (except TLB_CHECK).
+ * 3. The number of BUS_CHECK_INFO entries is 1.
+ * 4. The "External bus error" flag is set and the others are not set.
+ */
+
+static int
+recover_from_processor_error(int platform, slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci)
+{
+ pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx);
+
+ /*
+ * We cannot recover from errors other than bus_check.
+ */
+ if (psp->cc || psp->rc || psp->uc)
+ return 0;
+
+ /*
+ * If there is no bus error, the record is weird but there is nothing to recover.
+ */
+ if (psp->bc == 0 || pbci == NULL)
+ return 1;
+
+ /*
+ * Sorry, we cannot handle more than one bus check.
+ */
+ if (peidx_bus_check_num(peidx) > 1)
+ return 0;
+ /*
+ * Well, at this point there is at most one bus check entry.
+ */
+ if (pbci->ib || pbci->cc)
+ return 0;
+ if (pbci->eb && pbci->bsi > 0)
+ return 0;
+ if (psp->ci == 0)
+ return 0;
+
+ /*
+ * This is a local MCA, estimated to be a recoverable external bus error.
+ * (e.g. a load from poisoned memory)
+ * This means "there are some platform errors".
+ */
+ if (platform)
+ return recover_from_platform_error(slidx, peidx, pbci);
+ /*
+ * The SAL error record is strange (no platform error), so we cannot recover.
+ */
+ return 0;
+}
+
+/**
+ * mca_try_to_recover - Try to recover from MCA
+ * @rec: pointer to a SAL error record
+ *
+ * Return value:
+ * 1 on Success / 0 on Failure
+ */
+
+static int
+mca_try_to_recover(void *rec,
+ ia64_mca_sal_to_os_state_t *sal_to_os_state,
+ ia64_mca_os_to_sal_state_t *os_to_sal_state)
+{
+ int platform_err;
+ int n_proc_err;
+ slidx_table_t slidx;
+ peidx_table_t peidx;
+ pal_bus_check_info_t pbci;
+
+ /* handoff state from/to mca.c, kept in file-scope pointers for the helpers */
+ sal_to_os_handoff_state = sal_to_os_state;
+ os_to_sal_handoff_state = os_to_sal_state;
+
+ /* Make index of SAL error record; the result is later passed on as the "platform" flag */
+ platform_err = mca_make_slidx(rec, &slidx);
+
+ /* Count processor error sections */
+ n_proc_err = slidx_count(&slidx, proc_err);
+
+ /* For now, the OS can recover only when there is exactly one processor error section */
+ if (n_proc_err > 1)
+ return 0;
+ else if (n_proc_err == 0) {
+ /* Weird SAL record ... there is nothing to recover */
+
+ return 1;
+ }
+
+ /* Make index of processor error section (the list is non-empty here: n_proc_err == 1) */
+ mca_make_peidx((sal_log_processor_info_t*)slidx_first_entry(&slidx.proc_err)->hdr, &peidx);
+
+ /* Extract Processor BUS_CHECK[0]; peidx_check_info() gives 0 if it is absent or not valid */
+ *((u64*)&pbci) = peidx_check_info(&peidx, bus_check, 0);
+
+ /* Check whether MCA is global or not; a global MCA is not recovered */
+ if (is_mca_global(&peidx, &pbci))
+ return 0;
+
+ /* Try to recover from a processor error */
+ return recover_from_processor_error(platform_err, &slidx, &peidx, &pbci);
+}
+
+/*
+ * =============================================================================
+ */
+
+int __init mca_external_handler_init(void)
+{
+ if (init_record_index_pools()) /* any non-zero result is reported as -ENOMEM */
+ return -ENOMEM;
+
+ /* register external mca handlers */
+ if (ia64_reg_MCA_extension(mca_try_to_recover)){
+ printk(KERN_ERR "ia64_reg_MCA_extension failed.\n");
+ kfree(slidx_pool.buffer); /* presumably allocated by init_record_index_pools() -- TODO confirm */
+ return -EFAULT;
+ }
+ return 0;
+}
+
+void __exit mca_external_handler_exit(void)
+{
+ /* unregister external mca handlers */
+ ia64_unreg_MCA_extension();
+ kfree(slidx_pool.buffer); /* the same buffer the init error path releases */
+}
+
+module_init(mca_external_handler_init);
+module_exit(mca_external_handler_exit);
+
+module_param(sal_rec_max, int, 0644);
+MODULE_PARM_DESC(sal_rec_max, "Max size of SAL error record");
+
+MODULE_DESCRIPTION("ia64 platform dependent mca handler driver");
+MODULE_LICENSE("GPL");
diff --git a/arch/ia64/kernel/mca_drv.h b/arch/ia64/kernel/mca_drv.h
new file mode 100644
index 00000000000..0227b761f2c
--- /dev/null
+++ b/arch/ia64/kernel/mca_drv.h
@@ -0,0 +1,113 @@
+/*
+ * File: mca_drv.h
+ * Purpose: Define helpers for Generic MCA handling
+ *
+ * Copyright (C) 2004 FUJITSU LIMITED
+ * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com)
+ */
+/*
+ * Processor error section:
+ *
+ * +-sal_log_processor_info_t *info-------------+
+ * | sal_log_section_hdr_t header; |
+ * | ... |
+ * | sal_log_mod_error_info_t info[0]; |
+ * +-+----------------+-------------------------+
+ * | CACHE_CHECK | ^ num_cache_check v
+ * +----------------+
+ * | TLB_CHECK | ^ num_tlb_check v
+ * +----------------+
+ * | BUS_CHECK | ^ num_bus_check v
+ * +----------------+
+ * | REG_FILE_CHECK | ^ num_reg_file_check v
+ * +----------------+
+ * | MS_CHECK | ^ num_ms_check v
+ * +-struct cpuid_info *id----------------------+
+ * | regs[5]; |
+ * | reserved; |
+ * +-sal_processor_static_info_t *regs----------+
+ * | valid; |
+ * | ... |
+ * | fr[128]; |
+ * +--------------------------------------------+
+ */
+
+/* peidx: index of processor error section */
+typedef struct peidx_table {
+ sal_log_processor_info_t *info;
+ struct sal_cpuid_info *id;
+ sal_processor_static_info_t *regs;
+} peidx_table_t;
+
+#define peidx_head(p) (((p)->info))
+#define peidx_mid(p) (((p)->id))
+#define peidx_bottom(p) (((p)->regs))
+
+#define peidx_psp(p) (&(peidx_head(p)->proc_state_parameter))
+#define peidx_field_valid(p) (&(peidx_head(p)->valid))
+#define peidx_minstate_area(p) (&(peidx_bottom(p)->min_state_area))
+
+#define peidx_cache_check_num(p) (peidx_head(p)->valid.num_cache_check)
+#define peidx_tlb_check_num(p) (peidx_head(p)->valid.num_tlb_check)
+#define peidx_bus_check_num(p) (peidx_head(p)->valid.num_bus_check)
+#define peidx_reg_file_check_num(p) (peidx_head(p)->valid.num_reg_file_check)
+#define peidx_ms_check_num(p) (peidx_head(p)->valid.num_ms_check)
+
+#define peidx_cache_check_idx(p, n) (n)
+#define peidx_tlb_check_idx(p, n) (peidx_cache_check_idx(p, peidx_cache_check_num(p)) + n)
+#define peidx_bus_check_idx(p, n) (peidx_tlb_check_idx(p, peidx_tlb_check_num(p)) + n)
+#define peidx_reg_file_check_idx(p, n) (peidx_bus_check_idx(p, peidx_bus_check_num(p)) + n)
+#define peidx_ms_check_idx(p, n) (peidx_reg_file_check_idx(p, peidx_reg_file_check_num(p)) + n)
+
+#define peidx_mod_error_info(p, name, n) \
+({ int __idx = peidx_##name##_idx(p, n); /* position of entry n of this check type in info[] */ \
+ sal_log_mod_error_info_t *__ret = NULL; /* stays NULL when entry n does not exist */ \
+ if (peidx_##name##_num(p) > n) /*BUG*/ \
+ __ret = &(peidx_head(p)->info[__idx]); \
+ __ret; })
+
+#define peidx_cache_check(p, n) peidx_mod_error_info(p, cache_check, n)
+#define peidx_tlb_check(p, n) peidx_mod_error_info(p, tlb_check, n)
+#define peidx_bus_check(p, n) peidx_mod_error_info(p, bus_check, n)
+#define peidx_reg_file_check(p, n) peidx_mod_error_info(p, reg_file_check, n)
+#define peidx_ms_check(p, n) peidx_mod_error_info(p, ms_check, n)
+
+#define peidx_check_info(proc, name, n) \
+({ \
+ sal_log_mod_error_info_t *__info = peidx_mod_error_info(proc, name, n);\
+ u64 __temp = __info && __info->valid.check_info \
+ ? __info->check_info : 0; \
+ __temp; })
+
+/* slidx: index of SAL log error record */
+
+typedef struct slidx_list {
+ struct list_head list;
+ sal_log_section_hdr_t *hdr;
+} slidx_list_t;
+
+typedef struct slidx_table {
+ sal_log_record_header_t *header;
+ int n_sections; /* # of section headers */
+ struct list_head proc_err;
+ struct list_head mem_dev_err;
+ struct list_head sel_dev_err;
+ struct list_head pci_bus_err;
+ struct list_head smbios_dev_err;
+ struct list_head pci_comp_err;
+ struct list_head plat_specific_err;
+ struct list_head host_ctlr_err;
+ struct list_head plat_bus_err;
+ struct list_head unsupported; /* list of unsupported sections */
+} slidx_table_t;
+
+#define slidx_foreach_entry(pos, head) \
+ list_for_each_entry(pos, head, list)
+#define slidx_first_entry(head) \
+ (((head)->next != (head)) ? list_entry((head)->next, typeof(slidx_list_t), list) : NULL)
+#define slidx_count(slidx, sec) \
+({ int __count = 0; \
+ slidx_list_t *__pos; \
+ slidx_foreach_entry(__pos, &((slidx)->sec)) { __count++; }\
+ __count; })
+
diff --git a/arch/ia64/kernel/mca_drv_asm.S b/arch/ia64/kernel/mca_drv_asm.S
new file mode 100644
index 00000000000..bcfa05acc56
--- /dev/null
+++ b/arch/ia64/kernel/mca_drv_asm.S
@@ -0,0 +1,45 @@
+/*
+ * File: mca_drv_asm.S
+ * Purpose: Assembly portion of Generic MCA handling
+ *
+ * Copyright (C) 2004 FUJITSU LIMITED
+ * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com)
+ */
+#include <linux/config.h>
+#include <linux/threads.h>
+
+#include <asm/asmmacro.h>
+#include <asm/processor.h>
+
+GLOBAL_ENTRY(mca_handler_bhhook)
+ invala // invalidate the ALAT (original note: clear RSE ?)
+ ;; //
+ cover // allocate a new, empty register frame
+ ;; //
+ clrrrb // clear the register rename bases
+ ;;
+ alloc r16=ar.pfs,0,2,1,0 // make a new frame: 0 in, 2 locals, 1 out; old ar.pfs -> r16
+ ;;
+ mov r13=IA64_KR(CURRENT) // current task pointer
+ ;;
+ adds r12=IA64_TASK_THREAD_KSP_OFFSET,r13 // address of the saved ksp in the task (per the macro name)
+ ;;
+ ld8 r12=[r12] // stack pointer
+ ;;
+ mov loc0=r16 // keep the saved ar.pfs in a local
+ movl loc1=mca_handler_bh // recovery C function
+ ;;
+ mov out0=r8 // poisoned address (1st argument, arrives in r8)
+ mov b6=loc1
+ ;;
+ mov loc1=rp // loc1 is reused to hold the return pointer
+ ;;
+ br.call.sptk.many rp=b6 // call mca_handler_bh; it is not expected to return
+ ;;
+ mov ar.pfs=loc0 // only reached if the call does return
+ mov rp=loc1
+ ;;
+ mov r8=r0 // return value 0
+ br.ret.sptk.many rp
+ ;;
+END(mca_handler_bhhook)
diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h
new file mode 100644
index 00000000000..1dbc7b2497c
--- /dev/null
+++ b/arch/ia64/kernel/minstate.h
@@ -0,0 +1,251 @@
+#include <linux/config.h>
+
+#include <asm/cache.h>
+
+#include "entry.h"
+
+/*
+ * For ivt.s we want to access the stack virtually so we don't have to disable translation
+ * on interrupts.
+ *
+ * On entry:
+ * r1: pointer to current task (ar.k6)
+ */
+#define MINSTATE_START_SAVE_MIN_VIRT \
+(pUStk) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \
+ ;; \
+(pUStk) mov.m r24=ar.rnat; \
+(pUStk) addl r22=IA64_RBS_OFFSET,r1; /* compute base of RBS */ \
+(pKStk) mov r1=sp; /* get sp */ \
+ ;; \
+(pUStk) lfetch.fault.excl.nt1 [r22]; \
+(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \
+(pUStk) mov r23=ar.bspstore; /* save ar.bspstore */ \
+ ;; \
+(pUStk) mov ar.bspstore=r22; /* switch to kernel RBS */ \
+(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \
+ ;; \
+(pUStk) mov r18=ar.bsp; \
+(pUStk) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */
+
+#define MINSTATE_END_SAVE_MIN_VIRT \
+ bsw.1; /* switch back to bank 1 (must be last in insn group) */ \
+ ;;
+
+/*
+ * For mca_asm.S we want to access the stack physically since the state is saved before we
+ * go virtual and don't want to destroy the iip or ipsr.
+ */
+#define MINSTATE_START_SAVE_MIN_PHYS \
+(pKStk) mov r3=IA64_KR(PER_CPU_DATA);; \
+(pKStk) addl r3=THIS_CPU(ia64_mca_data),r3;; \
+(pKStk) ld8 r3 = [r3];; \
+(pKStk) addl r3=IA64_MCA_CPU_INIT_STACK_OFFSET,r3;; \
+(pKStk) addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r3; \
+(pUStk) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \
+(pUStk) addl r22=IA64_RBS_OFFSET,r1; /* compute base of register backing store */ \
+ ;; \
+(pUStk) mov r24=ar.rnat; \
+(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \
+(pUStk) mov r23=ar.bspstore; /* save ar.bspstore */ \
+(pUStk) dep r22=-1,r22,61,3; /* compute kernel virtual addr of RBS */ \
+ ;; \
+(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \
+(pUStk) mov ar.bspstore=r22; /* switch to kernel RBS */ \
+ ;; \
+(pUStk) mov r18=ar.bsp; \
+(pUStk) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \
+
+#define MINSTATE_END_SAVE_MIN_PHYS \
+ dep r12=-1,r12,61,3; /* make sp a kernel virtual address */ \
+ ;;
+
+#ifdef MINSTATE_VIRT
+# define MINSTATE_GET_CURRENT(reg) mov reg=IA64_KR(CURRENT)
+# define MINSTATE_START_SAVE_MIN MINSTATE_START_SAVE_MIN_VIRT
+# define MINSTATE_END_SAVE_MIN MINSTATE_END_SAVE_MIN_VIRT
+#endif
+
+#ifdef MINSTATE_PHYS
+# define MINSTATE_GET_CURRENT(reg) mov reg=IA64_KR(CURRENT);; tpa reg=reg
+# define MINSTATE_START_SAVE_MIN MINSTATE_START_SAVE_MIN_PHYS
+# define MINSTATE_END_SAVE_MIN MINSTATE_END_SAVE_MIN_PHYS
+#endif
+
+/*
+ * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
+ * the minimum state necessary that allows us to turn psr.ic back
+ * on.
+ *
+ * Assumed state upon entry:
+ * psr.ic: off
+ * r31: contains saved predicates (pr)
+ *
+ * Upon exit, the state is as follows:
+ * psr.ic: off
+ * r2 = points to &pt_regs.r16
+ * r8 = contents of ar.ccv
+ * r9 = contents of ar.csd
+ * r10 = contents of ar.ssd
+ * r11 = FPSR_DEFAULT
+ * r12 = kernel sp (kernel virtual address)
+ * r13 = points to current task_struct (kernel virtual address)
+ * p15 = TRUE if psr.i is set in cr.ipsr
+ * predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
+ * preserved
+ *
+ * Note that psr.ic is NOT turned on by this macro. This is so that
+ * we can pass interruption state as arguments to a handler.
+ */
+#define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \
+ MINSTATE_GET_CURRENT(r16); /* M (or M;;I) */ \
+ mov r27=ar.rsc; /* M */ \
+ mov r20=r1; /* A */ \
+ mov r25=ar.unat; /* M */ \
+ mov r29=cr.ipsr; /* M */ \
+ mov r26=ar.pfs; /* I */ \
+ mov r28=cr.iip; /* M */ \
+ mov r21=ar.fpsr; /* M */ \
+ COVER; /* B;; (or nothing) */ \
+ ;; \
+ adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16; \
+ ;; \
+ ld1 r17=[r16]; /* load current->thread.on_ustack flag */ \
+ st1 [r16]=r0; /* clear current->thread.on_ustack flag */ \
+ adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 \
+ /* switch from user to kernel RBS: */ \
+ ;; \
+ invala; /* M */ \
+ SAVE_IFS; \
+ cmp.eq pKStk,pUStk=r0,r17; /* are we in kernel mode already? */ \
+ ;; \
+ MINSTATE_START_SAVE_MIN \
+ adds r17=2*L1_CACHE_BYTES,r1; /* really: biggest cache-line size */ \
+ adds r16=PT(CR_IPSR),r1; \
+ ;; \
+ lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \
+ st8 [r16]=r29; /* save cr.ipsr */ \
+ ;; \
+ lfetch.fault.excl.nt1 [r17]; \
+ tbit.nz p15,p0=r29,IA64_PSR_I_BIT; \
+ mov r29=b0 \
+ ;; \
+ adds r16=PT(R8),r1; /* initialize first base pointer */ \
+ adds r17=PT(R9),r1; /* initialize second base pointer */ \
+(pKStk) mov r18=r0; /* make sure r18 isn't NaT */ \
+ ;; \
+.mem.offset 0,0; st8.spill [r16]=r8,16; \
+.mem.offset 8,0; st8.spill [r17]=r9,16; \
+ ;; \
+.mem.offset 0,0; st8.spill [r16]=r10,24; \
+.mem.offset 8,0; st8.spill [r17]=r11,24; \
+ ;; \
+ st8 [r16]=r28,16; /* save cr.iip */ \
+ st8 [r17]=r30,16; /* save cr.ifs */ \
+(pUStk) sub r18=r18,r22; /* r18=RSE.ndirty*8 */ \
+ mov r8=ar.ccv; \
+ mov r9=ar.csd; \
+ mov r10=ar.ssd; \
+ movl r11=FPSR_DEFAULT; /* L-unit */ \
+ ;; \
+ st8 [r16]=r25,16; /* save ar.unat */ \
+ st8 [r17]=r26,16; /* save ar.pfs */ \
+ shl r18=r18,16; /* compute ar.rsc to be used for "loadrs" */ \
+ ;; \
+ st8 [r16]=r27,16; /* save ar.rsc */ \
+(pUStk) st8 [r17]=r24,16; /* save ar.rnat */ \
+(pKStk) adds r17=16,r17; /* skip over ar_rnat field */ \
+ ;; /* avoid RAW on r16 & r17 */ \
+(pUStk) st8 [r16]=r23,16; /* save ar.bspstore */ \
+ st8 [r17]=r31,16; /* save predicates */ \
+(pKStk) adds r16=16,r16; /* skip over ar_bspstore field */ \
+ ;; \
+ st8 [r16]=r29,16; /* save b0 */ \
+ st8 [r17]=r18,16; /* save ar.rsc value for "loadrs" */ \
+ cmp.eq pNonSys,pSys=r0,r0 /* initialize pSys=0, pNonSys=1 */ \
+ ;; \
+.mem.offset 0,0; st8.spill [r16]=r20,16; /* save original r1 */ \
+.mem.offset 8,0; st8.spill [r17]=r12,16; \
+ adds r12=-16,r1; /* switch to kernel memory stack (with 16 bytes of scratch) */ \
+ ;; \
+.mem.offset 0,0; st8.spill [r16]=r13,16; \
+.mem.offset 8,0; st8.spill [r17]=r21,16; /* save ar.fpsr */ \
+ mov r13=IA64_KR(CURRENT); /* establish `current' */ \
+ ;; \
+.mem.offset 0,0; st8.spill [r16]=r15,16; \
+.mem.offset 8,0; st8.spill [r17]=r14,16; \
+ ;; \
+.mem.offset 0,0; st8.spill [r16]=r2,16; \
+.mem.offset 8,0; st8.spill [r17]=r3,16; \
+ adds r2=IA64_PT_REGS_R16_OFFSET,r1; \
+ ;; \
+ EXTRA; \
+ movl r1=__gp; /* establish kernel global pointer */ \
+ ;; \
+ MINSTATE_END_SAVE_MIN
+
+/*
+ * SAVE_REST saves the remainder of pt_regs (with psr.ic on).
+ *
+ * Assumed state upon entry:
+ * psr.ic: on
+ * r2: points to &pt_regs.r16
+ * r3: points to &pt_regs.r17
+ * r8: contents of ar.ccv
+ * r9: contents of ar.csd
+ * r10: contents of ar.ssd
+ * r11: FPSR_DEFAULT
+ *
+ * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST.
+ */
+#define SAVE_REST \
+.mem.offset 0,0; st8.spill [r2]=r16,16; \
+.mem.offset 8,0; st8.spill [r3]=r17,16; \
+ ;; \
+.mem.offset 0,0; st8.spill [r2]=r18,16; \
+.mem.offset 8,0; st8.spill [r3]=r19,16; \
+ ;; \
+.mem.offset 0,0; st8.spill [r2]=r20,16; \
+.mem.offset 8,0; st8.spill [r3]=r21,16; \
+ mov r18=b6; \
+ ;; \
+.mem.offset 0,0; st8.spill [r2]=r22,16; \
+.mem.offset 8,0; st8.spill [r3]=r23,16; \
+ mov r19=b7; \
+ ;; \
+.mem.offset 0,0; st8.spill [r2]=r24,16; \
+.mem.offset 8,0; st8.spill [r3]=r25,16; \
+ ;; \
+.mem.offset 0,0; st8.spill [r2]=r26,16; \
+.mem.offset 8,0; st8.spill [r3]=r27,16; \
+ ;; \
+.mem.offset 0,0; st8.spill [r2]=r28,16; \
+.mem.offset 8,0; st8.spill [r3]=r29,16; \
+ ;; \
+.mem.offset 0,0; st8.spill [r2]=r30,16; \
+.mem.offset 8,0; st8.spill [r3]=r31,32; \
+ ;; \
+ mov ar.fpsr=r11; /* M-unit */ \
+ st8 [r2]=r8,8; /* ar.ccv */ \
+ adds r24=PT(B6)-PT(F7),r3; \
+ ;; \
+ stf.spill [r2]=f6,32; \
+ stf.spill [r3]=f7,32; \
+ ;; \
+ stf.spill [r2]=f8,32; \
+ stf.spill [r3]=f9,32; \
+ ;; \
+ stf.spill [r2]=f10; \
+ stf.spill [r3]=f11; \
+ adds r25=PT(B7)-PT(F11),r3; \
+ ;; \
+ st8 [r24]=r18,16; /* b6 */ \
+ st8 [r25]=r19,16; /* b7 */ \
+ ;; \
+ st8 [r24]=r9; /* ar.csd */ \
+ st8 [r25]=r10; /* ar.ssd */ \
+ ;;
+
+#define SAVE_MIN_WITH_COVER DO_SAVE_MIN(cover, mov r30=cr.ifs,)
+#define SAVE_MIN_WITH_COVER_R19 DO_SAVE_MIN(cover, mov r30=cr.ifs, mov r15=r19)
+#define SAVE_MIN DO_SAVE_MIN( , mov r30=r0, )
diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c
new file mode 100644
index 00000000000..febc091c2f0
--- /dev/null
+++ b/arch/ia64/kernel/module.c
@@ -0,0 +1,952 @@
+/*
+ * IA-64-specific support for kernel module loader.
+ *
+ * Copyright (C) 2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Loosely based on patch by Rusty Russell.
+ */
+
+/* relocs tested so far:
+
+ DIR64LSB
+ FPTR64LSB
+ GPREL22
+ LDXMOV
+ LDXMOV
+ LTOFF22
+ LTOFF22X
+ LTOFF22X
+ LTOFF_FPTR22
+ PCREL21B (for br.call only; br.cond is not supported out of modules!)
+ PCREL60B (for brl.cond only; brl.call is not supported for modules!)
+ PCREL64LSB
+ SECREL32LSB
+ SEGREL64LSB
+ */
+
+#include <linux/config.h>
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/elf.h>
+#include <linux/moduleloader.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+
+#include <asm/patch.h>
+#include <asm/unaligned.h>
+
+#define ARCH_MODULE_DEBUG 0
+
+#if ARCH_MODULE_DEBUG
+# define DEBUGP printk
+# define inline
+#else
+# define DEBUGP(fmt , a...)
+#endif
+
+#ifdef CONFIG_ITANIUM
+# define USE_BRL 0
+#else
+# define USE_BRL 1
+#endif
+
+#define MAX_LTOFF ((uint64_t) (1 << 22)) /* max. allowable linkage-table offset */
+
+/* Define some relocation helper macros/types: */
+
+#define FORMAT_SHIFT 0
+#define FORMAT_BITS 3
+#define FORMAT_MASK ((1 << FORMAT_BITS) - 1)
+#define VALUE_SHIFT 3
+#define VALUE_BITS 5
+#define VALUE_MASK ((1 << VALUE_BITS) - 1)
+
+enum reloc_target_format {
+ /* direct encoded formats: */
+ RF_NONE = 0,
+ RF_INSN14 = 1,
+ RF_INSN22 = 2,
+ RF_INSN64 = 3,
+ RF_32MSB = 4,
+ RF_32LSB = 5,
+ RF_64MSB = 6,
+ RF_64LSB = 7,
+
+ /* formats that cannot be directly decoded: */
+ RF_INSN60,
+ RF_INSN21B, /* imm21 form 1 */
+ RF_INSN21M, /* imm21 form 2 */
+ RF_INSN21F /* imm21 form 3 */
+};
+
+enum reloc_value_formula {
+ RV_DIRECT = 4, /* S + A */
+ RV_GPREL = 5, /* @gprel(S + A) */
+ RV_LTREL = 6, /* @ltoff(S + A) */
+ RV_PLTREL = 7, /* @pltoff(S + A) */
+ RV_FPTR = 8, /* @fptr(S + A) */
+ RV_PCREL = 9, /* S + A - P */
+ RV_LTREL_FPTR = 10, /* @ltoff(@fptr(S + A)) */
+ RV_SEGREL = 11, /* @segrel(S + A) */
+ RV_SECREL = 12, /* @secrel(S + A) */
+ RV_BDREL = 13, /* BD + A */
+ RV_LTV = 14, /* S + A (like RV_DIRECT, except frozen at static link-time) */
+ RV_PCREL2 = 15, /* S + A - P */
+ RV_SPECIAL = 16, /* various (see below) */
+ RV_RSVD17 = 17,
+ RV_TPREL = 18, /* @tprel(S + A) */
+ RV_LTREL_TPREL = 19, /* @ltoff(@tprel(S + A)) */
+ RV_DTPMOD = 20, /* @dtpmod(S + A) */
+ RV_LTREL_DTPMOD = 21, /* @ltoff(@dtpmod(S + A)) */
+ RV_DTPREL = 22, /* @dtprel(S + A) */
+ RV_LTREL_DTPREL = 23, /* @ltoff(@dtprel(S + A)) */
+ RV_RSVD24 = 24,
+ RV_RSVD25 = 25,
+ RV_RSVD26 = 26,
+ RV_RSVD27 = 27
+ /* 28-31 reserved for implementation-specific purposes. */
+};
+
+#define N(reloc) [R_IA64_##reloc] = #reloc
+
+static const char *reloc_name[256] = {
+ N(NONE), N(IMM14), N(IMM22), N(IMM64),
+ N(DIR32MSB), N(DIR32LSB), N(DIR64MSB), N(DIR64LSB),
+ N(GPREL22), N(GPREL64I), N(GPREL32MSB), N(GPREL32LSB),
+ N(GPREL64MSB), N(GPREL64LSB), N(LTOFF22), N(LTOFF64I),
+ N(PLTOFF22), N(PLTOFF64I), N(PLTOFF64MSB), N(PLTOFF64LSB),
+ N(FPTR64I), N(FPTR32MSB), N(FPTR32LSB), N(FPTR64MSB),
+ N(FPTR64LSB), N(PCREL60B), N(PCREL21B), N(PCREL21M),
+ N(PCREL21F), N(PCREL32MSB), N(PCREL32LSB), N(PCREL64MSB),
+ N(PCREL64LSB), N(LTOFF_FPTR22), N(LTOFF_FPTR64I), N(LTOFF_FPTR32MSB),
+ N(LTOFF_FPTR32LSB), N(LTOFF_FPTR64MSB), N(LTOFF_FPTR64LSB), N(SEGREL32MSB),
+ N(SEGREL32LSB), N(SEGREL64MSB), N(SEGREL64LSB), N(SECREL32MSB),
+ N(SECREL32LSB), N(SECREL64MSB), N(SECREL64LSB), N(REL32MSB),
+ N(REL32LSB), N(REL64MSB), N(REL64LSB), N(LTV32MSB),
+ N(LTV32LSB), N(LTV64MSB), N(LTV64LSB), N(PCREL21BI),
+ N(PCREL22), N(PCREL64I), N(IPLTMSB), N(IPLTLSB),
+ N(COPY), N(LTOFF22X), N(LDXMOV), N(TPREL14),
+ N(TPREL22), N(TPREL64I), N(TPREL64MSB), N(TPREL64LSB),
+ N(LTOFF_TPREL22), N(DTPMOD64MSB), N(DTPMOD64LSB), N(LTOFF_DTPMOD22),
+ N(DTPREL14), N(DTPREL22), N(DTPREL64I), N(DTPREL32MSB),
+ N(DTPREL32LSB), N(DTPREL64MSB), N(DTPREL64LSB), N(LTOFF_DTPREL22)
+};
+
+#undef N
+
+struct got_entry {
+ uint64_t val;
+};
+
+struct fdesc {
+ uint64_t ip;
+ uint64_t gp;
+};
+
+/* Opaque struct for insns, to protect against derefs. */
+struct insn;
+
+static inline uint64_t
+bundle (const struct insn *insn)
+{
+ return (uint64_t) insn & ~0xfUL;
+}
+
+static inline int
+slot (const struct insn *insn)
+{
+ return (uint64_t) insn & 0x3;
+}
+
+static int
+apply_imm64 (struct module *mod, struct insn *insn, uint64_t val)
+{
+ if (slot(insn) != 2) { /* an IMM64 patch is only accepted on slot 2 */
+ printk(KERN_ERR "%s: invalid slot number %d for IMM64\n",
+ mod->name, slot(insn));
+ return 0; /* 0 = failure */
+ }
+ ia64_patch_imm64((u64) insn, val);
+ return 1; /* 1 = patched */
+}
+
+static int
+apply_imm60 (struct module *mod, struct insn *insn, uint64_t val)
+{
+ if (slot(insn) != 2) { /* an IMM60 patch is only accepted on slot 2 */
+ printk(KERN_ERR "%s: invalid slot number %d for IMM60\n",
+ mod->name, slot(insn));
+ return 0; /* 0 = failure */
+ }
+ if (val + ((uint64_t) 1 << 59) >= (1UL << 60)) { /* val, read as signed, must lie in [-2^59, 2^59) */
+ printk(KERN_ERR "%s: value %ld out of IMM60 range\n", mod->name, (int64_t) val);
+ return 0;
+ }
+ ia64_patch_imm60((u64) insn, val);
+ return 1; /* 1 = patched */
+}
+
+static int
+apply_imm22 (struct module *mod, struct insn *insn, uint64_t val)
+{
+ if (val + (1 << 21) >= (1 << 22)) { /* val, read as signed, must lie in [-2^21, 2^21) */
+ printk(KERN_ERR "%s: value %li out of IMM22 range\n", mod->name, (int64_t)val);
+ return 0; /* 0 = failure */
+ }
+ ia64_patch((u64) insn, 0x01fffcfe000UL, ( ((val & 0x200000UL) << 15) /* bit 21 -> 36 */
+ | ((val & 0x1f0000UL) << 6) /* bit 16 -> 22 */
+ | ((val & 0x00ff80UL) << 20) /* bit 7 -> 27 */
+ | ((val & 0x00007fUL) << 13) /* bit 0 -> 13 */));
+ return 1; /* 1 = patched */
+}
+
+static int
+apply_imm21b (struct module *mod, struct insn *insn, uint64_t val)
+{
+ if (val + (1 << 20) >= (1 << 21)) { /* val, read as signed, must lie in [-2^20, 2^20) */
+ printk(KERN_ERR "%s: value %li out of IMM21b range\n", mod->name, (int64_t)val);
+ return 0; /* 0 = failure */
+ }
+ ia64_patch((u64) insn, 0x11ffffe000UL, ( ((val & 0x100000UL) << 16) /* bit 20 -> 36 */
+ | ((val & 0x0fffffUL) << 13) /* bit 0 -> 13 */));
+ return 1; /* 1 = patched */
+}
+
+#if USE_BRL
+
+struct plt_entry {
+ /* Two instruction bundles in PLT (brl variant). */
+ unsigned char bundle[2][16];
+};
+
+static const struct plt_entry ia64_plt_template = {
+ {
+ { /* bundle[0]: immediate is filled in with target_gp by patch_plt() */
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, /* movl gp=TARGET_GP */
+ 0x00, 0x00, 0x00, 0x60
+ },
+ { /* bundle[1]: displacement is filled in from target_ip by patch_plt() */
+ 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* brl.many TARGET_IP */
+ 0x08, 0x00, 0x00, 0xc0
+ }
+ }
+};
+
+static int
+patch_plt (struct module *mod, struct plt_entry *plt, long target_ip, unsigned long target_gp)
+{
+ if (apply_imm64(mod, (struct insn *) (plt->bundle[0] + 2), target_gp) /* "+ 2" selects slot 2 */
+ && apply_imm60(mod, (struct insn *) (plt->bundle[1] + 2),
+ (target_ip - (int64_t) plt->bundle[1]) / 16)) /* offset from bundle[1], in 16-byte units */
+ return 1; /* both immediates patched */
+ return 0; /* either patch was rejected */
+}
+
+unsigned long
+plt_target (struct plt_entry *plt)
+{
+ uint64_t b0, b1, *b = (uint64_t *) plt->bundle[1];
+ long off;
+
+ b0 = b[0]; b1 = b[1];
+ off = ( ((b1 & 0x00fffff000000000UL) >> 36) /* imm20b -> bit 0 */
+ | ((b0 >> 48) << 20) | ((b1 & 0x7fffffUL) << 36) /* imm39 -> bit 20 */
+ | ((b1 & 0x0800000000000000UL) << 0)); /* i -> bit 59 */
+ return (long) plt->bundle[1] + 16*off;
+}
+
+#else /* !USE_BRL */
+
+struct plt_entry {
+ /* Three instruction bundles in PLT. */
+ unsigned char bundle[3][16];
+};
+
+static const struct plt_entry ia64_plt_template = {
+ {
+ {
+ 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* movl r16=TARGET_IP */
+ 0x02, 0x00, 0x00, 0x60
+ },
+ {
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, /* movl gp=TARGET_GP */
+ 0x00, 0x00, 0x00, 0x60
+ },
+ {
+ 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MIB] nop.m 0 */
+ 0x60, 0x80, 0x04, 0x80, 0x03, 0x00, /* mov b6=r16 */
+ 0x60, 0x00, 0x80, 0x00 /* br.few b6 */
+ }
+ }
+};
+
+static int
+patch_plt (struct module *mod, struct plt_entry *plt, long target_ip, unsigned long target_gp)
+{
+ if (apply_imm64(mod, (struct insn *) (plt->bundle[0] + 2), target_ip)
+ && apply_imm64(mod, (struct insn *) (plt->bundle[1] + 2), target_gp))
+ return 1;
+ return 0;
+}
+
+unsigned long
+plt_target (struct plt_entry *plt)
+{
+ uint64_t b0, b1, *b = (uint64_t *) plt->bundle[0];
+
+ b0 = b[0]; b1 = b[1];
+ return ( ((b1 & 0x000007f000000000) >> 36) /* imm7b -> bit 0 */
+ | ((b1 & 0x07fc000000000000) >> 43) /* imm9d -> bit 7 */
+ | ((b1 & 0x0003e00000000000) >> 29) /* imm5c -> bit 16 */
+ | ((b1 & 0x0000100000000000) >> 23) /* ic -> bit 21 */
+ | ((b0 >> 46) << 22) | ((b1 & 0x7fffff) << 40) /* imm41 -> bit 22 */
+ | ((b1 & 0x0800000000000000) << 4)); /* i -> bit 63 */
+}
+
+#endif /* !USE_BRL */
+
+void *
+module_alloc (unsigned long size)
+{
+ if (!size)
+ return NULL;
+ return vmalloc(size);
+}
+
+void
+module_free (struct module *mod, void *module_region)
+{
+ if (mod->arch.init_unw_table && module_region == mod->module_init) {
+ unw_remove_unwind_table(mod->arch.init_unw_table);
+ mod->arch.init_unw_table = NULL;
+ }
+ vfree(module_region);
+}
+
+/* Have we already seen one of these relocations? */
+/* FIXME: we could look in other sections, too --RR */
+static int
+duplicate_reloc (const Elf64_Rela *rela, unsigned int num)
+{
+ unsigned int i;
+
+ for (i = 0; i < num; i++) { /* compare entry "num" against every earlier entry */
+ if (rela[i].r_info == rela[num].r_info && rela[i].r_addend == rela[num].r_addend)
+ return 1; /* same r_info and addend already seen */
+ }
+ return 0;
+}
+
+/* Count how many GOT entries we may need */
+static unsigned int
+count_gots (const Elf64_Rela *rela, unsigned int num)
+{
+ unsigned int i, ret = 0;
+
+ /* Sure, this is order(n^2), but it's usually short, and not
+ time critical */
+ for (i = 0; i < num; i++) {
+ switch (ELF64_R_TYPE(rela[i].r_info)) {
+ case R_IA64_LTOFF22:
+ case R_IA64_LTOFF22X:
+ case R_IA64_LTOFF64I:
+ case R_IA64_LTOFF_FPTR22:
+ case R_IA64_LTOFF_FPTR64I:
+ case R_IA64_LTOFF_FPTR32MSB:
+ case R_IA64_LTOFF_FPTR32LSB:
+ case R_IA64_LTOFF_FPTR64MSB:
+ case R_IA64_LTOFF_FPTR64LSB:
+ if (!duplicate_reloc(rela, i))
+ ret++;
+ break;
+ }
+ }
+ return ret;
+}
+
+/* Count how many PLT entries we may need */
+static unsigned int
+count_plts (const Elf64_Rela *rela, unsigned int num)
+{
+ unsigned int i, ret = 0;
+
+ /* Sure, this is order(n^2), but it's usually short, and not
+ time critical */
+ for (i = 0; i < num; i++) {
+ switch (ELF64_R_TYPE(rela[i].r_info)) {
+ case R_IA64_PCREL21B:
+ case R_IA64_PLTOFF22:
+ case R_IA64_PLTOFF64I:
+ case R_IA64_PLTOFF64MSB:
+ case R_IA64_PLTOFF64LSB:
+ case R_IA64_IPLTMSB:
+ case R_IA64_IPLTLSB:
+ if (!duplicate_reloc(rela, i))
+ ret++;
+ break;
+ }
+ }
+ return ret;
+}
+
+/* We need to create a function descriptor for any internal function
+ which is referenced. */
+static unsigned int
+count_fdescs (const Elf64_Rela *rela, unsigned int num)
+{
+ unsigned int i, ret = 0;
+
+ /* Sure, this is order(n^2), but it's usually short, and not time critical. */
+ for (i = 0; i < num; i++) {
+ switch (ELF64_R_TYPE(rela[i].r_info)) {
+ case R_IA64_FPTR64I:
+ case R_IA64_FPTR32LSB:
+ case R_IA64_FPTR32MSB:
+ case R_IA64_FPTR64LSB:
+ case R_IA64_FPTR64MSB:
+ case R_IA64_LTOFF_FPTR22:
+ case R_IA64_LTOFF_FPTR32LSB:
+ case R_IA64_LTOFF_FPTR32MSB:
+ case R_IA64_LTOFF_FPTR64I:
+ case R_IA64_LTOFF_FPTR64LSB:
+ case R_IA64_LTOFF_FPTR64MSB:
+ case R_IA64_IPLTMSB:
+ case R_IA64_IPLTLSB:
+ /*
+ * Jumps to static functions sometimes go straight to their
+ * offset. Of course, that may not be possible if the jump is
+ * from init -> core or vice versa, so we need to generate an
+ * FDESC (and PLT etc) for that.
+ */
+ case R_IA64_PCREL21B:
+ if (!duplicate_reloc(rela, i)) /* count each distinct (r_info, addend) pair once */
+ ret++;
+ break;
+ }
+ }
+ return ret;
+}
+
+/*
+ * Size the architecture-specific helper sections of a module before it is laid out.
+ *
+ * Looks up the ".core.plt", ".init.plt", ".got", ".opd" and ".IA_64.unwind" sections by
+ * name and records them in mod->arch, counts the PLT, GOT and function-descriptor
+ * entries that the module's RELA sections may need, and then rewrites the headers of the
+ * four helper sections (type, flags, alignment, size) accordingly.
+ *
+ * Returns 0 on success or -ENOEXEC if any of the four helper sections is missing
+ * (the unwind section is optional).
+ */
+int
+module_frob_arch_sections (Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings,
+ struct module *mod)
+{
+ unsigned long core_plts = 0, init_plts = 0, gots = 0, fdescs = 0;
+ Elf64_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum;
+
+ /*
+ * Locate, by name, the sections that will hold the PLTs, the GOT and the
+ * function descriptors, plus the unwind table if the module has one.
+ */
+ for (s = sechdrs; s < sechdrs_end; ++s)
+ if (strcmp(".core.plt", secstrings + s->sh_name) == 0)
+ mod->arch.core_plt = s;
+ else if (strcmp(".init.plt", secstrings + s->sh_name) == 0)
+ mod->arch.init_plt = s;
+ else if (strcmp(".got", secstrings + s->sh_name) == 0)
+ mod->arch.got = s;
+ else if (strcmp(".opd", secstrings + s->sh_name) == 0)
+ mod->arch.opd = s;
+ else if (strcmp(".IA_64.unwind", secstrings + s->sh_name) == 0)
+ mod->arch.unwind = s;
+
+ if (!mod->arch.core_plt || !mod->arch.init_plt || !mod->arch.got || !mod->arch.opd) {
+ printk(KERN_ERR "%s: sections missing\n", mod->name);
+ return -ENOEXEC;
+ }
+
+ /* GOT and PLTs can occur in any relocated section... */
+ for (s = sechdrs + 1; s < sechdrs_end; ++s) {
+ const Elf64_Rela *rels = (void *)ehdr + s->sh_offset;
+ unsigned long numrels = s->sh_size/sizeof(Elf64_Rela);
+
+ if (s->sh_type != SHT_RELA)
+ continue;
+
+ gots += count_gots(rels, numrels);
+ fdescs += count_fdescs(rels, numrels);
+ /* PLT entries are accounted to init or core depending on the section name. */
+ if (strstr(secstrings + s->sh_name, ".init"))
+ init_plts += count_plts(rels, numrels);
+ else
+ core_plts += count_plts(rels, numrels);
+ }
+
+ /* Turn the helper sections into NOBITS sections of the computed sizes. */
+ mod->arch.core_plt->sh_type = SHT_NOBITS;
+ mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+ mod->arch.core_plt->sh_addralign = 16;
+ mod->arch.core_plt->sh_size = core_plts * sizeof(struct plt_entry);
+ mod->arch.init_plt->sh_type = SHT_NOBITS;
+ mod->arch.init_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+ mod->arch.init_plt->sh_addralign = 16;
+ mod->arch.init_plt->sh_size = init_plts * sizeof(struct plt_entry);
+ mod->arch.got->sh_type = SHT_NOBITS;
+ mod->arch.got->sh_flags = ARCH_SHF_SMALL | SHF_ALLOC;
+ mod->arch.got->sh_addralign = 8;
+ mod->arch.got->sh_size = gots * sizeof(struct got_entry);
+ mod->arch.opd->sh_type = SHT_NOBITS;
+ mod->arch.opd->sh_flags = SHF_ALLOC;
+ mod->arch.opd->sh_addralign = 8;
+ mod->arch.opd->sh_size = fdescs * sizeof(struct fdesc);
+ DEBUGP("%s: core.plt=%lx, init.plt=%lx, got=%lx, fdesc=%lx\n",
+ __FUNCTION__, mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size,
+ mod->arch.got->sh_size, mod->arch.opd->sh_size);
+ return 0;
+}
+
+/*
+ * Return non-zero iff ADDR lies in [module_init, module_init + init_size).
+ * The subtraction is unsigned, so an address below the base wraps around to
+ * a huge offset and fails the single comparison.
+ */
+static inline int
+in_init (const struct module *mod, uint64_t addr)
+{
+	uint64_t offset = addr - (uint64_t) mod->module_init;
+
+	return offset < mod->init_size;
+}
+
+/*
+ * Return non-zero iff ADDR lies in [module_core, module_core + core_size).
+ * The subtraction is unsigned, so an address below the base wraps around to
+ * a huge offset and fails the single comparison.
+ */
+static inline int
+in_core (const struct module *mod, uint64_t addr)
+{
+	uint64_t offset = addr - (uint64_t) mod->module_core;
+
+	return offset < mod->core_size;
+}
+
+/* Return non-zero iff VALUE lies in the module's init region or its core region. */
+static inline int
+is_internal (const struct module *mod, uint64_t value)
+{
+	if (in_init(mod, value))
+		return 1;
+	return in_core(mod, value);
+}
+
+/*
+ * Get gp-relative offset for the linkage-table entry of VALUE.
+ *
+ * An existing GOT entry holding VALUE is reused; otherwise the next free entry
+ * is filled in.  Returns 0 without doing anything if *OKP is already zero.
+ * Running out of GOT space is a BUG().
+ */
+static uint64_t
+get_ltoff (struct module *mod, uint64_t value, int *okp)
+{
+	struct got_entry *first, *used_end, *limit, *slot;
+
+	if (!*okp)
+		return 0;
+
+	first = (void *) mod->arch.got->sh_addr;
+	used_end = first + mod->arch.next_got_entry;
+
+	/* Reuse an entry that already holds this value, if there is one. */
+	for (slot = first; slot < used_end; ++slot) {
+		if (slot->val == value)
+			return (uint64_t) slot - mod->arch.gp;
+	}
+
+	/* Not enough GOT entries? */
+	limit = (struct got_entry *) (mod->arch.got->sh_addr + mod->arch.got->sh_size);
+	if (slot >= limit)
+		BUG();
+
+	/* "slot" now points at the first unused entry; claim it. */
+	slot->val = value;
+	++mod->arch.next_got_entry;
+	return (uint64_t) slot - mod->arch.gp;
+}
+
+/*
+ * Return non-zero iff VALUE lies in [gp - MAX_LTOFF/2, gp + MAX_LTOFF/2).
+ * Biasing the unsigned difference by MAX_LTOFF/2 lets one comparison check
+ * both bounds.
+ */
+static inline int
+gp_addressable (struct module *mod, uint64_t value)
+{
+	uint64_t biased = value - mod->arch.gp + MAX_LTOFF/2;
+
+	return biased < MAX_LTOFF;
+}
+
+/*
+ * Get PC-relative PLT entry for this value. Returns 0 on failure.
+ *
+ * INSN is the location being relocated; it selects the init or the core PLT.
+ * VALUE is the address of a function descriptor whose ip/gp pair identifies the
+ * branch target.  An existing PLT entry for the same target ip is reused, otherwise
+ * a new one is built from ia64_plt_template.  If *OKP is already zero nothing is
+ * done; *OKP is cleared when patching the new entry fails.
+ */
+static uint64_t
+get_plt (struct module *mod, const struct insn *insn, uint64_t value, int *okp)
+{
+ struct plt_entry *plt, *plt_end;
+ uint64_t target_ip, target_gp;
+
+ if (!*okp)
+ return 0;
+
+ /* Use the PLT that belongs to the region containing the branch instruction. */
+ if (in_init(mod, (uint64_t) insn)) {
+ plt = (void *) mod->arch.init_plt->sh_addr;
+ plt_end = (void *) plt + mod->arch.init_plt->sh_size;
+ } else {
+ plt = (void *) mod->arch.core_plt->sh_addr;
+ plt_end = (void *) plt + mod->arch.core_plt->sh_size;
+ }
+
+ /* "value" is a pointer to a function-descriptor; fetch the target ip/gp from it: */
+ target_ip = ((uint64_t *) value)[0];
+ target_gp = ((uint64_t *) value)[1];
+
+ /*
+ * Look for existing PLT entry.  A zero first word ends the scan and is treated as
+ * the first unused slot (presumably the section starts out zero-filled -- confirm).
+ * Walking off the end of the section is a BUG().
+ */
+ while (plt->bundle[0][0]) {
+ if (plt_target(plt) == target_ip)
+ goto found;
+ if (++plt >= plt_end)
+ BUG();
+ }
+ /* No match: initialize the free slot from the template and patch in the target. */
+ *plt = ia64_plt_template;
+ if (!patch_plt(mod, plt, target_ip, target_gp)) {
+ *okp = 0;
+ return 0;
+ }
+#if ARCH_MODULE_DEBUG
+ /* Debug-only sanity check: the patched entry must decode back to the target. */
+ if (plt_target(plt) != target_ip) {
+ printk("%s: mistargeted PLT: wanted %lx, got %lx\n",
+ __FUNCTION__, target_ip, plt_target(plt));
+ *okp = 0;
+ return 0;
+ }
+#endif
+ found:
+ return (uint64_t) plt;
+}
+
+/*
+ * Get function descriptor for VALUE.
+ *
+ * Returns the address of a descriptor in the module's .opd section whose ip equals
+ * VALUE, creating one (with gp = mod->arch.gp) if none exists yet.  If VALUE is not
+ * inside the module it is returned unchanged.  Returns 0 if *OKP is already zero or
+ * if VALUE is zero.
+ */
+static uint64_t
+get_fdesc (struct module *mod, uint64_t value, int *okp)
+{
+ struct fdesc *fdesc = (void *) mod->arch.opd->sh_addr;
+
+ if (!*okp)
+ return 0;
+
+ if (!value) {
+ /* NOTE(review): *okp is left untouched here, so the caller does not see a failure. */
+ printk(KERN_ERR "%s: fdesc for zero requested!\n", mod->name);
+ return 0;
+ }
+
+ if (!is_internal(mod, value))
+ /*
+ * If it's not a module-local entry-point, "value" already points to a
+ * function-descriptor.
+ */
+ return value;
+
+ /*
+ * Look for existing function descriptor.  A zero ip ends the scan and marks the
+ * first unused slot; walking off the end of the section is a BUG().
+ */
+ while (fdesc->ip) {
+ if (fdesc->ip == value)
+ return (uint64_t)fdesc;
+ if ((uint64_t) ++fdesc >= mod->arch.opd->sh_addr + mod->arch.opd->sh_size)
+ BUG();
+ }
+
+ /* Create new one */
+ fdesc->ip = value;
+ fdesc->gp = mod->arch.gp;
+ return (uint64_t) fdesc;
+}
+
+/*
+ * Apply a single relocation.
+ *
+ * R_TYPE is decoded into a value formula (how the value is computed from the symbol
+ * value plus ADDEND) and a target format (how the value is stored at LOCATION).  SEC is
+ * the section being relocated; it is only used for section-relative relocations.
+ *
+ * Returns 0 on success and -ENOEXEC if the relocation is unknown or unsupported, if a
+ * helper (GOT/PLT/fdesc lookup) cleared the "ok" flag, or if storing the value failed.
+ */
+static inline int
+do_reloc (struct module *mod, uint8_t r_type, Elf64_Sym *sym, uint64_t addend,
+ Elf64_Shdr *sec, void *location)
+{
+ enum reloc_target_format format = (r_type >> FORMAT_SHIFT) & FORMAT_MASK;
+ enum reloc_value_formula formula = (r_type >> VALUE_SHIFT) & VALUE_MASK;
+ uint64_t val;
+ int ok = 1;
+
+ val = sym->st_value + addend;
+
+ /* Step 1: compute the value to store, according to the formula. */
+ switch (formula) {
+ case RV_SEGREL: /* segment base is arbitrarily chosen to be 0 for kernel modules */
+ case RV_DIRECT:
+ break;
+
+ case RV_GPREL: val -= mod->arch.gp; break;
+ case RV_LTREL: val = get_ltoff(mod, val, &ok); break;
+ case RV_PLTREL: val = get_plt(mod, location, val, &ok); break;
+ case RV_FPTR: val = get_fdesc(mod, val, &ok); break;
+ case RV_SECREL: val -= sec->sh_addr; break;
+ case RV_LTREL_FPTR: val = get_ltoff(mod, get_fdesc(mod, val, &ok), &ok); break;
+
+ case RV_PCREL:
+ switch (r_type) {
+ case R_IA64_PCREL21B:
+ if ((in_init(mod, val) && in_core(mod, (uint64_t)location)) ||
+ (in_core(mod, val) && in_init(mod, (uint64_t)location))) {
+ /*
+ * Init section may have been allocated far away from core,
+ * if the branch won't reach, then allocate a plt for it.
+ */
+ uint64_t delta = ((int64_t)val - (int64_t)location) / 16;
+ if (delta + (1 << 20) >= (1 << 21)) {
+ val = get_fdesc(mod, val, &ok);
+ val = get_plt(mod, location, val, &ok);
+ }
+ } else if (!is_internal(mod, val))
+ val = get_plt(mod, location, val, &ok);
+ /* FALL THROUGH */
+ default:
+ val -= bundle(location);
+ break;
+
+ case R_IA64_PCREL32MSB:
+ case R_IA64_PCREL32LSB:
+ case R_IA64_PCREL64MSB:
+ case R_IA64_PCREL64LSB:
+ val -= (uint64_t) location;
+ break;
+
+ }
+ /* These PC-relative types carry their own target format. */
+ switch (r_type) {
+ case R_IA64_PCREL60B: format = RF_INSN60; break;
+ case R_IA64_PCREL21B: format = RF_INSN21B; break;
+ case R_IA64_PCREL21M: format = RF_INSN21M; break;
+ case R_IA64_PCREL21F: format = RF_INSN21F; break;
+ default: break;
+ }
+ break;
+
+ case RV_BDREL:
+ val -= (uint64_t) (in_init(mod, val) ? mod->module_init : mod->module_core);
+ break;
+
+ case RV_LTV:
+ /* can link-time value relocs happen here? */
+ BUG();
+ break;
+
+ case RV_PCREL2:
+ if (r_type == R_IA64_PCREL21BI) {
+ if (!is_internal(mod, val)) {
+ printk(KERN_ERR "%s: %s reloc against non-local symbol (%lx)\n",
+ __FUNCTION__, reloc_name[r_type], val);
+ return -ENOEXEC;
+ }
+ format = RF_INSN21B;
+ }
+ val -= bundle(location);
+ break;
+
+ case RV_SPECIAL:
+ switch (r_type) {
+ case R_IA64_IPLTMSB:
+ case R_IA64_IPLTLSB:
+ val = get_fdesc(mod, get_plt(mod, location, val, &ok), &ok);
+ format = RF_64LSB;
+ if (r_type == R_IA64_IPLTMSB)
+ format = RF_64MSB;
+ break;
+
+ case R_IA64_SUB:
+ val = addend - sym->st_value;
+ format = RF_INSN64;
+ break;
+
+ case R_IA64_LTOFF22X:
+ /* Use a direct gp-relative offset when possible, else go through the GOT. */
+ if (gp_addressable(mod, val))
+ val -= mod->arch.gp;
+ else
+ val = get_ltoff(mod, val, &ok);
+ format = RF_INSN22;
+ break;
+
+ case R_IA64_LDXMOV:
+ if (gp_addressable(mod, val)) {
+ /* turn "ld8" into "mov": */
+ DEBUGP("%s: patching ld8 at %p to mov\n", __FUNCTION__, location);
+ ia64_patch((u64) location, 0x1fff80fe000UL, 0x10000000000UL);
+ }
+ /* Nothing is stored for this relocation, so we are done. */
+ return 0;
+
+ default:
+ if (reloc_name[r_type])
+ printk(KERN_ERR "%s: special reloc %s not supported\n",
+ mod->name, reloc_name[r_type]);
+ else
+ printk(KERN_ERR "%s: unknown special reloc %x\n",
+ mod->name, r_type);
+ return -ENOEXEC;
+ }
+ break;
+
+ case RV_TPREL:
+ case RV_LTREL_TPREL:
+ case RV_DTPMOD:
+ case RV_LTREL_DTPMOD:
+ case RV_DTPREL:
+ case RV_LTREL_DTPREL:
+ printk(KERN_ERR "%s: %s reloc not supported\n",
+ mod->name, reloc_name[r_type] ? reloc_name[r_type] : "?");
+ return -ENOEXEC;
+
+ default:
+ printk(KERN_ERR "%s: unknown reloc %x\n", mod->name, r_type);
+ return -ENOEXEC;
+ }
+
+ /* One of the GOT/PLT/fdesc helpers reported a failure. */
+ if (!ok)
+ return -ENOEXEC;
+
+ DEBUGP("%s: [%p]<-%016lx = %s(%lx)\n", __FUNCTION__, location, val,
+ reloc_name[r_type] ? reloc_name[r_type] : "?", sym->st_value + addend);
+
+ /* Step 2: store the value at "location" in the required format. */
+ switch (format) {
+ case RF_INSN21B: ok = apply_imm21b(mod, location, (int64_t) val / 16); break;
+ case RF_INSN22: ok = apply_imm22(mod, location, val); break;
+ case RF_INSN64: ok = apply_imm64(mod, location, val); break;
+ case RF_INSN60: ok = apply_imm60(mod, location, (int64_t) val / 16); break;
+ case RF_32LSB: put_unaligned(val, (uint32_t *) location); break;
+ case RF_64LSB: put_unaligned(val, (uint64_t *) location); break;
+ case RF_32MSB: /* ia64 Linux is little-endian... */
+ case RF_64MSB: /* ia64 Linux is little-endian... */
+ case RF_INSN14: /* must be within-module, i.e., resolved by "ld -r" */
+ case RF_INSN21M: /* must be within-module, i.e., resolved by "ld -r" */
+ case RF_INSN21F: /* must be within-module, i.e., resolved by "ld -r" */
+ printk(KERN_ERR "%s: format %u needed by %s reloc is not supported\n",
+ mod->name, format, reloc_name[r_type] ? reloc_name[r_type] : "?");
+ return -ENOEXEC;
+
+ default:
+ printk(KERN_ERR "%s: relocation %s resulted in unknown format %u\n",
+ mod->name, reloc_name[r_type] ? reloc_name[r_type] : "?", format);
+ return -ENOEXEC;
+ }
+ return ok ? 0 : -ENOEXEC;
+}
+
+/*
+ * Apply all RELA relocations of section RELSEC to the section it refers to
+ * (sechdrs[relsec].sh_info), resolving symbols through the symbol table in section
+ * SYMINDEX.  On first use this also chooses the module's gp value.
+ *
+ * Returns 0 on success (including when the target section was not allocated) or the
+ * first negative error returned by do_reloc().
+ */
+int
+apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex,
+ unsigned int relsec, struct module *mod)
+{
+ unsigned int i, n = sechdrs[relsec].sh_size / sizeof(Elf64_Rela);
+ Elf64_Rela *rela = (void *) sechdrs[relsec].sh_addr;
+ Elf64_Shdr *target_sec;
+ int ret;
+
+ DEBUGP("%s: applying section %u (%u relocs) to %u\n", __FUNCTION__,
+ relsec, n, sechdrs[relsec].sh_info);
+
+ target_sec = sechdrs + sechdrs[relsec].sh_info;
+
+ if (target_sec->sh_entsize == ~0UL)
+ /*
+ * If target section wasn't allocated, we don't need to relocate it.
+ * Happens, e.g., for debug sections.
+ */
+ return 0;
+
+ if (!mod->arch.gp) {
+ /*
+ * XXX Should have an arch-hook for running this after final section
+ * addresses have been selected...
+ */
+ /* See if gp can cover the entire core module: */
+ uint64_t gp = (uint64_t) mod->module_core + MAX_LTOFF / 2;
+ if (mod->core_size >= MAX_LTOFF)
+ /*
+ * This takes advantage of fact that ARCH_SHF_SMALL gets allocated
+ * at the end of the module.
+ */
+ gp = (uint64_t) mod->module_core + mod->core_size - MAX_LTOFF / 2;
+ mod->arch.gp = gp;
+ DEBUGP("%s: placing gp at 0x%lx\n", __FUNCTION__, gp);
+ }
+
+ /* Apply the relocations one by one, stopping at the first failure. */
+ for (i = 0; i < n; i++) {
+ ret = do_reloc(mod, ELF64_R_TYPE(rela[i].r_info),
+ ((Elf64_Sym *) sechdrs[symindex].sh_addr
+ + ELF64_R_SYM(rela[i].r_info)),
+ rela[i].r_addend, target_sec,
+ (void *) target_sec->sh_addr + rela[i].r_offset);
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
+/*
+ * REL-style relocation sections are not supported: log an error naming the
+ * module and the section, and reject the module with -ENOEXEC.
+ */
+int
+apply_relocate (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex,
+		unsigned int relsec, struct module *mod)
+{
+	printk(KERN_ERR "module %s: REL relocs in section %u unsupported\n",
+	       mod->name,
+	       relsec);
+
+	return -ENOEXEC;
+}
+
+/*
+ * Modules contain a single unwind table which covers both the core and the init text
+ * sections but since the two are not contiguous, we need to split this table up such that
+ * we can register (and unregister) each "segment" separately. Fortunately, this sounds
+ * more complicated than it really is.
+ *
+ * The table in mod->arch.unwind is sorted in place by start_offset; the resulting init
+ * and core runs are then registered via unw_add_unwind_table() and the returned handles
+ * are stored in mod->arch.init_unw_table and mod->arch.core_unw_table.
+ */
+static void
+register_unwind_table (struct module *mod)
+{
+ struct unw_table_entry *start = (void *) mod->arch.unwind->sh_addr;
+ struct unw_table_entry *end = start + mod->arch.unwind->sh_size / sizeof (*start);
+ struct unw_table_entry tmp, *e1, *e2, *core, *init;
+ unsigned long num_init = 0, num_core = 0;
+
+ /* First, count how many init and core unwind-table entries there are. */
+ for (e1 = start; e1 < end; ++e1)
+ if (in_init(mod, e1->start_offset))
+ ++num_init;
+ else
+ ++num_core;
+ /*
+ * Second, sort the table such that all unwind-table entries for the init and core
+ * text sections are nicely separated. We do this with a simple O(n^2) exchange
+ * sort on start_offset (unwind tables don't get ridiculously huge).
+ */
+ for (e1 = start; e1 < end; ++e1) {
+ for (e2 = e1 + 1; e2 < end; ++e2) {
+ if (e2->start_offset < e1->start_offset) {
+ tmp = *e1;
+ *e1 = *e2;
+ *e2 = tmp;
+ }
+ }
+ }
+ /*
+ * Third, locate the init and core segments in the unwind table: whichever region
+ * the first (lowest) entry belongs to comes first, the other follows it.
+ * NOTE(review): start->start_offset is read even if the table is empty -- confirm
+ * that an empty unwind section cannot reach this point.
+ */
+ if (in_init(mod, start->start_offset)) {
+ init = start;
+ core = start + num_init;
+ } else {
+ core = start;
+ init = start + num_core;
+ }
+
+ DEBUGP("%s: name=%s, gp=%lx, num_init=%lu, num_core=%lu\n", __FUNCTION__,
+ mod->name, mod->arch.gp, num_init, num_core);
+
+ /*
+ * Fourth, register both tables (if not empty).
+ */
+ if (num_core > 0) {
+ mod->arch.core_unw_table = unw_add_unwind_table(mod->name, 0, mod->arch.gp,
+ core, core + num_core);
+ DEBUGP("%s: core: handle=%p [%p-%p)\n", __FUNCTION__,
+ mod->arch.core_unw_table, core, core + num_core);
+ }
+ if (num_init > 0) {
+ mod->arch.init_unw_table = unw_add_unwind_table(mod->name, 0, mod->arch.gp,
+ init, init + num_init);
+ DEBUGP("%s: init: handle=%p [%p-%p)\n", __FUNCTION__,
+ mod->arch.init_unw_table, init, init + num_init);
+ }
+}
+
+/*
+ * Final architecture-specific step of module loading: register the module's
+ * unwind table when mod->arch.unwind is set.  Always returns 0.
+ */
+int
+module_finalize (const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod)
+{
+	DEBUGP("%s: init: entry=%p\n", __FUNCTION__, mod->init);
+
+	/* Nothing to register when no unwind section was recorded. */
+	if (!mod->arch.unwind)
+		return 0;
+
+	register_unwind_table(mod);
+	return 0;
+}
+
+/*
+ * Undo module_finalize(): remove whichever unwind tables were registered for
+ * this module, the init table first and then the core table.
+ */
+void
+module_arch_cleanup (struct module *mod)
+{
+	/* A handle that is still zero means that table was never registered. */
+	if (mod->arch.init_unw_table) {
+		unw_remove_unwind_table(mod->arch.init_unw_table);
+	}
+
+	if (mod->arch.core_unw_table) {
+		unw_remove_unwind_table(mod->arch.core_unw_table);
+	}
+}
+
+#ifdef CONFIG_SMP
+/*
+ * Copy SIZE bytes from SRC into the per-CPU area of every possible CPU.
+ * PCPUDST is the destination before the per-CPU offset is added.
+ */
+void
+percpu_modcopy (void *pcpudst, const void *src, unsigned long size)
+{
+	unsigned int cpu;
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		if (!cpu_possible(cpu))
+			continue;	/* skip CPUs that can never come online */
+		memcpy(pcpudst + __per_cpu_offset[cpu], src, size);
+	}
+}
+#endif /* CONFIG_SMP */
diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S
new file mode 100644
index 00000000000..5018c7f2e7a
--- /dev/null
+++ b/arch/ia64/kernel/pal.S
@@ -0,0 +1,302 @@
+/*
+ * PAL Firmware support
+ * IA-64 Processor Programmers Reference Vol 2
+ *
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999-2001, 2003 Hewlett-Packard Co
+ * David Mosberger <davidm@hpl.hp.com>
+ * Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * 05/22/2000 eranian Added support for stacked register calls
+ * 05/24/2000 eranian Added support for physical mode static calls
+ */
+
+#include <asm/asmmacro.h>
+#include <asm/processor.h>
+
+ .data
+pal_entry_point:
+ data8 ia64_pal_default_handler
+ .text
+
+/*
+ * Set the PAL entry point address. This could be written in C code, but we do it here
+ * to keep it all in one module (besides, it's so trivial that it's
+ * not a big deal).
+ *
+ * in0 Address of the PAL entry point (text address, NOT a function descriptor).
+ *
+ * The address is stored into the pal_entry_point variable, from which the PAL call
+ * routines in this file load it.
+ */
+GLOBAL_ENTRY(ia64_pal_handler_init)
+ alloc r3=ar.pfs,1,0,0,0
+ movl r2=pal_entry_point
+ ;;
+ st8 [r2]=in0 // pal_entry_point <- in0
+ br.ret.sptk.many rp
+END(ia64_pal_handler_init)
+
+/*
+ * Default PAL call handler. This needs to be coded in assembly because it uses
+ * the static calling convention, i.e., the RSE may not be used and calls are
+ * done via "br.cond" (not "br.call").
+ *
+ * This is the initial value of pal_entry_point; it just sets r8 to -1 and branches
+ * back to the address in rp.
+ */
+GLOBAL_ENTRY(ia64_pal_default_handler)
+ mov r8=-1
+ br.cond.sptk.many rp
+END(ia64_pal_default_handler)
+
+/*
+ * Make a PAL call using the static calling convention.
+ *
+ * in0 Index of PAL service
+ * in1 - in3 Remaining PAL arguments
+ * in4 1 ==> clear psr.ic, 0 ==> don't clear psr.ic
+ *
+ * The arguments are passed to PAL in r28-r31.  psr.i is always cleared around the
+ * call; the saved psr, ar.rsc, ar.pfs and rp are restored afterwards.
+ */
+GLOBAL_ENTRY(ia64_pal_call_static)
+ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
+ alloc loc1 = ar.pfs,5,5,0,0
+ movl loc2 = pal_entry_point
+1: {
+ mov r28 = in0
+ mov r29 = in1
+ mov r8 = ip
+ }
+ ;;
+ ld8 loc2 = [loc2] // loc2 <- entry point
+ tbit.nz p6,p7 = in4, 0 // p6 = clear psr.ic too, p7 = clear psr.i only
+ adds r8 = 1f-1b,r8 // r8 <- address of the return label below
+ mov loc4=ar.rsc // save RSE configuration
+ ;;
+ mov ar.rsc=0 // put RSE in enforced lazy, LE mode
+ mov loc3 = psr
+ mov loc0 = rp
+ .body
+ mov r30 = in2
+
+(p6) rsm psr.i | psr.ic
+ mov r31 = in3
+ mov b7 = loc2
+
+(p7) rsm psr.i
+ ;;
+(p6) srlz.i
+ mov rp = r8
+ br.cond.sptk.many b7
+1: mov psr.l = loc3
+ mov ar.rsc = loc4 // restore RSE configuration
+ mov ar.pfs = loc1
+ mov rp = loc0
+ ;;
+ srlz.d // serialize restoration of psr.l
+ br.ret.sptk.many b0
+END(ia64_pal_call_static)
+
+/*
+ * Make a PAL call using the stacked registers calling convention.
+ *
+ * Inputs:
+ * in0 Index of PAL service
+ * in1 - in3 Remaining PAL arguments
+ *
+ * in0-in3 are copied to out0-out3 (and the index also to r28).  psr.i is cleared
+ * around the call; the saved psr, ar.pfs and rp are restored afterwards.
+ */
+GLOBAL_ENTRY(ia64_pal_call_stacked)
+ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
+ alloc loc1 = ar.pfs,4,4,4,0
+ movl loc2 = pal_entry_point
+
+ mov r28 = in0 // Index MUST be copied to r28
+ mov out0 = in0 // AND in0 of PAL function
+ mov loc0 = rp
+ .body
+ ;;
+ ld8 loc2 = [loc2] // loc2 <- entry point
+ mov out1 = in1
+ mov out2 = in2
+ mov out3 = in3
+ mov loc3 = psr
+ ;;
+ rsm psr.i
+ mov b7 = loc2
+ ;;
+ br.call.sptk.many rp=b7 // now make the call
+.ret0: mov psr.l = loc3
+ mov ar.pfs = loc1
+ mov rp = loc0
+ ;;
+ srlz.d // serialize restoration of psr.l
+ br.ret.sptk.many b0
+END(ia64_pal_call_stacked)
+
+/*
+ * Make a physical mode PAL call using the static registers calling convention.
+ *
+ * Inputs:
+ * in0 Index of PAL service
+ * in1 - in3 Remaining PAL arguments
+ *
+ * PSR_LP, PSR_TB, PSR_ID, PSR_DA are never set by the kernel.
+ * So we don't need to clear them.
+ *
+ * The arguments are passed to PAL in r28-r31.  The routine switches to physical
+ * mode via ia64_switch_mode_phys, branches to the physical PAL entry point, and
+ * returns to virtual mode via ia64_switch_mode_virt before restoring the saved state.
+ */
+#define PAL_PSR_BITS_TO_CLEAR \
+ (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_DB | IA64_PSR_RT | \
+ IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \
+ IA64_PSR_DFL | IA64_PSR_DFH)
+
+#define PAL_PSR_BITS_TO_SET \
+ (IA64_PSR_BN)
+
+
+GLOBAL_ENTRY(ia64_pal_call_phys_static)
+ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
+ alloc loc1 = ar.pfs,4,7,0,0
+ movl loc2 = pal_entry_point
+1: {
+ mov r28 = in0 // copy procedure index
+ mov r8 = ip // save ip to compute branch
+ mov loc0 = rp // save rp
+ }
+ .body
+ ;;
+ ld8 loc2 = [loc2] // loc2 <- entry point
+ mov r29 = in1 // first argument
+ mov r30 = in2 // copy arg2
+ mov r31 = in3 // copy arg3
+ ;;
+ mov loc3 = psr // save psr
+ adds r8 = 1f-1b,r8 // calculate return address for call
+ ;;
+ mov loc4=ar.rsc // save RSE configuration
+ dep.z loc2=loc2,0,61 // convert pal entry point to physical
+ tpa r8=r8 // convert rp to physical
+ ;;
+ mov b7 = loc2 // install target to branch reg
+ mov ar.rsc=0 // put RSE in enforced lazy, LE mode
+ movl r16=PAL_PSR_BITS_TO_CLEAR
+ movl r17=PAL_PSR_BITS_TO_SET
+ ;;
+ or loc3=loc3,r17 // add in psr the bits to set
+ ;;
+ andcm r16=loc3,r16 // removes bits to clear from psr
+ br.call.sptk.many rp=ia64_switch_mode_phys
+.ret1: mov rp = r8 // install return address (physical)
+ mov loc5 = r19 // keep r19/r20 across the PAL call; they are
+ mov loc6 = r20 // handed back to ia64_switch_mode_virt below
+ br.cond.sptk.many b7
+1:
+ mov ar.rsc=0 // put RSE in enforced lazy, LE mode
+ mov r16=loc3 // r16= original psr
+ mov r19=loc5
+ mov r20=loc6
+ br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
+.ret2:
+ mov psr.l = loc3 // restore init PSR
+
+ mov ar.pfs = loc1
+ mov rp = loc0
+ ;;
+ mov ar.rsc=loc4 // restore RSE configuration
+ srlz.d // serialize restoration of psr.l
+ br.ret.sptk.many b0
+END(ia64_pal_call_phys_static)
+
+/*
+ * Make a PAL call using the stacked registers in physical mode.
+ *
+ * Inputs:
+ * in0 Index of PAL service
+ * in1 - in3 Remaining PAL arguments
+ *
+ * in0-in3 are copied to out0-out3 (and the index also to r28).  The routine switches
+ * to physical mode via ia64_switch_mode_phys, calls the physical PAL entry point, and
+ * returns to virtual mode via ia64_switch_mode_virt before restoring the saved state.
+ */
+GLOBAL_ENTRY(ia64_pal_call_phys_stacked)
+ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
+ alloc loc1 = ar.pfs,5,7,4,0
+ movl loc2 = pal_entry_point
+1: {
+ mov r28 = in0 // copy procedure index
+ mov loc0 = rp // save rp
+ }
+ .body
+ ;;
+ ld8 loc2 = [loc2] // loc2 <- entry point
+ mov out0 = in0 // procedure index (also in r28)
+ mov out1 = in1 // copy arg1
+ mov out2 = in2 // copy arg2
+ mov out3 = in3 // copy arg3
+ ;;
+ mov loc3 = psr // save psr
+ ;;
+ mov loc4=ar.rsc // save RSE configuration
+ dep.z loc2=loc2,0,61 // convert pal entry point to physical
+ ;;
+ mov ar.rsc=0 // put RSE in enforced lazy, LE mode
+ movl r16=PAL_PSR_BITS_TO_CLEAR
+ movl r17=PAL_PSR_BITS_TO_SET
+ ;;
+ or loc3=loc3,r17 // add in psr the bits to set
+ mov b7 = loc2 // install target to branch reg
+ ;;
+ andcm r16=loc3,r16 // removes bits to clear from psr
+ br.call.sptk.many rp=ia64_switch_mode_phys
+.ret6:
+ mov loc5 = r19 // keep r19/r20 across the PAL call; they are
+ mov loc6 = r20 // handed back to ia64_switch_mode_virt below
+ br.call.sptk.many rp=b7 // now make the call
+.ret7:
+ mov ar.rsc=0 // put RSE in enforced lazy, LE mode
+ mov r16=loc3 // r16= original psr
+ mov r19=loc5
+ mov r20=loc6
+ br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
+
+.ret8: mov psr.l = loc3 // restore init PSR
+ mov ar.pfs = loc1
+ mov rp = loc0
+ ;;
+ mov ar.rsc=loc4 // restore RSE configuration
+ srlz.d // serialize restoration of psr.l
+ br.ret.sptk.many b0
+END(ia64_pal_call_phys_stacked)
+
+/*
+ * Save the scratch fp regs which aren't saved in pt_regs already (f10-f15).
+ *
+ * NOTE: We need to do this since firmware (SAL and PAL) may use any of the scratch
+ * regs in the fp-low partition.
+ *
+ * Inputs:
+ * in0 Address of stack storage for fp regs
+ *
+ * The six registers are spilled to consecutive 16-byte slots starting at in0, using
+ * two pointers (in0 and r2 = in0+16) that each advance by 32 bytes per store.
+ */
+GLOBAL_ENTRY(ia64_save_scratch_fpregs)
+ alloc r3=ar.pfs,1,0,0,0
+ add r2=16,in0
+ ;;
+ stf.spill [in0] = f10,32
+ stf.spill [r2] = f11,32
+ ;;
+ stf.spill [in0] = f12,32
+ stf.spill [r2] = f13,32
+ ;;
+ stf.spill [in0] = f14,32
+ stf.spill [r2] = f15,32
+ br.ret.sptk.many rp
+END(ia64_save_scratch_fpregs)
+
+/*
+ * Load the scratch fp regs (f10-f15)
+ *
+ * Inputs:
+ * in0 Address of stack storage for fp regs
+ *
+ * The six registers are filled from consecutive 16-byte slots starting at in0, using
+ * two pointers (in0 and r2 = in0+16) that each advance by 32 bytes per load; this
+ * matches the layout written by ia64_save_scratch_fpregs.
+ */
+GLOBAL_ENTRY(ia64_load_scratch_fpregs)
+ alloc r3=ar.pfs,1,0,0,0
+ add r2=16,in0
+ ;;
+ ldf.fill f10 = [in0],32
+ ldf.fill f11 = [r2],32
+ ;;
+ ldf.fill f12 = [in0],32
+ ldf.fill f13 = [r2],32
+ ;;
+ ldf.fill f14 = [in0],32
+ ldf.fill f15 = [r2],32
+ br.ret.sptk.many rp
+END(ia64_load_scratch_fpregs)
diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c
new file mode 100644
index 00000000000..25e7c834456
--- /dev/null
+++ b/arch/ia64/kernel/palinfo.c
@@ -0,0 +1,1023 @@
+/*
+ * palinfo.c
+ *
+ * Prints processor specific information reported by PAL.
+ * This code is based on specification of PAL as of the
+ * Intel IA-64 Architecture Software Developer's Manual v1.0.
+ *
+ *
+ * Copyright (C) 2000-2001, 2003 Hewlett-Packard Co
+ * Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 2004 Intel Corporation
+ * Ashok Raj <ashok.raj@intel.com>
+ *
+ * 05/26/2000 S.Eranian initial release
+ * 08/21/2000 S.Eranian updated to July 2000 PAL specs
+ * 02/05/2001 S.Eranian fixed module support
+ * 10/23/2001 S.Eranian updated pal_perf_mon_info bug fixes
+ * 03/24/2004 Ashok Raj updated to work with CPU Hotplug
+ */
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/efi.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+
+#include <asm/pal.h>
+#include <asm/sal.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <linux/smp.h>
+
+MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
+MODULE_DESCRIPTION("/proc interface to IA-64 PAL");
+MODULE_LICENSE("GPL");
+
+#define PALINFO_VERSION "0.5"
+
+/*
+ * A palinfo read function takes a buffer to format its report into and returns an
+ * int; the functions of this type defined in this file return the number of bytes
+ * written, or 0 on failure.
+ */
+typedef int (*palinfo_func_t)(char*);
+
+/* Description of one palinfo /proc entry. */
+typedef struct {
+ const char *name; /* name of the proc entry */
+ palinfo_func_t proc_read; /* function to call for reading */
+ struct proc_dir_entry *entry; /* registered entry (removal) */
+} palinfo_entry_t;
+
+
+/*
+ * String tables used to pretty-print the values reported by PAL.
+ */
+
+/*
+ * Cache type names.  Declared "const char *" like the other string tables in this
+ * file, since the entries are string literals and are only ever read.
+ */
+static const char *cache_types[] = {
+ "", /* not used */
+ "Instruction",
+ "Data",
+ "Data/Instruction" /* unified */
+};
+
+static const char *cache_mattrib[]={
+ "WriteThrough",
+ "WriteBack",
+ "", /* reserved */
+ "" /* reserved */
+};
+
+static const char *cache_st_hints[]={
+ "Temporal, level 1",
+ "Reserved",
+ "Reserved",
+ "Non-temporal, all levels",
+ "Reserved",
+ "Reserved",
+ "Reserved",
+ "Reserved"
+};
+
+static const char *cache_ld_hints[]={
+ "Temporal, level 1",
+ "Non-temporal, level 1",
+ "Reserved",
+ "Non-temporal, all levels",
+ "Reserved",
+ "Reserved",
+ "Reserved",
+ "Reserved"
+};
+
+static const char *rse_hints[]={
+ "enforced lazy",
+ "eager stores",
+ "eager loads",
+ "eager loads and stores"
+};
+
+#define RSE_HINTS_COUNT ARRAY_SIZE(rse_hints)
+
+static const char *mem_attrib[]={
+ "WB", /* 000 */
+ "SW", /* 001 */
+ "010", /* 010 */
+ "011", /* 011 */
+ "UC", /* 100 */
+ "UCE", /* 101 */
+ "WC", /* 110 */
+ "NaTPage" /* 111 */
+};
+
+/*
+ * Take a 64-bit vector and produce a string such that
+ * if bit n is set then 2^n in clear text is generated. The adjustment
+ * to the right unit is also done (e.g. bit 12 is printed as "4K").
+ *
+ * Input:
+ * - a pointer to a buffer to hold the string
+ * - a 64-bit vector
+ * Output:
+ * - a pointer to the end of the buffer
+ *
+ */
+static char *
+bitvector_process(char *p, u64 vector)
+{
+	int i,j;
+	/*
+	 * One unit per group of 10 bits.  j = i/10 reaches 6 for bits 60-63, so the
+	 * table needs 7 entries; with only 5 (up to "T") bits 50-63 indexed past its end.
+	 */
+	const char *units[]={ "", "K", "M", "G", "T", "P", "E" };
+
+	for (i=0, j=0; i < 64; i++ , j=i/10) {
+		if (vector & 0x1) {
+			/* i-j*10 is in 0..9, so the printed number is 1..512 */
+			p += sprintf(p, "%d%s ", 1 << (i-j*10), units[j]);
+		}
+		vector >>= 1;
+	}
+	return p;
+}
+
+/*
+ * Take a bit vector and produce a string such that
+ * if bit n is set then register n is present. The function
+ * takes into account consecutive registers and prints out ranges.
+ *
+ * Input:
+ * - a pointer to a buffer to hold the string
+ * - a pointer to the 64-bit words making up the vector (a new word is
+ *   fetched every 64 bits)
+ * - the number of bits to examine
+ * Output:
+ * - a pointer to the end of the buffer
+ *
+ * NOTE(review): the upper bound printed for a range that is still open at the
+ * end is hard-coded to 127 regardless of "max".
+ */
+static char *
+bitregister_process(char *p, u64 *reg_info, int max)
+{
+ int i, begin, skip = 0;
+ u64 value = reg_info[0];
+
+ /*
+  * Start at the first set bit of the first word.
+  * NOTE(review): if ffs() takes an int here (confirm), only the low 32 bits are
+  * considered, and a result of 0 makes this a shift by -1.
+  */
+ value >>= i = begin = ffs(value) - 1;
+
+ for(; i < max; i++ ) {
+
+ /* crossing a 64-bit boundary: continue with the next word */
+ if (i != 0 && (i%64) == 0) value = *++reg_info;
+
+ if ((value & 0x1) == 0 && skip == 0) {
+ /* a run of set bits just ended at bit i-1: print it */
+ if (begin <= i - 2)
+ p += sprintf(p, "%d-%d ", begin, i-1);
+ else
+ p += sprintf(p, "%d ", i-1);
+ skip = 1;
+ begin = -1;
+ } else if ((value & 0x1) && skip == 1) {
+ /* a new run of set bits starts at bit i */
+ skip = 0;
+ begin = i;
+ }
+ value >>=1;
+ }
+ /* a run was still open when the loop ended */
+ if (begin > -1) {
+ if (begin < 127)
+ p += sprintf(p, "%d-127", begin);
+ else
+ p += sprintf(p, "127");
+ }
+
+ return p;
+}
+
+/*
+ * Format the power-level information returned by ia64_pal_halt_info() into
+ * PAGE, one block per level (8 levels).  Returns the number of bytes written,
+ * or 0 if the PAL call returns a non-zero status.
+ */
+static int
+power_info(char *page)
+{
+	u64 halt_info_buffer[8];
+	pal_power_mgmt_info_u_t *halt_info =(pal_power_mgmt_info_u_t *)halt_info_buffer;
+	char *out = page;
+	s64 status;
+	int level;
+
+	status = ia64_pal_halt_info(halt_info);
+	if (status != 0)
+		return 0;
+
+	for (level = 0; level < 8; level++) {
+		/* levels whose "im" field is not 1 get a one-line placeholder */
+		if (halt_info[level].pal_power_mgmt_info_s.im != 1) {
+			out += sprintf(out,"Power level %d: not implemented\n",level);
+			continue;
+		}
+
+		out += sprintf(out, "Power level %d:\n"
+			       "\tentry_latency : %d cycles\n"
+			       "\texit_latency : %d cycles\n"
+			       "\tpower consumption : %d mW\n"
+			       "\tCache+TLB coherency : %s\n", level,
+			       halt_info[level].pal_power_mgmt_info_s.entry_latency,
+			       halt_info[level].pal_power_mgmt_info_s.exit_latency,
+			       halt_info[level].pal_power_mgmt_info_s.power_consumption,
+			       halt_info[level].pal_power_mgmt_info_s.co ? "Yes" : "No");
+	}
+	return out - page;
+}
+
+/*
+ * Format the cache hierarchy description into PAGE: first the summary from
+ * ia64_pal_cache_summary(), then one block per cache level and type as reported by
+ * ia64_pal_cache_config_info().  Returns the number of bytes written, or 0 if the
+ * summary call fails.
+ */
+static int
+cache_info(char *page)
+{
+ char *p = page;
+ u64 i, levels, unique_caches;
+ pal_cache_config_info_t cci;
+ int j, k;
+ s64 status;
+
+ if ((status = ia64_pal_cache_summary(&levels, &unique_caches)) != 0) {
+ printk(KERN_ERR "ia64_pal_cache_summary=%ld\n", status);
+ return 0;
+ }
+
+ p += sprintf(p, "Cache levels : %ld\nUnique caches : %ld\n\n", levels, unique_caches);
+
+ for (i=0; i < levels; i++) {
+
+ /* j is the cache type: 2 = data, 1 = instruction (see cache_types[]) */
+ for (j=2; j >0 ; j--) {
+
+ /* even without unification some level may not be present */
+ if ((status=ia64_pal_cache_config_info(i,j, &cci)) != 0) {
+ continue;
+ }
+ /* j + pcci_unified picks "Data/Instruction" for a unified cache */
+ p += sprintf(p,
+ "%s Cache level %lu:\n"
+ "\tSize : %lu bytes\n"
+ "\tAttributes : ",
+ cache_types[j+cci.pcci_unified], i+1,
+ cci.pcci_cache_size);
+
+ if (cci.pcci_unified) p += sprintf(p, "Unified ");
+
+ p += sprintf(p, "%s\n", cache_mattrib[cci.pcci_cache_attr]);
+
+ /* line size and stride are reported as log2 values, hence the shifts */
+ p += sprintf(p,
+ "\tAssociativity : %d\n"
+ "\tLine size : %d bytes\n"
+ "\tStride : %d bytes\n",
+ cci.pcci_assoc, 1<<cci.pcci_line_size, 1<<cci.pcci_stride);
+ if (j == 1)
+ p += sprintf(p, "\tStore latency : N/A\n");
+ else
+ p += sprintf(p, "\tStore latency : %d cycle(s)\n",
+ cci.pcci_st_latency);
+
+ p += sprintf(p,
+ "\tLoad latency : %d cycle(s)\n"
+ "\tStore hints : ", cci.pcci_ld_latency);
+
+ /* one name per set bit; the field in cci is consumed by the shifts */
+ for(k=0; k < 8; k++ ) {
+ if ( cci.pcci_st_hints & 0x1)
+ p += sprintf(p, "[%s]", cache_st_hints[k]);
+ cci.pcci_st_hints >>=1;
+ }
+ p += sprintf(p, "\n\tLoad hints : ");
+
+ for(k=0; k < 8; k++ ) {
+ if (cci.pcci_ld_hints & 0x1)
+ p += sprintf(p, "[%s]", cache_ld_hints[k]);
+ cci.pcci_ld_hints >>=1;
+ }
+ p += sprintf(p,
+ "\n\tAlias boundary : %d byte(s)\n"
+ "\tTag LSB : %d\n"
+ "\tTag MSB : %d\n",
+ 1<<cci.pcci_alias_boundary, cci.pcci_tag_lsb,
+ cci.pcci_tag_msb);
+
+ /* when unified, data(j=2) is enough */
+ if (cci.pcci_unified) break;
+ }
+ }
+ return p - page;
+}
+
+
+/*
+ * Format the virtual-memory related PAL information into PAGE: address-space sizes,
+ * supported memory attributes, TLB page sizes, purge (ptc.e) parameters and one block
+ * per translation-cache level.  Returns the number of bytes written, or 0 as soon as
+ * one of the summary queries fails.
+ */
+static int
+vm_info(char *page)
+{
+ char *p = page;
+ u64 tr_pages =0, vw_pages=0, tc_pages;
+ u64 attrib;
+ pal_vm_info_1_u_t vm_info_1;
+ pal_vm_info_2_u_t vm_info_2;
+ pal_tc_info_u_t tc_info;
+ ia64_ptce_info_t ptce;
+ const char *sep;
+ int i, j;
+ s64 status;
+
+ if ((status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) {
+ printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status);
+ return 0;
+ }
+
+
+ p += sprintf(p,
+ "Physical Address Space : %d bits\n"
+ "Virtual Address Space : %d bits\n"
+ "Protection Key Registers(PKR) : %d\n"
+ "Implemented bits in PKR.key : %d\n"
+ "Hash Tag ID : 0x%x\n"
+ "Size of RR.rid : %d\n",
+ vm_info_1.pal_vm_info_1_s.phys_add_size,
+ vm_info_2.pal_vm_info_2_s.impl_va_msb+1, vm_info_1.pal_vm_info_1_s.max_pkr+1,
+ vm_info_1.pal_vm_info_1_s.key_size, vm_info_1.pal_vm_info_1_s.hash_tag_id,
+ vm_info_2.pal_vm_info_2_s.rid_size);
+
+ if (ia64_pal_mem_attrib(&attrib) != 0)
+ return 0;
+
+ /* print the name of every attribute whose bit is set, comma separated */
+ p += sprintf(p, "Supported memory attributes : ");
+ sep = "";
+ for (i = 0; i < 8; i++) {
+ if (attrib & (1 << i)) {
+ p += sprintf(p, "%s%s", sep, mem_attrib[i]);
+ sep = ", ";
+ }
+ }
+ p += sprintf(p, "\n");
+
+ if ((status = ia64_pal_vm_page_size(&tr_pages, &vw_pages)) !=0) {
+ printk(KERN_ERR "ia64_pal_vm_page_size=%ld\n", status);
+ return 0;
+ }
+
+ p += sprintf(p,
+ "\nTLB walker : %simplemented\n"
+ "Number of DTR : %d\n"
+ "Number of ITR : %d\n"
+ "TLB insertable page sizes : ",
+ vm_info_1.pal_vm_info_1_s.vw ? "" : "not ",
+ vm_info_1.pal_vm_info_1_s.max_dtr_entry+1,
+ vm_info_1.pal_vm_info_1_s.max_itr_entry+1);
+
+
+ p = bitvector_process(p, tr_pages);
+
+ p += sprintf(p, "\nTLB purgeable page sizes : ");
+
+ p = bitvector_process(p, vw_pages);
+
+ if ((status=ia64_get_ptce(&ptce)) != 0) {
+ printk(KERN_ERR "ia64_get_ptce=%ld\n", status);
+ return 0;
+ }
+
+ p += sprintf(p,
+ "\nPurge base address : 0x%016lx\n"
+ "Purge outer loop count : %d\n"
+ "Purge inner loop count : %d\n"
+ "Purge outer loop stride : %d\n"
+ "Purge inner loop stride : %d\n",
+ ptce.base, ptce.count[0], ptce.count[1], ptce.stride[0], ptce.stride[1]);
+
+ p += sprintf(p,
+ "TC Levels : %d\n"
+ "Unique TC(s) : %d\n",
+ vm_info_1.pal_vm_info_1_s.num_tc_levels,
+ vm_info_1.pal_vm_info_1_s.max_unique_tcs);
+
+ for(i=0; i < vm_info_1.pal_vm_info_1_s.num_tc_levels; i++) {
+ /* j is the TC type: 2 = data, 1 = instruction (see cache_types[]) */
+ for (j=2; j>0 ; j--) {
+ tc_pages = 0; /* just in case */
+
+
+ /* even without unification, some levels may not be present */
+ if ((status=ia64_pal_vm_info(i,j, &tc_info, &tc_pages)) != 0) {
+ continue;
+ }
+
+ p += sprintf(p,
+ "\n%s Translation Cache Level %d:\n"
+ "\tHash sets : %d\n"
+ "\tAssociativity : %d\n"
+ "\tNumber of entries : %d\n"
+ "\tFlags : ",
+ cache_types[j+tc_info.tc_unified], i+1, tc_info.tc_num_sets,
+ tc_info.tc_associativity, tc_info.tc_num_entries);
+
+ if (tc_info.tc_pf) p += sprintf(p, "PreferredPageSizeOptimized ");
+ if (tc_info.tc_unified) p += sprintf(p, "Unified ");
+ if (tc_info.tc_reduce_tr) p += sprintf(p, "TCReduction");
+
+ p += sprintf(p, "\n\tSupported page sizes: ");
+
+ p = bitvector_process(p, tc_pages);
+
+ /* when unified, data (j=2) is enough */
+ if (tc_info.tc_unified) break;
+ }
+ }
+ p += sprintf(p, "\n");
+
+ return p - page;
+}
+
+
+/*
+ * Format register-related PAL information into "page": the four register
+ * bit masks returned by ia64_pal_register_info() (one per info_type[]
+ * label), the RSE data from ia64_pal_rse_info() and the debug register
+ * pair counts from ia64_pal_debug_info().
+ *
+ * Returns the number of bytes written, or 0 if any of those calls fails.
+ */
+static int
+register_info(char *page)
+{
+ char *p = page;
+ u64 reg_info[2];
+ u64 info;
+ u64 phys_stacked;
+ pal_hints_u_t hints;
+ u64 iregs, dregs;
+ char *info_type[]={
+ "Implemented AR(s)",
+ "AR(s) with read side-effects",
+ "Implemented CR(s)",
+ "CR(s) with read side-effects",
+ };
+
+ for(info=0; info < 4; info++) {
+
+ if (ia64_pal_register_info(info, &reg_info[0], &reg_info[1]) != 0) return 0;
+
+ p += sprintf(p, "%-32s : ", info_type[info]);
+
+ /* the two u64 words are printed as one 128-bit register mask */
+ p = bitregister_process(p, reg_info, 128);
+
+ p += sprintf(p, "\n");
+ }
+
+ if (ia64_pal_rse_info(&phys_stacked, &hints) != 0) return 0;
+
+ /* hint values outside rse_hints[] are printed as "(??)" */
+ p += sprintf(p,
+ "RSE stacked physical registers : %ld\n"
+ "RSE load/store hints : %ld (%s)\n",
+ phys_stacked, hints.ph_data,
+ hints.ph_data < RSE_HINTS_COUNT ? rse_hints[hints.ph_data]: "(??)");
+
+ if (ia64_pal_debug_info(&iregs, &dregs))
+ return 0;
+
+ p += sprintf(p,
+ "Instruction debug register pairs : %ld\n"
+ "Data debug register pairs : %ld\n", iregs, dregs);
+
+ return p - page;
+}
+
+/*
+ * Names of the processor feature bits, indexed by bit position (entry 0 is
+ * bit 0, 64 entries in total). processor_info() walks this table in step
+ * with the feature masks; NULL entries are bits that are not reported.
+ */
+static const char *proc_features[]={
+ NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
+ NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL,
+ NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
+ NULL,NULL,NULL,NULL,NULL, NULL,NULL,NULL,NULL,
+ NULL,NULL,NULL,NULL,NULL,
+ "XIP,XPSR,XFS implemented",
+ "XR1-XR3 implemented",
+ "Disable dynamic predicate prediction",
+ "Disable processor physical number",
+ "Disable dynamic data cache prefetch",
+ "Disable dynamic inst cache prefetch",
+ "Disable dynamic branch prediction",
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ "Disable BINIT on processor time-out",
+ "Disable dynamic power management (DPM)",
+ "Disable coherency",
+ "Disable cache",
+ "Enable CMCI promotion",
+ "Enable MCA to BINIT promotion",
+ "Enable MCA promotion",
+ "Enable BERR promotion"
+};
+
+
+/*
+ * Print one line per named processor feature bit: whether it is
+ * implemented and, if so, its current state and whether it is
+ * software controllable.
+ *
+ * Returns the number of bytes written to "page", or 0 when
+ * ia64_pal_proc_get_features() fails.
+ */
+static int
+processor_info(char *page)
+{
+	char *out = page;
+	u64 avail=1, status=1, control=1;
+	int bit;
+
+	if (ia64_pal_proc_get_features(&avail, &status, &control) != 0)
+		return 0;
+
+	for (bit = 0; bit < 64; bit++) {
+		const char *name = proc_features[bit];
+		const char *impl, *state, *ctrl;
+
+		/* unnamed bits are not reported */
+		if (name == NULL)
+			continue;
+
+		if ((avail >> bit) & 0x1) {
+			impl  = "";
+			state = ((status >> bit) & 0x1) ? "On" : "Off";
+			ctrl  = ((control >> bit) & 0x1) ? "Ctrl" : "NoCtrl";
+		} else {
+			impl  = "NotImpl";
+			state = "";
+			ctrl  = "";
+		}
+		out += sprintf(out, "%-40s : %s%s %s\n", name, impl, state, ctrl);
+	}
+	return out - page;
+}
+
+/*
+ * Names of the bus feature bits, indexed by bit position (entry 0 is
+ * bit 0, 64 entries in total). bus_info() walks this table in step with
+ * the feature masks; NULL entries are bits that are not reported.
+ */
+static const char *bus_features[]={
+ NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
+ NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL,
+ NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
+ NULL,NULL,
+ "Request Bus Parking",
+ "Bus Lock Mask",
+ "Enable Half Transfer",
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ "Enable Cache Line Repl. Shared",
+ "Enable Cache Line Repl. Exclusive",
+ "Disable Transaction Queuing",
+ "Disable Response Error Checking",
+ "Disable Bus Error Checking",
+ "Disable Bus Requester Internal Error Signalling",
+ "Disable Bus Requester Error Signalling",
+ "Disable Bus Initialization Event Checking",
+ "Disable Bus Initialization Event Signalling",
+ "Disable Bus Address Error Checking",
+ "Disable Bus Address Error Signalling",
+ "Disable Bus Data Error Checking"
+};
+
+
+/*
+ * Print one line per named bus feature bit: whether it is implemented
+ * and, if so, its current state and whether it is software controllable.
+ *
+ * Returns the number of bytes written to "page", or 0 when
+ * ia64_pal_bus_get_features() fails.
+ */
+static int
+bus_info(char *page)
+{
+	char *out = page;
+	pal_bus_features_u_t av, st, ct;
+	u64 avail, status, control;
+	int bit;
+
+	if (ia64_pal_bus_get_features(&av, &st, &ct) != 0)
+		return 0;
+
+	/* work on the raw 64-bit values of the three feature words */
+	avail   = av.pal_bus_features_val;
+	status  = st.pal_bus_features_val;
+	control = ct.pal_bus_features_val;
+
+	for (bit = 0; bit < 64; bit++) {
+		const char *name = bus_features[bit];
+		const char *impl, *state, *ctrl;
+
+		/* unnamed bits are not reported */
+		if (name == NULL)
+			continue;
+
+		if ((avail >> bit) & 0x1) {
+			impl  = "";
+			state = ((status >> bit) & 0x1) ? "On" : "Off";
+			ctrl  = ((control >> bit) & 0x1) ? "Ctrl" : "NoCtrl";
+		} else {
+			impl  = "NotImpl";
+			state = "";
+			ctrl  = "";
+		}
+		out += sprintf(out, "%-48s : %s%s %s\n", name, impl, state, ctrl);
+	}
+	return out - page;
+}
+
+/*
+ * Format the current and minimum PAL versions (vendor, PAL_A, PAL_B)
+ * returned by ia64_pal_version() into "page".
+ *
+ * Returns the number of bytes written, or 0 if the call fails.
+ */
+static int
+version_info(char *page)
+{
+ pal_version_u_t min_ver, cur_ver;
+ char *p = page;
+
+ /* The PAL_VERSION call is advertised as being able to support
+ * both physical and virtual mode calls. This seems to be a documentation
+ * bug rather than firmware bug. In fact, it does only support physical mode.
+ * So now the code reflects this fact and the pal_version() has been updated
+ * accordingly.
+ */
+ if (ia64_pal_version(&min_ver, &cur_ver) != 0) return 0;
+
+ /* each model byte is printed as two hex nibbles: high nibble, then low nibble */
+ p += sprintf(p,
+ "PAL_vendor : 0x%02x (min=0x%02x)\n"
+ "PAL_A : %x.%x.%x (min=%x.%x.%x)\n"
+ "PAL_B : %x.%x.%x (min=%x.%x.%x)\n",
+ cur_ver.pal_version_s.pv_pal_vendor, min_ver.pal_version_s.pv_pal_vendor,
+
+ cur_ver.pal_version_s.pv_pal_a_model>>4,
+ cur_ver.pal_version_s.pv_pal_a_model&0xf, cur_ver.pal_version_s.pv_pal_a_rev,
+ min_ver.pal_version_s.pv_pal_a_model>>4,
+ min_ver.pal_version_s.pv_pal_a_model&0xf, min_ver.pal_version_s.pv_pal_a_rev,
+
+ cur_ver.pal_version_s.pv_pal_b_model>>4,
+ cur_ver.pal_version_s.pv_pal_b_model&0xf, cur_ver.pal_version_s.pv_pal_b_rev,
+ min_ver.pal_version_s.pv_pal_b_model>>4,
+ min_ver.pal_version_s.pv_pal_b_model&0xf, min_ver.pal_version_s.pv_pal_b_rev);
+ return p - page;
+}
+
+/*
+ * Format the performance-monitor description returned by
+ * ia64_pal_perf_mon_info() into "page".
+ *
+ * pm_buffer is printed as four 256-bit masks of four u64 words each, at
+ * word offsets 0, 4, 8 and 12, under the labels used below.
+ *
+ * Returns the number of bytes written, or 0 if the PAL call fails.
+ */
+static int
+perfmon_info(char *page)
+{
+ char *p = page;
+ u64 pm_buffer[16];
+ pal_perf_mon_info_u_t pm_info;
+
+ if (ia64_pal_perf_mon_info(pm_buffer, &pm_info) != 0) return 0;
+
+ p += sprintf(p,
+ "PMC/PMD pairs : %d\n"
+ "Counter width : %d bits\n"
+ "Cycle event number : %d\n"
+ "Retired event number : %d\n"
+ "Implemented PMC : ",
+ pm_info.pal_perf_mon_info_s.generic, pm_info.pal_perf_mon_info_s.width,
+ pm_info.pal_perf_mon_info_s.cycles, pm_info.pal_perf_mon_info_s.retired);
+
+ p = bitregister_process(p, pm_buffer, 256);
+ p += sprintf(p, "\nImplemented PMD : ");
+ p = bitregister_process(p, pm_buffer+4, 256);
+ p += sprintf(p, "\nCycles count capable : ");
+ p = bitregister_process(p, pm_buffer+8, 256);
+ p += sprintf(p, "\nRetired bundles count capable : ");
+
+#ifdef CONFIG_ITANIUM
+ /*
+ * PAL_PERF_MON_INFO reports that only PMC4 can be used to count CPU_CYCLES
+ * which is wrong, both PMC4 and PMD5 support it.
+ *
+ * NOTE(review): the fix-up below rewrites word 12, which is printed
+ * under the "Retired bundles" label, not the "Cycles" one -- confirm
+ * which mask the comment above is meant to describe.
+ */
+ if (pm_buffer[12] == 0x10) pm_buffer[12]=0x30;
+#endif
+
+ p = bitregister_process(p, pm_buffer+12, 256);
+
+ p += sprintf(p, "\n");
+
+ return p - page;
+}
+
+/*
+ * Report the output clock base frequency and the processor, bus and ITC
+ * clock ratios.
+ *
+ * A return of -1 from ia64_pal_freq_base() is reported as "not
+ * implemented" and is not an error. Returns the number of bytes written
+ * to "page", or 0 when ia64_pal_freq_ratios() fails.
+ */
+static int
+frequency_info(char *page)
+{
+	struct pal_freq_ratio proc, itc, bus;
+	u64 base;
+	char *out = page;
+
+	if (ia64_pal_freq_base(&base) != -1)
+		out += sprintf(out, "Output clock : %ld ticks/s\n", base);
+	else
+		out += sprintf(out, "Output clock : not implemented\n");
+
+	if (ia64_pal_freq_ratios(&proc, &bus, &itc) != 0)
+		return 0;
+
+	out += sprintf(out,
+		       "Processor/Clock ratio : %ld/%ld\n"
+		       "Bus/Clock ratio : %ld/%ld\n"
+		       "ITC/Clock ratio : %ld/%ld\n",
+		       proc.num, proc.den,
+		       bus.num, bus.den,
+		       itc.num, itc.den);
+
+	return out - page;
+}
+
+/*
+ * Dump every valid instruction (i == 0, "ITR") and data (i == 1, "DTR")
+ * translation register.
+ *
+ * @page: output buffer receiving the formatted text
+ *
+ * Returns the number of bytes written to @page, or 0 if
+ * ia64_pal_vm_summary() fails. A failing ia64_pal_tr_read() is logged and
+ * only that register is skipped; registers whose valid bit is clear are
+ * skipped silently.
+ */
+static int
+tr_info(char *page)
+{
+	char *p = page;
+	s64 status;
+	pal_tr_valid_u_t tr_valid;
+	u64 tr_buffer[4];
+	pal_vm_info_1_u_t vm_info_1;
+	pal_vm_info_2_u_t vm_info_2;
+	u64 i, j;
+	u64 max[2], pgm;
+	struct ifa_reg {
+		u64 valid:1;
+		u64 ig:11;
+		u64 vpn:52;
+	} *ifa_reg;
+	struct itir_reg {
+		u64 rv1:2;
+		u64 ps:6;
+		u64 key:24;
+		u64 rv2:32;
+	} *itir_reg;
+	struct gr_reg {
+		u64 p:1;
+		u64 rv1:1;
+		u64 ma:3;
+		u64 a:1;
+		u64 d:1;
+		u64 pl:2;
+		u64 ar:3;
+		u64 ppn:38;
+		u64 rv2:2;
+		u64 ed:1;
+		u64 ig:11;
+	} *gr_reg;
+	struct rid_reg {
+		u64 ig1:1;
+		u64 rv1:1;
+		u64 ig2:6;
+		u64 rid:24;
+		u64 rv2:32;
+	} *rid_reg;
+
+	if ((status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) {
+		printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status);
+		return 0;
+	}
+	/* number of registers of each kind, indexed like the outer loop */
+	max[0] = vm_info_1.pal_vm_info_1_s.max_itr_entry+1;
+	max[1] = vm_info_1.pal_vm_info_1_s.max_dtr_entry+1;
+
+	for (i=0; i < 2; i++ ) {
+		for (j=0; j < max[i]; j++) {
+
+			status = ia64_pal_tr_read(j, i, tr_buffer, &tr_valid);
+			if (status != 0) {
+				printk(KERN_ERR "palinfo: pal call failed on tr[%lu:%lu]=%ld\n",
+				       i, j, status);
+				continue;
+			}
+
+			/* word 2 carries the valid bit and the virtual page number */
+			ifa_reg = (struct ifa_reg *)&tr_buffer[2];
+
+			if (ifa_reg->valid == 0) continue;
+
+			gr_reg   = (struct gr_reg *)tr_buffer;
+			itir_reg = (struct itir_reg *)&tr_buffer[1];
+			rid_reg  = (struct rid_reg *)&tr_buffer[3];
+
+			/*
+			 * Mask that clears the page-number bits lying inside
+			 * the page. Built from an unsigned 64-bit constant:
+			 * left-shifting the int -1 is not well defined.
+			 */
+			pgm = ~0UL << (itir_reg->ps - 12);
+			p += sprintf(p,
+				     "%cTR%lu: av=%d pv=%d dv=%d mv=%d\n"
+				     "\tppn : 0x%lx\n"
+				     "\tvpn : 0x%lx\n"
+				     "\tps : ",
+				     "ID"[i], j,
+				     tr_valid.pal_tr_valid_s.access_rights_valid,
+				     tr_valid.pal_tr_valid_s.priv_level_valid,
+				     tr_valid.pal_tr_valid_s.dirty_bit_valid,
+				     tr_valid.pal_tr_valid_s.mem_attr_valid,
+				     (gr_reg->ppn & pgm)<< 12, (ifa_reg->vpn & pgm)<< 12);
+
+			/*
+			 * ps is a 6-bit field, so the shift must be done in
+			 * 64 bits: an int shift is wrong for ps >= 31.
+			 */
+			p = bitvector_process(p, 1UL << itir_reg->ps);
+
+			p += sprintf(p,
+				     "\n\tpl : %d\n"
+				     "\tar : %d\n"
+				     "\trid : %x\n"
+				     "\tp : %d\n"
+				     "\tma : %d\n"
+				     "\td : %d\n",
+				     gr_reg->pl, gr_reg->ar, rid_reg->rid, gr_reg->p, gr_reg->ma,
+				     gr_reg->d);
+		}
+	}
+	return p - page;
+}
+
+
+
+/*
+ * List {name,function} pairs for every entry in /proc/pal/cpu*
+ *
+ * The position of an entry in this table is the func_id used to select it.
+ */
+static palinfo_entry_t palinfo_entries[]={
+ { "version_info", version_info, },
+ { "vm_info", vm_info, },
+ { "cache_info", cache_info, },
+ { "power_info", power_info, },
+ { "register_info", register_info, },
+ { "processor_info", processor_info, },
+ { "perfmon_info", perfmon_info, },
+ { "frequency_info", frequency_info, },
+ { "bus_info", bus_info },
+ { "tr_info", tr_info, }
+};
+
+/* number of per-cpu files, i.e. the length of palinfo_entries[] */
+#define NR_PALINFO_ENTRIES (int) ARRAY_SIZE(palinfo_entries)
+
+/*
+ * this array is used to keep track of the proc entries we create. This is
+ * required in the module mode when we need to remove all entries. The procfs code
+ * does not do recursion of deletion
+ *
+ * Notes:
+ * - +1 accounts for the cpuN directory entry in /proc/pal
+ * - each cpu owns a run of NR_PALINFO_ENTRIES+1 slots: the cpuN directory
+ *   first, then one slot per palinfo_entries[] file
+ */
+#define NR_PALINFO_PROC_ENTRIES (NR_CPUS*(NR_PALINFO_ENTRIES+1))
+
+static struct proc_dir_entry *palinfo_proc_entries[NR_PALINFO_PROC_ENTRIES];
+static struct proc_dir_entry *palinfo_dir; /* the top-level "pal" directory */
+
+/*
+ * This data structure is used to pass which cpu,function is being requested
+ * It must fit in a 64bit quantity to be passed to the proc callback routine
+ *
+ * In SMP mode, when we get a request for another CPU, we must call that
+ * other CPU using IPI and wait for the result before returning.
+ */
+typedef union {
+ u64 value; /* both fields packed into one word, passed as the proc "data" pointer */
+ struct {
+ unsigned req_cpu: 32; /* for which CPU this info is */
+ unsigned func_id: 32; /* which function is requested */
+ } pal_func_cpu;
+} pal_func_cpu_u_t;
+
+/* shorthands so that users can write f->req_cpu and f->func_id directly */
+#define req_cpu pal_func_cpu.req_cpu
+#define func_id pal_func_cpu.func_id
+
+#ifdef CONFIG_SMP
+
+/*
+ * used to hold information about final function to call
+ */
+/* filled in by palinfo_handle_smp(), consumed by palinfo_smp_call() */
+typedef struct {
+ palinfo_func_t func; /* pointer to function to call */
+ char *page; /* buffer to store results */
+ int ret; /* return value from call */
+} palinfo_smp_data_t;
+
+
+/*
+ * this function does the actual final call and is called
+ * from the smp code, i.e., this is the palinfo callback routine
+ *
+ * @info: pointer to the palinfo_smp_data_t describing the request; the
+ *        length returned by the handler is stored in its "ret" field.
+ */
+static void
+palinfo_smp_call(void *info)
+{
+	palinfo_smp_data_t *data = (palinfo_smp_data_t *)info;
+
+	if (data == NULL) {
+		/*
+		 * There is no request block to store a result in, so only
+		 * report the problem; writing data->ret here would
+		 * dereference the NULL pointer.
+		 */
+		printk(KERN_ERR "palinfo: data pointer is NULL\n");
+		return;
+	}
+	/* does this actual call */
+	data->ret = (*data->func)(data->page);
+}
+
+/*
+ * function called to trigger the IPI, we need to access a remote CPU
+ * Return:
+ * 0 : error or nothing to output
+ * otherwise how many bytes in the "page" buffer were written
+ */
+static
+int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page)
+{
+	/* request block handed to palinfo_smp_call() on the target CPU */
+	palinfo_smp_data_t req = {
+		.func = palinfo_entries[f->func_id].proc_read,
+		.page = page,
+		.ret  = 0,	/* just in case */
+	};
+	int err;
+
+	/* will send IPI to other CPU and wait for completion of remote call */
+	err = smp_call_function_single(f->req_cpu, palinfo_smp_call, &req, 0, 1);
+	if (err == 0)
+		return req.ret;
+
+	printk(KERN_ERR "palinfo: remote CPU call from %d to %d on function %d: "
+	       "error %d\n", smp_processor_id(), f->req_cpu, f->func_id, err);
+	return 0;
+}
+#else /* ! CONFIG_SMP */
+/*
+ * Non-SMP stand-in for the remote-CPU path: it only logs an error and
+ * reports that nothing was written.
+ */
+static
+int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page)
+{
+ printk(KERN_ERR "palinfo: should not be called with non SMP kernel\n");
+ return 0;
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * Entry point routine: all calls go through this function
+ *
+ * "data" is not a pointer to anything: its value is the packed
+ * pal_func_cpu_u_t naming the CPU and the handler to run. The handler
+ * formats its whole output into "page"; the window [off, off+count) of
+ * that output is then reported through "start" and the return value.
+ */
+static int
+palinfo_read_entry(char *page, char **start, off_t off, int count, int *eof, void *data)
+{
+	pal_func_cpu_u_t *req = (pal_func_cpu_u_t *)&data;
+	int len = 0;
+
+	/*
+	 * in SMP mode, we may need to call another CPU to get correct
+	 * information. PAL, by definition, is processor specific
+	 */
+	if (req->req_cpu != get_cpu())
+		len = palinfo_handle_smp(req, page);
+	else
+		len = (*palinfo_entries[req->func_id].proc_read)(page);
+
+	put_cpu();
+
+	/* everything that was produced fits in this window: signal end of file */
+	if (len <= off+count)
+		*eof = 1;
+
+	*start = page + off;
+	len -= off;
+
+	/* clamp what remains to the range [0, count] */
+	if (len > count)
+		len = count;
+
+	return (len < 0) ? 0 : len;
+}
+
+/*
+ * Create the cpu<N> directory below palinfo_dir and one read-only entry
+ * per palinfo_entries[] element inside it.
+ *
+ * @cpu: cpu number the entries describe; it is packed, together with the
+ *       table index, into the "data" word handed to palinfo_read_entry().
+ *
+ * Nothing is created or recorded if the directory cannot be made.
+ */
+static void
+create_palinfo_proc_entries(unsigned int cpu)
+{
+# define CPUSTR "cpu%d"
+
+	pal_func_cpu_u_t f;
+	struct proc_dir_entry **pdir;
+	struct proc_dir_entry *cpu_dir;
+	int j;
+	/*
+	 * "cpu" + sign + 10 decimal digits + NUL: large enough for any
+	 * 32-bit cpu number. sizeof(CPUSTR) only left room for two digits.
+	 */
+	char cpustr[16];
+
+
+	/*
+	 * we keep track of created entries in a depth-first order for
+	 * cleanup purposes. Each entry is stored into palinfo_proc_entries
+	 */
+	sprintf(cpustr,CPUSTR, cpu);
+
+	cpu_dir = proc_mkdir(cpustr, palinfo_dir);
+	if (cpu_dir == NULL)
+		return;	/* do not create the files under a NULL parent */
+
+	f.req_cpu = cpu;
+
+	/*
+	 * Compute the location to store per cpu entries
+	 * We dont store the top level entry in this list, but
+	 * remove it finally after removing all cpu entries.
+	 */
+	pdir = &palinfo_proc_entries[cpu*(NR_PALINFO_ENTRIES+1)];
+	*pdir++ = cpu_dir;
+	for (j=0; j < NR_PALINFO_ENTRIES; j++) {
+		f.func_id = j;
+		*pdir = create_proc_read_entry(
+				palinfo_entries[j].name, 0, cpu_dir,
+				palinfo_read_entry, (void *)f.value);
+		if (*pdir)
+			(*pdir)->owner = THIS_MODULE;
+		pdir++;
+	}
+}
+
+/*
+ * Remove every proc entry recorded for cpu "hcpu", then its cpu<N>
+ * directory, and clear the corresponding palinfo_proc_entries[] slots.
+ */
+static void
+remove_palinfo_proc_entries(unsigned int hcpu)
+{
+	int j;
+	struct proc_dir_entry *cpu_dir, **pdir;
+
+	pdir = &palinfo_proc_entries[hcpu*(NR_PALINFO_ENTRIES+1)];
+	cpu_dir = *pdir;
+	*pdir++ = NULL;
+
+	/*
+	 * Advance on every iteration: a slot left NULL by a failed creation
+	 * must not stop the entries stored after it from being removed.
+	 */
+	for (j = 0; j < NR_PALINFO_ENTRIES; j++, pdir++) {
+		if (*pdir == NULL)
+			continue;
+		remove_proc_entry((*pdir)->name, cpu_dir);
+		*pdir = NULL;
+	}
+
+	if (cpu_dir) {
+		remove_proc_entry(cpu_dir->name, palinfo_dir);
+	}
+}
+
+/*
+ * CPU notifier: create the proc entries of a cpu when it comes online
+ * and, with CONFIG_HOTPLUG_CPU, remove them once it is dead. Every other
+ * action is ignored. Always returns NOTIFY_OK.
+ */
+static int __devinit palinfo_cpu_callback(struct notifier_block *nfb,
+					  unsigned long action,
+					  void *hcpu)
+{
+	/* the cpu number is carried in the pointer value itself */
+	unsigned int cpu = (unsigned long)hcpu;
+
+	if (action == CPU_ONLINE)
+		create_palinfo_proc_entries(cpu);
+#ifdef CONFIG_HOTPLUG_CPU
+	else if (action == CPU_DEAD)
+		remove_palinfo_proc_entries(cpu);
+#endif
+
+	return NOTIFY_OK;
+}
+
+/* notifier block registered in palinfo_init() and unregistered in palinfo_exit() */
+static struct notifier_block palinfo_cpu_notifier =
+{
+ .notifier_call = palinfo_cpu_callback,
+ .priority = 0,
+};
+
+/*
+ * Module initialisation: create the top-level "pal" proc directory,
+ * populate it for every cpu that is currently online and register for
+ * cpu notifications. Always returns 0.
+ */
+static int __init
+palinfo_init(void)
+{
+	int cpu = 0;
+
+	printk(KERN_INFO "PAL Information Facility v%s\n", PALINFO_VERSION);
+
+	/* parent directory of all the per-cpu directories */
+	palinfo_dir = proc_mkdir("pal", NULL);
+
+	/* Create palinfo dirs in /proc for all online cpus */
+	for_each_online_cpu(cpu)
+		create_palinfo_proc_entries(cpu);
+
+	/* Register for future delivery via notify registration */
+	register_cpu_notifier(&palinfo_cpu_notifier);
+
+	return 0;
+}
+
+/*
+ * Module teardown: stop cpu notifications, remove the per-cpu entries of
+ * every online cpu and finally the top-level directory.
+ */
+static void __exit
+palinfo_exit(void)
+{
+	int i = 0;
+
+	/*
+	 * Unregister from cpu notifier callbacks first, so that no callback
+	 * can create or remove entries while the tree is being torn down.
+	 */
+	unregister_cpu_notifier(&palinfo_cpu_notifier);
+
+	/* remove all nodes: depth first pass. Could optimize this */
+	for_each_online_cpu(i) {
+		remove_palinfo_proc_entries(i);
+	}
+
+	/*
+	 * Remove the top level entry finally (it is NULL if its creation
+	 * failed at init time)
+	 */
+	if (palinfo_dir)
+		remove_proc_entry(palinfo_dir->name, NULL);
+}
+
+module_init(palinfo_init);
+module_exit(palinfo_exit);
diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c
new file mode 100644
index 00000000000..367804a605f
--- /dev/null
+++ b/arch/ia64/kernel/patch.c
@@ -0,0 +1,189 @@
+/*
+ * Instruction-patching support.
+ *
+ * Copyright (C) 2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <linux/init.h>
+#include <linux/string.h>
+
+#include <asm/patch.h>
+#include <asm/processor.h>
+#include <asm/sections.h>
+#include <asm/system.h>
+#include <asm/unistd.h>
+
+/*
+ * This was adapted from code written by Tony Luck:
+ *
+ * The 64-bit value in a "movl reg=value" is scattered between the two words of the bundle
+ * like this:
+ *
+ * 6 6 5 4 3 2 1
+ * 3210987654321098765432109876543210987654321098765432109876543210
+ * ABBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCDEEEEEFFFFFFFFFGGGGGGG
+ *
+ * CCCCCCCCCCCCCCCCCCxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ * xxxxAFFFFFFFFFEEEEEDxGGGGGGGxxxxxxxxxxxxxBBBBBBBBBBBBBBBBBBBBBBB
+ */
+static u64
+get_imm64 (u64 insn_addr)
+{
+ u64 *p = (u64 *) (insn_addr & -16); /* mask out slot number */
+
+ /* gather fields A..G of the diagram above from the two bundle words */
+ return ( (p[1] & 0x0800000000000000UL) << 4) | /*A*/
+ ((p[1] & 0x00000000007fffffUL) << 40) | /*B*/
+ ((p[0] & 0xffffc00000000000UL) >> 24) | /*C*/
+ ((p[1] & 0x0000100000000000UL) >> 23) | /*D*/
+ ((p[1] & 0x0003e00000000000UL) >> 29) | /*E*/
+ ((p[1] & 0x07fc000000000000UL) >> 43) | /*F*/
+ ((p[1] & 0x000007f000000000UL) >> 36); /*G*/
+}
+
+/*
+ * Patch instruction with "val" where "mask" has 1 bits.
+ *
+ * The low 4 bits of insn_addr select the slot inside the 16-byte bundle
+ * that starts at (insn_addr & -16); mask and val are given relative to
+ * bit 0 of that slot.
+ */
+void
+ia64_patch (u64 insn_addr, u64 mask, u64 val)
+{
+ u64 m0, m1, v0, v1, b0, b1, *b = (u64 *) (insn_addr & -16);
+# define insn_mask ((1UL << 41) - 1)
+ unsigned long shift;
+
+ b0 = b[0]; b1 = b[1];
+ shift = 5 + 41 * (insn_addr % 16); /* 5 bits of template, then 3 x 41-bit instructions */
+ if (shift >= 64) {
+ /* the slot starts inside the second word: only b[1] changes */
+ m1 = mask << (shift - 64);
+ v1 = val << (shift - 64);
+ } else {
+ /* the slot starts in the first word and may spill into the second */
+ m0 = mask << shift; m1 = mask >> (64 - shift);
+ v0 = val << shift; v1 = val >> (64 - shift);
+ b[0] = (b0 & ~m0) | (v0 & m0);
+ }
+ b[1] = (b1 & ~m1) | (v1 & m1);
+}
+
+/*
+ * Replace the 64-bit immediate of the instruction at insn_addr with "val".
+ * Bits 63 and 21..0 are scattered into the slot at insn_addr as annotated
+ * below; the remaining bits (val >> 22) are written to the slot just
+ * before it (insn_addr - 1).
+ */
+void
+ia64_patch_imm64 (u64 insn_addr, u64 val)
+{
+ ia64_patch(insn_addr,
+ 0x01fffefe000UL, ( ((val & 0x8000000000000000UL) >> 27) /* bit 63 -> 36 */
+ | ((val & 0x0000000000200000UL) << 0) /* bit 21 -> 21 */
+ | ((val & 0x00000000001f0000UL) << 6) /* bit 16 -> 22 */
+ | ((val & 0x000000000000ff80UL) << 20) /* bit 7 -> 27 */
+ | ((val & 0x000000000000007fUL) << 13) /* bit 0 -> 13 */));
+ ia64_patch(insn_addr - 1, 0x1ffffffffffUL, val >> 22);
+}
+
+/*
+ * Replace the 60-bit immediate of the instruction at insn_addr with "val".
+ * Bits 59 and 19..0 go into the slot at insn_addr as annotated below; the
+ * remaining bits (val >> 18) are written to the slot just before it
+ * (insn_addr - 1), whose two low bits are left untouched by the mask.
+ */
+void
+ia64_patch_imm60 (u64 insn_addr, u64 val)
+{
+ ia64_patch(insn_addr,
+ 0x011ffffe000UL, ( ((val & 0x0800000000000000UL) >> 23) /* bit 59 -> 36 */
+ | ((val & 0x00000000000fffffUL) << 13) /* bit 0 -> 13 */));
+ ia64_patch(insn_addr - 1, 0x1fffffffffcUL, val >> 18);
+}
+
+/*
+ * We sometimes need to load the physical address of a kernel
+ * object. Often we can convert the virtual address to physical
+ * at execution time, but sometimes (either for performance reasons
+ * or during error recovery) we cannot do this. Patch the marked
+ * bundles to load the physical address.
+ *
+ * [start, end) is a table of 32-bit offsets; each entry, added to its own
+ * address, gives the address of one instruction to patch.
+ */
+void __init
+ia64_patch_vtop (unsigned long start, unsigned long end)
+{
+	s32 *entry;
+
+	for (entry = (s32 *) start; entry < (s32 *) end; ++entry) {
+		u64 insn = (u64) entry + *entry;
+
+		/* replace virtual address with corresponding physical address: */
+		ia64_patch_imm64(insn, ia64_tpa(get_imm64(insn)));
+		ia64_fc((void *) insn);
+	}
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
+/*
+ * Disable the McKinley Errata 9 workaround on CPUs that do not need it.
+ *
+ * The workaround is kept when the local CPU reports family 0x1f, model 0.
+ * Otherwise every location listed in the offset table [start, end) is
+ * overwritten with the two bundles below. The decision is logged on the
+ * first call only.
+ */
+void
+ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
+{
+ static int first_time = 1;
+ int need_workaround;
+ s32 *offp = (s32 *) start;
+ u64 *wp;
+
+ need_workaround = (local_cpu_data->family == 0x1f && local_cpu_data->model == 0);
+
+ if (first_time) {
+ first_time = 0;
+ if (need_workaround)
+ printk(KERN_INFO "Leaving McKinley Errata 9 workaround enabled\n");
+ else
+ printk(KERN_INFO "McKinley Errata 9 workaround not needed; "
+ "disabling it\n");
+ }
+ if (need_workaround)
+ return;
+
+ while (offp < (s32 *) end) {
+ /* each table entry is an offset relative to its own address */
+ wp = (u64 *) ia64_imva((char *) offp + *offp);
+ wp[0] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
+ wp[1] = 0x0004000000000200UL;
+ wp[2] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
+ wp[3] = 0x0084006880000200UL;
+ /* flush both rewritten bundles (wp and wp + 2 are 16 bytes apart) */
+ ia64_fc(wp); ia64_fc(wp + 2);
+ ++offp;
+ }
+ ia64_sync_i();
+ ia64_srlz_i();
+}
+
+/*
+ * Make every instruction listed in the offset table [start, end) load
+ * the address of fsyscall_table as its 64-bit immediate.
+ */
+static void
+patch_fsyscall_table (unsigned long start, unsigned long end)
+{
+	extern unsigned long fsyscall_table[NR_syscalls];
+	s32 *entry;
+
+	for (entry = (s32 *) start; entry < (s32 *) end; ++entry) {
+		/* each table entry is an offset relative to its own address */
+		u64 insn = (u64) ia64_imva((char *) entry + *entry);
+
+		ia64_patch_imm64(insn, (u64) fsyscall_table);
+		ia64_fc((void *) insn);
+	}
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
+/*
+ * Point every instruction listed in the offset table [start, end) at
+ * fsys_bubble_down: its 60-bit immediate becomes the distance, in
+ * 16-byte bundles, from the instruction's own bundle to the target.
+ */
+static void
+patch_brl_fsys_bubble_down (unsigned long start, unsigned long end)
+{
+	extern char fsys_bubble_down[];
+	s32 *entry;
+
+	for (entry = (s32 *) start; entry < (s32 *) end; ++entry) {
+		/* each table entry is an offset relative to its own address */
+		u64 insn = (u64) entry + *entry;
+		u64 distance = (u64) (fsys_bubble_down - (insn & -16));
+
+		ia64_patch_imm60((u64) ia64_imva((void *) insn), distance / 16);
+		ia64_fc((void *) insn);
+	}
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
+/*
+ * Run the four patchers above over their respective gate patch lists,
+ * each delimited by the __start_gate_<name>_patchlist and
+ * __end_gate_<name>_patchlist symbols.
+ */
+void
+ia64_patch_gate (void)
+{
+# define START(name) ((unsigned long) __start_gate_##name##_patchlist)
+# define END(name) ((unsigned long)__end_gate_##name##_patchlist)
+
+ patch_fsyscall_table(START(fsyscall), END(fsyscall));
+ patch_brl_fsys_bubble_down(START(brl_fsys_bubble_down), END(brl_fsys_bubble_down));
+ ia64_patch_vtop(START(vtop), END(vtop));
+ ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9));
+}
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
new file mode 100644
index 00000000000..71147be3279
--- /dev/null
+++ b/arch/ia64/kernel/perfmon.c
@@ -0,0 +1,6676 @@
+/*
+ * This file implements the perfmon-2 subsystem which is used
+ * to program the IA-64 Performance Monitoring Unit (PMU).
+ *
+ * The initial version of perfmon.c was written by
+ * Ganesh Venkitachalam, IBM Corp.
+ *
+ * Then it was modified for perfmon-1.x by Stephane Eranian and
+ * David Mosberger, Hewlett Packard Co.
+ *
+ * Version Perfmon-2.x is a rewrite of perfmon-1.x
+ * by Stephane Eranian, Hewlett Packard Co.
+ *
+ * Copyright (C) 1999-2003, 2005 Hewlett Packard Co
+ * Stephane Eranian <eranian@hpl.hp.com>
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * More information about perfmon available at:
+ * http://www.hpl.hp.com/research/linux/perfmon
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/smp_lock.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+#include <linux/list.h>
+#include <linux/file.h>
+#include <linux/poll.h>
+#include <linux/vfs.h>
+#include <linux/pagemap.h>
+#include <linux/mount.h>
+#include <linux/version.h>
+#include <linux/bitops.h>
+
+#include <asm/errno.h>
+#include <asm/intrinsics.h>
+#include <asm/page.h>
+#include <asm/perfmon.h>
+#include <asm/processor.h>
+#include <asm/signal.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/delay.h>
+
+#ifdef CONFIG_PERFMON
+/*
+ * perfmon context state
+ */
+#define PFM_CTX_UNLOADED 1 /* context is not loaded onto any task */
+#define PFM_CTX_LOADED 2 /* context is loaded onto a task */
+#define PFM_CTX_MASKED 3 /* context is loaded but monitoring is masked due to overflow */
+#define PFM_CTX_ZOMBIE 4 /* owner of the context is closing it */
+
+#define PFM_INVALID_ACTIVATION (~0UL)
+
+/*
+ * depth of message queue
+ */
+#define PFM_MAX_MSGS 32
+#define PFM_CTXQ_EMPTY(g) ((g)->ctx_msgq_head == (g)->ctx_msgq_tail)
+
+/*
+ * type of a PMU register (bitmask).
+ * bitmask structure:
+ * bit0 : register implemented
+ * bit1 : end marker
+ * bit2-3 : reserved
+ * bit4 : pmc has pmc.pm
+ * bit5 : pmc controls a counter (has pmc.oi), pmd is used as counter
+ * bit6-7 : register type
+ * bit8-31: reserved
+ */
+#define PFM_REG_NOTIMPL 0x0 /* not implemented at all */
+#define PFM_REG_IMPL 0x1 /* register implemented */
+#define PFM_REG_END 0x2 /* end marker */
+#define PFM_REG_MONITOR (0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */
+#define PFM_REG_COUNTING (0x2<<4|PFM_REG_MONITOR) /* a monitor + pmc.oi+ PMD used as a counter */
+#define PFM_REG_CONTROL (0x4<<4|PFM_REG_IMPL) /* PMU control register */
+#define PFM_REG_CONFIG (0x8<<4|PFM_REG_IMPL) /* configuration register */
+#define PFM_REG_BUFFER (0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */
+
+#define PMC_IS_LAST(i) (pmu_conf->pmc_desc[i].type & PFM_REG_END)
+#define PMD_IS_LAST(i) (pmu_conf->pmd_desc[i].type & PFM_REG_END)
+
+#define PMC_OVFL_NOTIFY(ctx, i) ((ctx)->ctx_pmds[i].flags & PFM_REGFL_OVFL_NOTIFY)
+
+/* i assumed unsigned */
+#define PMC_IS_IMPL(i) (i< PMU_MAX_PMCS && (pmu_conf->pmc_desc[i].type & PFM_REG_IMPL))
+#define PMD_IS_IMPL(i) (i< PMU_MAX_PMDS && (pmu_conf->pmd_desc[i].type & PFM_REG_IMPL))
+
+/* XXX: these assume that register i is implemented */
+#define PMD_IS_COUNTING(i) ((pmu_conf->pmd_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
+#define PMC_IS_COUNTING(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
+#define PMC_IS_MONITOR(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_MONITOR) == PFM_REG_MONITOR)
+#define PMC_IS_CONTROL(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_CONTROL) == PFM_REG_CONTROL)
+
+#define PMC_DFL_VAL(i) pmu_conf->pmc_desc[i].default_value
+#define PMC_RSVD_MASK(i) pmu_conf->pmc_desc[i].reserved_mask
+#define PMD_PMD_DEP(i) pmu_conf->pmd_desc[i].dep_pmd[0]
+#define PMC_PMD_DEP(i) pmu_conf->pmc_desc[i].dep_pmd[0]
+
+#define PFM_NUM_IBRS IA64_NUM_DBG_REGS
+#define PFM_NUM_DBRS IA64_NUM_DBG_REGS
+
+#define CTX_OVFL_NOBLOCK(c) ((c)->ctx_fl_block == 0)
+#define CTX_HAS_SMPL(c) ((c)->ctx_fl_is_sampling)
+#define PFM_CTX_TASK(h) (h)->ctx_task
+
+#define PMU_PMC_OI 5 /* position of pmc.oi bit */
+
+/* XXX: does not support more than 64 PMDs */
+#define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask)
+#define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL)
+
+#define CTX_USED_MONITOR(ctx, mask) (ctx)->ctx_used_monitors[0] |= (mask)
+
+#define CTX_USED_IBR(ctx,n) (ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64)
+#define CTX_USED_DBR(ctx,n) (ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64)
+#define CTX_USES_DBREGS(ctx) (((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1)
+#define PFM_CODE_RR 0 /* requesting code range restriction */
+#define PFM_DATA_RR 1 /* requesting data range restriction */
+
+#define PFM_CPUINFO_CLEAR(v) pfm_get_cpu_var(pfm_syst_info) &= ~(v)
+#define PFM_CPUINFO_SET(v) pfm_get_cpu_var(pfm_syst_info) |= (v)
+#define PFM_CPUINFO_GET() pfm_get_cpu_var(pfm_syst_info)
+
+#define RDEP(x) (1UL<<(x))
+
+/*
+ * context protection macros
+ * in SMP:
+ * - we need to protect against CPU concurrency (spin_lock)
+ * - we need to protect against PMU overflow interrupts (local_irq_disable)
+ * in UP:
+ * - we need to protect against PMU overflow interrupts (local_irq_disable)
+ *
+ * spin_lock_irqsave()/spin_unlock_irqrestore():
+ * in SMP: local_irq_disable + spin_lock
+ * in UP : local_irq_disable
+ *
+ * spin_lock()/spin_unlock():
+ * in UP : removed automatically
+ * in SMP: protect against context accesses from other CPU. interrupts
+ * are not masked. This is useful for the PMU interrupt handler
+ * because we know we will not get PMU concurrency in that code.
+ */
+#define PROTECT_CTX(c, f) \
+ do { \
+ DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, current->pid)); \
+ spin_lock_irqsave(&(c)->ctx_lock, f); \
+ DPRINT(("spinlocked ctx %p by [%d]\n", c, current->pid)); \
+ } while(0)
+
+#define UNPROTECT_CTX(c, f) \
+ do { \
+ DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, current->pid)); \
+ spin_unlock_irqrestore(&(c)->ctx_lock, f); \
+ } while(0)
+
+#define PROTECT_CTX_NOPRINT(c, f) \
+ do { \
+ spin_lock_irqsave(&(c)->ctx_lock, f); \
+ } while(0)
+
+
+#define UNPROTECT_CTX_NOPRINT(c, f) \
+ do { \
+ spin_unlock_irqrestore(&(c)->ctx_lock, f); \
+ } while(0)
+
+
+#define PROTECT_CTX_NOIRQ(c) \
+ do { \
+ spin_lock(&(c)->ctx_lock); \
+ } while(0)
+
+#define UNPROTECT_CTX_NOIRQ(c) \
+ do { \
+ spin_unlock(&(c)->ctx_lock); \
+ } while(0)
+
+
+#ifdef CONFIG_SMP
+
+/* per-cpu activation counter and the copy remembered in a context */
+#define GET_ACTIVATION() pfm_get_cpu_var(pmu_activation_number)
+#define INC_ACTIVATION() pfm_get_cpu_var(pmu_activation_number)++
+#define SET_ACTIVATION(c) (c)->ctx_last_activation = GET_ACTIVATION()
+
+#else /* !CONFIG_SMP */
+/* on UP all three expand to an empty statement and ignore their argument */
+#define SET_ACTIVATION(t) do {} while(0)
+#define GET_ACTIVATION(t) do {} while(0)
+#define INC_ACTIVATION(t) do {} while(0)
+#endif /* CONFIG_SMP */
+
+#define SET_PMU_OWNER(t, c) do { pfm_get_cpu_var(pmu_owner) = (t); pfm_get_cpu_var(pmu_ctx) = (c); } while(0)
+#define GET_PMU_OWNER() pfm_get_cpu_var(pmu_owner)
+#define GET_PMU_CTX() pfm_get_cpu_var(pmu_ctx)
+
+#define LOCK_PFS(g) spin_lock_irqsave(&pfm_sessions.pfs_lock, g)
+#define UNLOCK_PFS(g) spin_unlock_irqrestore(&pfm_sessions.pfs_lock, g)
+
+#define PFM_REG_RETFLAG_SET(flags, val) do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0)
+
+/*
+ * cmp0 must be the value of pmc0
+ */
+#define PMC0_HAS_OVFL(cmp0) (cmp0 & ~0x1UL)
+
+#define PFMFS_MAGIC 0xa0b4d889
+
+/*
+ * debugging
+ */
+#define PFM_DEBUGGING 1
+#ifdef PFM_DEBUGGING
+#define DPRINT(a) \
+ do { \
+ if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
+ } while (0)
+
+#define DPRINT_ovfl(a) \
+ do { \
+ if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
+ } while (0)
+#endif
+
+/*
+ * 64-bit software counter structure
+ *
+ * the next_reset_type is applied to the next call to pfm_reset_regs()
+ *
+ * NOTE(review): no field named next_reset_type exists in the structure
+ * below -- the sentence above looks stale; confirm and drop or update it.
+ */
+typedef struct {
+ unsigned long val; /* virtual 64bit counter value */
+ unsigned long lval; /* last reset value */
+ unsigned long long_reset; /* reset value on sampling overflow */
+ unsigned long short_reset; /* reset value on overflow */
+ unsigned long reset_pmds[4]; /* which other pmds to reset when this counter overflows */
+ unsigned long smpl_pmds[4]; /* which pmds are accessed when counter overflow */
+ unsigned long seed; /* seed for random-number generator */
+ unsigned long mask; /* mask for random-number generator */
+ unsigned int flags; /* notify/do not notify */
+ unsigned long eventid; /* overflow event identifier */
+} pfm_counter_t;
+
+/*
+ * context flags
+ *
+ * All fields are bit-fields of one unsigned int; their widths add up to 32.
+ */
+typedef struct {
+ unsigned int block:1; /* when 1, task will block on user notifications */
+ unsigned int system:1; /* do system wide monitoring */
+ unsigned int using_dbreg:1; /* using range restrictions (debug registers) */
+ unsigned int is_sampling:1; /* true if using a custom format */
+ unsigned int excl_idle:1; /* exclude idle task in system wide session */
+ unsigned int going_zombie:1; /* context is zombie (MASKED+blocking) */
+ unsigned int trap_reason:2; /* reason for going into pfm_handle_work() */
+ unsigned int no_msg:1; /* no message sent on overflow */
+ unsigned int can_restart:1; /* allowed to issue a PFM_RESTART */
+ unsigned int reserved:22;
+} pfm_context_flags_t;
+
+#define PFM_TRAP_REASON_NONE 0x0 /* default value */
+#define PFM_TRAP_REASON_BLOCK 0x1 /* we need to block on overflow */
+#define PFM_TRAP_REASON_RESET 0x2 /* we need to reset PMDs */
+
+
+/*
+ * perfmon context: encapsulates all the state of a monitoring session
+ *
+ * The register bitmasks are arrays of 4 unsigned longs; the helpers in
+ * this part of the file only look at element [0] of each mask.
+ */
+
+typedef struct pfm_context {
+ spinlock_t ctx_lock; /* context protection */
+
+ pfm_context_flags_t ctx_flags; /* bitmask of flags (block reason incl.) */
+ unsigned int ctx_state; /* state: active/inactive (no bitfield) */
+
+ struct task_struct *ctx_task; /* task to which context is attached */
+
+ unsigned long ctx_ovfl_regs[4]; /* which registers overflowed (notification) */
+
+ struct semaphore ctx_restart_sem; /* use for blocking notification mode */
+
+ unsigned long ctx_used_pmds[4]; /* bitmask of PMD used */
+ unsigned long ctx_all_pmds[4]; /* bitmask of all accessible PMDs */
+ unsigned long ctx_reload_pmds[4]; /* bitmask of force reload PMD on ctxsw in */
+
+ unsigned long ctx_all_pmcs[4]; /* bitmask of all accessible PMCs */
+ unsigned long ctx_reload_pmcs[4]; /* bitmask of force reload PMC on ctxsw in */
+ unsigned long ctx_used_monitors[4]; /* bitmask of monitor PMC being used */
+
+ unsigned long ctx_pmcs[IA64_NUM_PMC_REGS]; /* saved copies of PMC values */
+
+ unsigned int ctx_used_ibrs[1]; /* bitmask of used IBR (speedup ctxsw in) */
+ unsigned int ctx_used_dbrs[1]; /* bitmask of used DBR (speedup ctxsw in) */
+ unsigned long ctx_dbrs[IA64_NUM_DBG_REGS]; /* DBR values (cache) when not loaded */
+ unsigned long ctx_ibrs[IA64_NUM_DBG_REGS]; /* IBR values (cache) when not loaded */
+
+ pfm_counter_t ctx_pmds[IA64_NUM_PMD_REGS]; /* software state for PMDS */
+
+ u64 ctx_saved_psr_up; /* only contains psr.up value */
+
+ unsigned long ctx_last_activation; /* context last activation number for last_cpu */
+ unsigned int ctx_last_cpu; /* CPU id of current or last CPU used (SMP only) */
+ unsigned int ctx_cpu; /* cpu to which perfmon is applied (system wide) */
+
+ int ctx_fd; /* file descriptor used by this context */
+ pfm_ovfl_arg_t ctx_ovfl_arg; /* argument to custom buffer format handler */
+
+ pfm_buffer_fmt_t *ctx_buf_fmt; /* buffer format callbacks */
+ void *ctx_smpl_hdr; /* points to sampling buffer header kernel vaddr */
+ unsigned long ctx_smpl_size; /* size of sampling buffer */
+ void *ctx_smpl_vaddr; /* user level virtual address of smpl buffer */
+
+ wait_queue_head_t ctx_msgq_wait; /* readers of the message queue wait here (see pfm_read) */
+ pfm_msg_t ctx_msgq[PFM_MAX_MSGS]; /* message slots, used as a circular queue */
+ int ctx_msgq_head; /* index of the oldest message (advanced by pfm_get_next_msg) */
+ int ctx_msgq_tail; /* index of the next free slot (advanced by pfm_get_new_msg) */
+ struct fasync_struct *ctx_async_queue;
+
+ wait_queue_head_t ctx_zombieq; /* termination cleanup wait queue */
+} pfm_context_t;
+
+/*
+ * a file is recognized as a perfmon file by checking that its
+ * file operations are pfm_file_ops
+ */
+#define PFM_IS_FILE(f) ((f)->f_op == &pfm_file_ops)
+
+#define PFM_GET_CTX(t) ((pfm_context_t *)(t)->thread.pfm_context)
+
+/*
+ * accessors for ctx_last_cpu, which is only tracked on SMP kernels.
+ *
+ * The SMP variants are fully parenthesized so that they behave as a single
+ * expression wherever they are expanded. On UP both macros expand to an
+ * empty statement, so GET_LAST_CPU() can only be used as an expression in
+ * code that is itself compiled under CONFIG_SMP.
+ */
+#ifdef CONFIG_SMP
+#define SET_LAST_CPU(ctx, v)	((ctx)->ctx_last_cpu = (v))
+#define GET_LAST_CPU(ctx)	((ctx)->ctx_last_cpu)
+#else
+#define SET_LAST_CPU(ctx, v)	do {} while(0)
+#define GET_LAST_CPU(ctx)	do {} while(0)
+#endif
+
+
+#define ctx_fl_block ctx_flags.block
+#define ctx_fl_system ctx_flags.system
+#define ctx_fl_using_dbreg ctx_flags.using_dbreg
+#define ctx_fl_is_sampling ctx_flags.is_sampling
+#define ctx_fl_excl_idle ctx_flags.excl_idle
+#define ctx_fl_going_zombie ctx_flags.going_zombie
+#define ctx_fl_trap_reason ctx_flags.trap_reason
+#define ctx_fl_no_msg ctx_flags.no_msg
+#define ctx_fl_can_restart ctx_flags.can_restart
+
+/*
+ * accessors for the per-thread pfm_needs_checking flag.
+ *
+ * PFM_SET_WORK_PENDING() expands to a single statement WITHOUT a trailing
+ * semicolon: the caller supplies it, which keeps the macro usable as the
+ * body of an if/else.
+ */
+#define PFM_SET_WORK_PENDING(t, v)	do { (t)->thread.pfm_needs_checking = (v); } while(0)
+#define PFM_GET_WORK_PENDING(t)		((t)->thread.pfm_needs_checking)
+
+/*
+ * global information about all sessions
+ * mostly used to synchronize between system wide and per-process
+ */
+typedef struct {
+ spinlock_t pfs_lock; /* lock the structure */
+
+ unsigned int pfs_task_sessions; /* number of per task sessions */
+ unsigned int pfs_sys_sessions; /* number of per system wide sessions */
+ unsigned int pfs_sys_use_dbregs; /* incremented when a system wide session uses debug regs */
+ unsigned int pfs_ptrace_use_dbregs; /* incremented when a process uses debug regs */
+ struct task_struct *pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */
+} pfm_session_t;
+
+/*
+ * information about a PMC or PMD.
+ * dep_pmd[]: a bitmask of dependent PMD registers
+ * dep_pmc[]: a bitmask of dependent PMC registers
+ */
+typedef int (*pfm_reg_check_t)(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
+typedef struct {
+ unsigned int type;
+ int pm_pos;
+ unsigned long default_value; /* power-on default value */
+ unsigned long reserved_mask; /* bitmask of reserved bits */
+ pfm_reg_check_t read_check;
+ pfm_reg_check_t write_check;
+ unsigned long dep_pmd[4];
+ unsigned long dep_pmc[4];
+} pfm_reg_desc_t;
+
+/* assume cnum is a valid monitor */
+#define PMC_PM(cnum, val) (((val) >> (pmu_conf->pmc_desc[cnum].pm_pos)) & 0x1)
+
+/*
+ * This structure is initialized at boot time and contains
+ * a description of the PMU main characteristics.
+ *
+ * If the probe function is defined, detection is based
+ * on its return value:
+ * - 0 means recognized PMU
+ * - anything else means not supported
+ * When the probe function is not defined, then the pmu_family field
+ * is used and it must match the host CPU family such that:
+ * - cpu->family & config->pmu_family != 0
+ */
+typedef struct {
+ unsigned long ovfl_val; /* overflow value for counters */
+
+ pfm_reg_desc_t *pmc_desc; /* detailed PMC register dependencies descriptions */
+ pfm_reg_desc_t *pmd_desc; /* detailed PMD register dependencies descriptions */
+
+ unsigned int num_pmcs; /* number of PMCS: computed at init time */
+ unsigned int num_pmds; /* number of PMDS: computed at init time */
+ unsigned long impl_pmcs[4]; /* bitmask of implemented PMCS */
+ unsigned long impl_pmds[4]; /* bitmask of implemented PMDS */
+
+ char *pmu_name; /* PMU family name */
+ unsigned int pmu_family; /* cpuid family pattern used to identify pmu */
+ unsigned int flags; /* pmu specific flags */
+ unsigned int num_ibrs; /* number of IBRS: computed at init time */
+ unsigned int num_dbrs; /* number of DBRS: computed at init time */
+ unsigned int num_counters; /* PMC/PMD counting pairs : computed at init time */
+ int (*probe)(void); /* customized probe routine */
+ unsigned int use_rr_dbregs:1; /* set if debug registers used for range restriction */
+} pmu_config_t;
+/*
+ * PMU specific flags
+ */
+#define PFM_PMU_IRQ_RESEND 1 /* PMU needs explicit IRQ resend */
+
+/*
+ * debug register related type definitions
+ */
+typedef struct {
+ unsigned long ibr_mask:56;
+ unsigned long ibr_plm:4;
+ unsigned long ibr_ig:3;
+ unsigned long ibr_x:1;
+} ibr_mask_reg_t;
+
+typedef struct {
+ unsigned long dbr_mask:56;
+ unsigned long dbr_plm:4;
+ unsigned long dbr_ig:2;
+ unsigned long dbr_w:1;
+ unsigned long dbr_r:1;
+} dbr_mask_reg_t;
+
+typedef union {
+ unsigned long val;
+ ibr_mask_reg_t ibr;
+ dbr_mask_reg_t dbr;
+} dbreg_t;
+
+
+/*
+ * perfmon command descriptions
+ */
+typedef struct {
+ int (*cmd_func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
+ char *cmd_name;
+ int cmd_flags;
+ unsigned int cmd_narg;
+ size_t cmd_argsize;
+ int (*cmd_getsize)(void *arg, size_t *sz);
+} pfm_cmd_desc_t;
+
+#define PFM_CMD_FD 0x01 /* command requires a file descriptor */
+#define PFM_CMD_ARG_READ 0x02 /* command must read argument(s) */
+#define PFM_CMD_ARG_RW 0x04 /* command must read/write argument(s) */
+#define PFM_CMD_STOP 0x08 /* command does not work on zombie context */
+
+
+#define PFM_CMD_NAME(cmd) pfm_cmd_tab[(cmd)].cmd_name
+#define PFM_CMD_READ_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_READ)
+#define PFM_CMD_RW_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_RW)
+#define PFM_CMD_USE_FD(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_FD)
+#define PFM_CMD_STOPPED(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_STOP)
+
+#define PFM_CMD_ARG_MANY	(-1) /* cannot be zero; parenthesized so it stays one operand in any expression */
+
+typedef struct {
+ int debug; /* turn on/off debugging via syslog */
+ int debug_ovfl; /* turn on/off debug printk in overflow handler */
+ int fastctxsw; /* turn on/off fast (unsecure) ctxsw */
+ int expert_mode; /* turn on/off value checking */
+ int debug_pfm_read;
+} pfm_sysctl_t;
+
+typedef struct {
+ unsigned long pfm_spurious_ovfl_intr_count; /* keep track of spurious ovfl interrupts */
+ unsigned long pfm_replay_ovfl_intr_count; /* keep track of replayed ovfl interrupts */
+ unsigned long pfm_ovfl_intr_count; /* keep track of ovfl interrupts */
+ unsigned long pfm_ovfl_intr_cycles; /* cycles spent processing ovfl interrupts */
+ unsigned long pfm_ovfl_intr_cycles_min; /* min cycles spent processing ovfl interrupts */
+ unsigned long pfm_ovfl_intr_cycles_max; /* max cycles spent processing ovfl interrupts */
+ unsigned long pfm_smpl_handler_calls;
+ unsigned long pfm_smpl_handler_cycles;
+ char pad[SMP_CACHE_BYTES] ____cacheline_aligned;
+} pfm_stats_t;
+
+/*
+ * perfmon internal variables
+ */
+static pfm_stats_t pfm_stats[NR_CPUS];
+static pfm_session_t pfm_sessions; /* global sessions information */
+
+static struct proc_dir_entry *perfmon_dir;
+static pfm_uuid_t pfm_null_uuid = {0,};
+
+static spinlock_t pfm_buffer_fmt_lock;
+static LIST_HEAD(pfm_buffer_fmt_list);
+
+static pmu_config_t *pmu_conf;
+
+/* sysctl() controls */
+static pfm_sysctl_t pfm_sysctl;
+int pfm_debug_var;
+
+/*
+ * sysctl entries backed by the int fields of pfm_sysctl.
+ *
+ * Only debug, debug_ovfl, fastctxsw and expert_mode are listed here;
+ * pfm_sysctl.debug_pfm_read has no entry in this table.
+ *
+ * NOTE(review): the octal value in each row (0666 / 0600) is presumably the
+ * file mode; if so, the two debug knobs are writable by every user while
+ * fastctxsw and expert_mode are owner-only - confirm this is intended.
+ */
+static ctl_table pfm_ctl_table[]={
+ {1, "debug", &pfm_sysctl.debug, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
+ {2, "debug_ovfl", &pfm_sysctl.debug_ovfl, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
+ {3, "fastctxsw", &pfm_sysctl.fastctxsw, sizeof(int), 0600, NULL, &proc_dointvec, NULL,},
+ {4, "expert_mode", &pfm_sysctl.expert_mode, sizeof(int), 0600, NULL, &proc_dointvec, NULL,},
+ { 0, },
+};
+static ctl_table pfm_sysctl_dir[] = {
+ {1, "perfmon", NULL, 0, 0755, pfm_ctl_table, },
+ {0,},
+};
+static ctl_table pfm_sysctl_root[] = {
+ {1, "kernel", NULL, 0, 0755, pfm_sysctl_dir, },
+ {0,},
+};
+static struct ctl_table_header *pfm_sysctl_header;
+
+static int pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
+static int pfm_flush(struct file *filp);
+
+#define pfm_get_cpu_var(v) __ia64_per_cpu_var(v)
+#define pfm_get_cpu_data(a,b) per_cpu(a, b)
+
+/*
+ * calls put_task_struct() on task, except when task is the caller itself
+ */
+static inline void
+pfm_put_task(struct task_struct *task)
+{
+	if (task == current)
+		return;
+
+	put_task_struct(task);
+}
+
+/*
+ * set TIF_NOTIFY_RESUME in the thread_info of an arbitrary task.
+ * The thread_info is located IA64_TASK_SIZE bytes after the task structure.
+ */
+static inline void
+pfm_set_task_notify(struct task_struct *task)
+{
+	char *base = (char *) task;
+	struct thread_info *ti = (struct thread_info *) (base + IA64_TASK_SIZE);
+
+	set_bit(TIF_NOTIFY_RESUME, &ti->flags);
+}
+
+/*
+ * clear TIF_NOTIFY_RESUME through clear_thread_flag(); unlike
+ * pfm_set_task_notify() this helper takes no task argument
+ */
+static inline void
+pfm_clear_task_notify(void)
+{
+ clear_thread_flag(TIF_NOTIFY_RESUME);
+}
+
+/*
+ * mark as reserved the page backing the vmalloc'ed address a
+ */
+static inline void
+pfm_reserve_page(unsigned long a)
+{
+	void *vaddr = (void *)a;
+
+	SetPageReserved(vmalloc_to_page(vaddr));
+}
+/*
+ * clear the reserved mark on the page backing the vmalloc'ed address a
+ */
+static inline void
+pfm_unreserve_page(unsigned long a)
+{
+	void *vaddr = (void *)a;
+
+	ClearPageReserved(vmalloc_to_page(vaddr));
+}
+
+/*
+ * take the context lock with a plain spin_lock() (no interrupt state is
+ * saved here); always returns 0UL
+ */
+static inline unsigned long
+pfm_protect_ctx_ctxsw(pfm_context_t *x)
+{
+ spin_lock(&(x)->ctx_lock);
+ return 0UL;
+}
+
+/*
+ * counterpart of pfm_protect_ctx_ctxsw(): release the context lock
+ *
+ * @x: context whose ctx_lock is released
+ * @f: not used by this function
+ *
+ * The function is declared to return unsigned long, so it has to return a
+ * value on every path; it returns 0UL, the same value
+ * pfm_protect_ctx_ctxsw() hands out.
+ */
+static inline unsigned long
+pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f)
+{
+	spin_unlock(&(x)->ctx_lock);
+	return 0UL;
+}
+
+/*
+ * thin wrapper around do_munmap(mm, addr, len).
+ * The acct argument is accepted but not forwarded, and the result of
+ * do_munmap() is handed back through an unsigned int return type.
+ */
+static inline unsigned int
+pfm_do_munmap(struct mm_struct *mm, unsigned long addr, size_t len, int acct)
+{
+ return do_munmap(mm, addr, len);
+}
+
+/*
+ * thin wrapper around get_unmapped_area(file, addr, len, pgoff, flags).
+ * The exec argument is accepted but not forwarded.
+ */
+static inline unsigned long
+pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, unsigned long exec)
+{
+ return get_unmapped_area(file, addr, len, pgoff, flags);
+}
+
+
+/*
+ * get_sb callback of pfm_fs_type: delegates to get_sb_pseudo() with the
+ * name "pfm:" and the PFMFS_MAGIC magic number. The flags, dev_name and
+ * data arguments are not used.
+ */
+static struct super_block *
+pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data)
+{
+ return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC);
+}
+
+/*
+ * filesystem type "pfmfs"; registered and mounted by init_pfm_fs()
+ */
+static struct file_system_type pfm_fs_type = {
+ .name = "pfmfs",
+ .get_sb = pfmfs_get_sb,
+ .kill_sb = kill_anon_super,
+};
+
+DEFINE_PER_CPU(unsigned long, pfm_syst_info);
+DEFINE_PER_CPU(struct task_struct *, pmu_owner);
+DEFINE_PER_CPU(pfm_context_t *, pmu_ctx);
+DEFINE_PER_CPU(unsigned long, pmu_activation_number);
+
+
+/* forward declaration */
+static struct file_operations pfm_file_ops;
+
+/*
+ * forward declarations
+ */
+#ifndef CONFIG_SMP
+static void pfm_lazy_save_regs (struct task_struct *ta);
+#endif
+
+void dump_pmu_state(const char *);
+static int pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
+
+#include "perfmon_itanium.h"
+#include "perfmon_mckinley.h"
+#include "perfmon_generic.h"
+
+/*
+ * NULL-terminated list of the PMU descriptions pulled in by the
+ * perfmon_*.h includes above. The generic description has to stay the
+ * last real entry.
+ * NOTE(review): presumably the list is scanned in order and the first
+ * matching entry wins - confirm against the probing code.
+ */
+static pmu_config_t *pmu_confs[]={
+ &pmu_conf_mck,
+ &pmu_conf_ita,
+ &pmu_conf_gen, /* must be last */
+ NULL
+};
+
+
+static int pfm_end_notify_user(pfm_context_t *ctx);
+
+/*
+ * reset the IA64_PSR_PP bit of the processor status register, then
+ * issue an instruction serialization
+ */
+static inline void
+pfm_clear_psr_pp(void)
+{
+ ia64_rsm(IA64_PSR_PP);
+ ia64_srlz_i();
+}
+
+/*
+ * set the IA64_PSR_PP bit of the processor status register, then
+ * issue an instruction serialization
+ */
+static inline void
+pfm_set_psr_pp(void)
+{
+ ia64_ssm(IA64_PSR_PP);
+ ia64_srlz_i();
+}
+
+/*
+ * reset the IA64_PSR_UP bit of the processor status register, then
+ * issue an instruction serialization
+ */
+static inline void
+pfm_clear_psr_up(void)
+{
+ ia64_rsm(IA64_PSR_UP);
+ ia64_srlz_i();
+}
+
+/*
+ * set the IA64_PSR_UP bit of the processor status register, then
+ * issue an instruction serialization
+ */
+static inline void
+pfm_set_psr_up(void)
+{
+ ia64_ssm(IA64_PSR_UP);
+ ia64_srlz_i();
+}
+
+/*
+ * return the current value of the processor status register
+ * (read through _IA64_REG_PSR, followed by an instruction serialization)
+ */
+static inline unsigned long
+pfm_get_psr(void)
+{
+ unsigned long tmp;
+ tmp = ia64_getreg(_IA64_REG_PSR);
+ ia64_srlz_i();
+ return tmp;
+}
+
+/*
+ * write val through _IA64_REG_PSR_L, then issue an instruction
+ * serialization
+ */
+static inline void
+pfm_set_psr_l(unsigned long val)
+{
+ ia64_setreg(_IA64_REG_PSR_L, val);
+ ia64_srlz_i();
+}
+
+/*
+ * freeze the PMU: write 1 to PMC0, then issue a data serialization
+ */
+static inline void
+pfm_freeze_pmu(void)
+{
+ ia64_set_pmc(0,1UL);
+ ia64_srlz_d();
+}
+
+/*
+ * unfreeze the PMU: write 0 to PMC0, then issue a data serialization
+ */
+static inline void
+pfm_unfreeze_pmu(void)
+{
+ ia64_set_pmc(0,0UL);
+ ia64_srlz_d();
+}
+
+/*
+ * load the first nibrs entries of ibrs[] into the IBR registers
+ *
+ * @ibrs: values to load, indexed by register number
+ * @nibrs: number of registers to load
+ *
+ * ia64_dv_serialize_instruction() is issued after every write and a final
+ * instruction serialization is done once all registers are written.
+ */
+static inline void
+pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs)
+{
+	unsigned int i;	/* same type as nibrs: no signed/unsigned comparison */
+
+	for (i = 0; i < nibrs; i++) {
+		ia64_set_ibr(i, ibrs[i]);
+		ia64_dv_serialize_instruction();
+	}
+	ia64_srlz_i();
+}
+
+/*
+ * load the first ndbrs entries of dbrs[] into the DBR registers
+ *
+ * @dbrs: values to load, indexed by register number
+ * @ndbrs: number of registers to load
+ *
+ * ia64_dv_serialize_data() is issued after every write and a final data
+ * serialization is done once all registers are written.
+ */
+static inline void
+pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs)
+{
+	unsigned int i;	/* same type as ndbrs: no signed/unsigned comparison */
+
+	for (i = 0; i < ndbrs; i++) {
+		ia64_set_dbr(i, dbrs[i]);
+		ia64_dv_serialize_data();
+	}
+	ia64_srlz_d();
+}
+
+/*
+ * return the full software view of counter i: the software part kept in
+ * the context plus the hardware bits selected by pmu_conf->ovfl_val.
+ *
+ * PMD[i] must be a counter. no check is made
+ */
+static inline unsigned long
+pfm_read_soft_counter(pfm_context_t *ctx, int i)
+{
+	unsigned long hw_bits = ia64_get_pmd(i) & pmu_conf->ovfl_val;
+
+	return ctx->ctx_pmds[i].val + hw_bits;
+}
+
+/*
+ * split val between the software counter and the hardware PMD:
+ * the bits outside pmu_conf->ovfl_val stay in ctx_pmds[i].val, the bits
+ * inside it are written to PMD[i].
+ *
+ * PMD[i] must be a counter. no check is made
+ */
+static inline void
+pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val)
+{
+	unsigned long hw_mask   = pmu_conf->ovfl_val;
+	unsigned long soft_part = val & ~hw_mask;
+	unsigned long hw_part   = val & hw_mask;
+
+	ctx->ctx_pmds[i].val = soft_part;
+
+	ia64_set_pmd(i, hw_part);
+}
+
+/*
+ * reserve the next free slot of the context message queue
+ *
+ * ctx_msgq[] is indexed modulo PFM_MAX_MSGS. The queue is considered full
+ * when advancing the tail would make it equal to the head.
+ *
+ * returns a pointer to the reserved slot (the tail is advanced), or NULL
+ * when the queue is full.
+ */
+static pfm_msg_t *
+pfm_get_new_msg(pfm_context_t *ctx)
+{
+	int idx, next;
+
+	next = (ctx->ctx_msgq_tail+1) % PFM_MAX_MSGS;
+
+	/* the value printed with %p is the context pointer, label it as such */
+	DPRINT(("ctx=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
+	if (next == ctx->ctx_msgq_head) return NULL;
+
+	idx = ctx->ctx_msgq_tail;
+	ctx->ctx_msgq_tail = next;
+
+	DPRINT(("ctx=%p head=%d tail=%d msg=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, idx));
+
+	return ctx->ctx_msgq+idx;
+}
+
+/*
+ * dequeue the oldest message of the context message queue
+ *
+ * returns a pointer to the message at the head of ctx_msgq[] and advances
+ * the head modulo PFM_MAX_MSGS, or returns NULL when PFM_CTXQ_EMPTY()
+ * reports an empty queue.
+ */
+static pfm_msg_t *
+pfm_get_next_msg(pfm_context_t *ctx)
+{
+	pfm_msg_t *oldest;
+	int head;
+
+	DPRINT(("ctx=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
+
+	if (PFM_CTXQ_EMPTY(ctx)) return NULL;
+
+	head   = ctx->ctx_msgq_head;
+	oldest = &ctx->ctx_msgq[head];
+
+	ctx->ctx_msgq_head = (head + 1) % PFM_MAX_MSGS;
+
+	DPRINT(("ctx=%p head=%d tail=%d type=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, oldest->pfm_gen_msg.msg_type));
+
+	return oldest;
+}
+
+/*
+ * empty the context message queue by rewinding head and tail to slot 0
+ */
+static void
+pfm_reset_msgq(pfm_context_t *ctx)
+{
+	ctx->ctx_msgq_tail = 0;
+	ctx->ctx_msgq_head = 0;
+
+	DPRINT(("ctx=%p msgq reset\n", ctx));
+}
+
+/*
+ * allocate a zero-filled, page-aligned vmalloc area and mark each of its
+ * pages reserved
+ *
+ * size is rounded up with PAGE_ALIGN(). Returns the kernel address of the
+ * area, or NULL when vmalloc() fails.
+ */
+static void *
+pfm_rvmalloc(unsigned long size)
+{
+	unsigned long page_addr, remaining;
+	void *mem;
+
+	size = PAGE_ALIGN(size);
+
+	mem = vmalloc(size);
+	if (!mem)
+		return mem;
+
+	memset(mem, 0, size);
+
+	page_addr = (unsigned long)mem;
+	for (remaining = size; remaining > 0; remaining -= PAGE_SIZE) {
+		pfm_reserve_page(page_addr);
+		page_addr += PAGE_SIZE;
+	}
+	return mem;
+}
+
+/*
+ * undo pfm_rvmalloc(): clear the reserved mark on every page of the area,
+ * then vfree() it. A NULL mem is silently ignored.
+ *
+ * The remaining size is compared as a signed value, so a size that is not
+ * a multiple of PAGE_SIZE still terminates the loop.
+ */
+static void
+pfm_rvfree(void *mem, unsigned long size)
+{
+	unsigned long page_addr, remaining;
+
+	if (!mem)
+		return;
+
+	DPRINT(("freeing physical buffer @%p size=%lu\n", mem, size));
+
+	page_addr = (unsigned long) mem;
+	for (remaining = size; (long) remaining > 0; remaining -= PAGE_SIZE) {
+		pfm_unreserve_page(page_addr);
+		page_addr += PAGE_SIZE;
+	}
+	vfree(mem);
+}
+
+/*
+ * allocate a zero-filled context descriptor with kmalloc(GFP_KERNEL)
+ * (must be able to free with interrupts disabled)
+ *
+ * returns the new context, or NULL when the allocation fails
+ */
+static pfm_context_t *
+pfm_context_alloc(void)
+{
+	pfm_context_t *ctx = kmalloc(sizeof(pfm_context_t), GFP_KERNEL);
+
+	if (!ctx)
+		return ctx;
+
+	memset(ctx, 0, sizeof(pfm_context_t));
+	DPRINT(("alloc ctx @%p\n", ctx));
+
+	return ctx;
+}
+
+/*
+ * release a context descriptor obtained from pfm_context_alloc();
+ * a NULL ctx is ignored (nothing is logged, nothing is freed)
+ */
+static void
+pfm_context_free(pfm_context_t *ctx)
+{
+	if (!ctx)
+		return;
+
+	DPRINT(("free ctx @%p\n", ctx));
+	kfree(ctx);
+}
+
+/*
+ * mask monitoring for the context attached to task
+ *
+ * Two passes over the live PMU registers:
+ *  1. every PMD flagged in ctx_used_pmds[0] is read back and folded into
+ *     the software state ctx_pmds[].val (added for counting PMDs, copied
+ *     for the others);
+ *  2. every monitor flagged in ctx_used_monitors[0], starting at
+ *     PMU_FIRST_COUNTER, has the low 4 bits of its PMC cleared, both in
+ *     the hardware register and in the thread copy th->pmcs[].
+ *
+ * Only element [0] of the bitmasks is examined.
+ */
+static void
+pfm_mask_monitoring(struct task_struct *task)
+{
+ pfm_context_t *ctx = PFM_GET_CTX(task);
+ struct thread_struct *th = &task->thread;
+ unsigned long mask, val, ovfl_mask;
+ int i;
+
+ DPRINT_ovfl(("masking monitoring for [%d]\n", task->pid));
+
+ ovfl_mask = pmu_conf->ovfl_val;
+ /*
+ * monitoring can only be masked as a result of a valid
+ * counter overflow. In UP, it means that the PMU still
+ * has an owner. Note that the owner can be different
+ * from the current task. However the PMU state belongs
+ * to the owner.
+ * In SMP, a valid overflow only happens when task is
+ * current. Therefore if we come here, we know that
+ * the PMU state belongs to the current task, therefore
+ * we can access the live registers.
+ *
+ * So in both cases, the live register contains the owner's
+ * state. We can ONLY touch the PMU registers and NOT the PSR.
+ *
+ * As a consequence to this call, the thread->pmds[] array
+ * contains stale information which must be ignored
+ * when context is reloaded AND monitoring is active (see
+ * pfm_restart).
+ */
+ mask = ctx->ctx_used_pmds[0];
+ for (i = 0; mask; i++, mask>>=1) {
+ /* skip non used pmds */
+ if ((mask & 0x1) == 0) continue;
+ val = ia64_get_pmd(i);
+
+ if (PMD_IS_COUNTING(i)) {
+ /*
+ * we rebuild the full 64 bit value of the counter
+ */
+ ctx->ctx_pmds[i].val += (val & ovfl_mask);
+ } else {
+ ctx->ctx_pmds[i].val = val;
+ }
+ DPRINT_ovfl(("pmd[%d]=0x%lx hw_pmd=0x%lx\n",
+ i,
+ ctx->ctx_pmds[i].val,
+ val & ovfl_mask));
+ }
+ /*
+ * mask monitoring by setting the privilege level to 0
+ * we cannot use psr.pp/psr.up for this, it is controlled by
+ * the user
+ *
+ * if task is current, modify actual registers, otherwise modify
+ * thread save state, i.e., what will be restored in pfm_load_regs()
+ *
+ * NOTE(review): the loop below updates BOTH the hardware PMC and
+ * th->pmcs[] unconditionally; it does not test task == current.
+ */
+ mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
+ for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
+ if ((mask & 0x1) == 0UL) continue;
+ ia64_set_pmc(i, th->pmcs[i] & ~0xfUL);
+ th->pmcs[i] &= ~0xfUL;
+ DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, th->pmcs[i]));
+ }
+ /*
+ * make all of this visible
+ */
+ ia64_srlz_d();
+}
+
+/*
+ * undo pfm_mask_monitoring() for the current task
+ *
+ * must always be done with task == current
+ *
+ * context must be in MASKED state when calling
+ *
+ * Both preconditions are checked; when one is violated an error is
+ * logged and the function returns without touching the PMU.
+ *
+ * Sequence: save psr, stop monitoring through the PSR (and dcr.pp for
+ * system-wide sessions with PFM_CPUINFO_DCR_PP), reload the used PMDs
+ * from the software state, reload the monitor PMCs from ctx_pmcs[],
+ * reload IBR/DBR when debug registers are in use, then put dcr.pp and
+ * the saved psr back.
+ */
+static void
+pfm_restore_monitoring(struct task_struct *task)
+{
+ pfm_context_t *ctx = PFM_GET_CTX(task);
+ struct thread_struct *th = &task->thread;
+ unsigned long mask, ovfl_mask;
+ unsigned long psr, val;
+ int i, is_system;
+
+ is_system = ctx->ctx_fl_system;
+ ovfl_mask = pmu_conf->ovfl_val;
+
+ if (task != current) {
+ printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task->pid, current->pid);
+ return;
+ }
+ if (ctx->ctx_state != PFM_CTX_MASKED) {
+ printk(KERN_ERR "perfmon.%d: task[%d] current[%d] invalid state=%d\n", __LINE__,
+ task->pid, current->pid, ctx->ctx_state);
+ return;
+ }
+ psr = pfm_get_psr();
+ /*
+ * monitoring is masked via the PMC.
+ * As we restore their value, we do not want each counter to
+ * restart right away. We stop monitoring using the PSR,
+ * restore the PMC (and PMD) and then re-establish the psr
+ * as it was. Note that there can be no pending overflow at
+ * this point, because monitoring was MASKED.
+ *
+ * system-wide session are pinned and self-monitoring
+ */
+ if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) {
+ /* disable dcr pp */
+ ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP);
+ pfm_clear_psr_pp();
+ } else {
+ pfm_clear_psr_up();
+ }
+ /*
+ * first, we restore the PMD
+ */
+ mask = ctx->ctx_used_pmds[0];
+ for (i = 0; mask; i++, mask>>=1) {
+ /* skip non used pmds */
+ if ((mask & 0x1) == 0) continue;
+
+ if (PMD_IS_COUNTING(i)) {
+ /*
+ * we split the 64bit value according to
+ * counter width
+ */
+ val = ctx->ctx_pmds[i].val & ovfl_mask;
+ ctx->ctx_pmds[i].val &= ~ovfl_mask;
+ } else {
+ val = ctx->ctx_pmds[i].val;
+ }
+ ia64_set_pmd(i, val);
+
+ DPRINT(("pmd[%d]=0x%lx hw_pmd=0x%lx\n",
+ i,
+ ctx->ctx_pmds[i].val,
+ val));
+ }
+ /*
+ * restore the PMCs
+ * (the thread copy th->pmcs[] is refreshed from ctx_pmcs[] first,
+ * which brings back the bits cleared by pfm_mask_monitoring())
+ */
+ mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
+ for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
+ if ((mask & 0x1) == 0UL) continue;
+ th->pmcs[i] = ctx->ctx_pmcs[i];
+ ia64_set_pmc(i, th->pmcs[i]);
+ DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, th->pmcs[i]));
+ }
+ ia64_srlz_d();
+
+ /*
+ * must restore DBR/IBR because could be modified while masked
+ * XXX: need to optimize
+ */
+ if (ctx->ctx_fl_using_dbreg) {
+ pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
+ pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
+ }
+
+ /*
+ * now restore PSR
+ */
+ if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) {
+ /* enable dcr pp */
+ ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP);
+ ia64_srlz_i();
+ }
+ pfm_set_psr_l(psr);
+}
+
+/*
+ * copy the hardware PMDs selected by mask into pmds[]
+ * (bit n of mask selects PMD n; unselected entries are left untouched).
+ * A data serialization is issued before the registers are read.
+ */
+static inline void
+pfm_save_pmds(unsigned long *pmds, unsigned long mask)
+{
+	int idx = 0;
+
+	ia64_srlz_d();
+
+	while (mask) {
+		if (mask & 0x1)
+			pmds[idx] = ia64_get_pmd(idx);
+		idx++;
+		mask >>= 1;
+	}
+}
+
+/*
+ * reload the hardware PMDs selected by mask from pmds[]
+ * (thread state, used for ctxsw only).
+ *
+ * For counting PMDs only the bits covered by pmu_conf->ovfl_val are
+ * written; other PMDs are written unmodified. A data serialization is
+ * issued once all registers are written.
+ */
+static inline void
+pfm_restore_pmds(unsigned long *pmds, unsigned long mask)
+{
+	unsigned long hw_mask = pmu_conf->ovfl_val;
+	unsigned long v;
+	int idx;
+
+	for (idx = 0; mask; idx++, mask >>= 1) {
+		if (!(mask & 0x1))
+			continue;
+
+		if (PMD_IS_COUNTING(idx))
+			v = pmds[idx] & hw_mask;
+		else
+			v = pmds[idx];
+
+		ia64_set_pmd(idx, v);
+	}
+	ia64_srlz_d();
+}
+
+/*
+ * propagate PMD from context to thread-state
+ *
+ * The loop runs from PMD 0 up to the highest bit set in ctx_all_pmds[0].
+ * Individual mask bits are not tested, so every index in that range is
+ * copied to thread->pmds[].
+ */
+static inline void
+pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx)
+{
+ struct thread_struct *thread = &task->thread;
+ unsigned long ovfl_val = pmu_conf->ovfl_val;
+ unsigned long mask = ctx->ctx_all_pmds[0];
+ unsigned long val;
+ int i;
+
+ DPRINT(("mask=0x%lx\n", mask));
+
+ for (i=0; mask; i++, mask>>=1) {
+
+ val = ctx->ctx_pmds[i].val;
+
+ /*
+ * We break up the 64 bit value into 2 pieces
+ * the lower bits go to the machine state in the
+ * thread (will be reloaded on ctxsw in).
+ * The upper part stays in the soft-counter.
+ */
+ if (PMD_IS_COUNTING(i)) {
+ ctx->ctx_pmds[i].val = val & ~ovfl_val;
+ val &= ovfl_val;
+ }
+ thread->pmds[i] = val;
+
+ DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n",
+ i,
+ thread->pmds[i],
+ ctx->ctx_pmds[i].val));
+ }
+}
+
+/*
+ * propagate PMC from context to thread-state
+ *
+ * The loop runs from PMC 0 up to the highest bit set in ctx_all_pmcs[0].
+ * Individual mask bits are not tested, so every index in that range is
+ * copied to thread->pmcs[].
+ */
+static inline void
+pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx)
+{
+ struct thread_struct *thread = &task->thread;
+ unsigned long mask = ctx->ctx_all_pmcs[0];
+ int i;
+
+ DPRINT(("mask=0x%lx\n", mask));
+
+ for (i=0; mask; i++, mask>>=1) {
+ /* plain copy: PMC values are not split with ovfl_val */
+ thread->pmcs[i] = ctx->ctx_pmcs[i];
+ DPRINT(("pmc[%d]=0x%lx\n", i, thread->pmcs[i]));
+ }
+}
+
+
+
+/*
+ * reload the hardware PMCs selected by mask from pmcs[]
+ * (bit n of mask selects PMC n), then issue a data serialization
+ */
+static inline void
+pfm_restore_pmcs(unsigned long *pmcs, unsigned long mask)
+{
+	int idx = 0;
+
+	while (mask) {
+		if (mask & 0x1)
+			ia64_set_pmc(idx, pmcs[idx]);
+		idx++;
+		mask >>= 1;
+	}
+	ia64_srlz_d();
+}
+
+/*
+ * byte-wise comparison of two uuids over sizeof(pfm_uuid_t) bytes;
+ * returns the memcmp() result, i.e. 0 when they are identical
+ */
+static inline int
+pfm_uuid_cmp(pfm_uuid_t a, pfm_uuid_t b)
+{
+ return memcmp(a, b, sizeof(pfm_uuid_t));
+}
+
+/*
+ * invoke the optional fmt_exit callback of a buffer format;
+ * returns the callback result, or 0 when the format has no such callback
+ */
+static inline int
+pfm_buf_fmt_exit(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, struct pt_regs *regs)
+{
+	if (!fmt->fmt_exit)
+		return 0;
+
+	return (*fmt->fmt_exit)(task, buf, regs);
+}
+
+/*
+ * invoke the optional fmt_getsize callback of a buffer format;
+ * returns the callback result, or 0 when the format has no such callback
+ * (in which case *size is left untouched)
+ */
+static inline int
+pfm_buf_fmt_getsize(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size)
+{
+	if (!fmt->fmt_getsize)
+		return 0;
+
+	return (*fmt->fmt_getsize)(task, flags, cpu, arg, size);
+}
+
+
+/*
+ * invoke the optional fmt_validate callback of a buffer format;
+ * returns the callback result, or 0 when the format has no such callback
+ */
+static inline int
+pfm_buf_fmt_validate(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags,
+		     int cpu, void *arg)
+{
+	if (!fmt->fmt_validate)
+		return 0;
+
+	return (*fmt->fmt_validate)(task, flags, cpu, arg);
+}
+
+/*
+ * invoke the optional fmt_init callback of a buffer format;
+ * returns the callback result, or 0 when the format has no such callback
+ */
+static inline int
+pfm_buf_fmt_init(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, unsigned int flags,
+		 int cpu, void *arg)
+{
+	if (!fmt->fmt_init)
+		return 0;
+
+	return (*fmt->fmt_init)(task, buf, flags, cpu, arg);
+}
+
+/*
+ * invoke the optional fmt_restart callback of a buffer format;
+ * returns the callback result, or 0 when the format has no such callback
+ */
+static inline int
+pfm_buf_fmt_restart(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
+{
+	if (!fmt->fmt_restart)
+		return 0;
+
+	return (*fmt->fmt_restart)(task, ctrl, buf, regs);
+}
+
+/*
+ * invoke the optional fmt_restart_active callback of a buffer format;
+ * returns the callback result, or 0 when the format has no such callback
+ */
+static inline int
+pfm_buf_fmt_restart_active(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
+{
+	if (!fmt->fmt_restart_active)
+		return 0;
+
+	return (*fmt->fmt_restart_active)(task, ctrl, buf, regs);
+}
+
+/*
+ * walk pfm_buffer_fmt_list looking for a format whose fmt_uuid equals uuid
+ *
+ * No locking is done here; see pfm_find_buffer_fmt() for the variant that
+ * takes pfm_buffer_fmt_lock. Returns the matching format or NULL.
+ */
+static pfm_buffer_fmt_t *
+__pfm_find_buffer_fmt(pfm_uuid_t uuid)
+{
+	struct list_head *node;
+
+	list_for_each(node, &pfm_buffer_fmt_list) {
+		pfm_buffer_fmt_t *candidate = list_entry(node, pfm_buffer_fmt_t, fmt_list);
+
+		if (pfm_uuid_cmp(uuid, candidate->fmt_uuid) != 0)
+			continue;
+
+		return candidate;
+	}
+	return NULL;
+}
+
+/*
+ * look up a registered buffer format by uuid, holding
+ * pfm_buffer_fmt_lock for the duration of the list walk.
+ * Returns the format or NULL; the lock is no longer held on return.
+ */
+static pfm_buffer_fmt_t *
+pfm_find_buffer_fmt(pfm_uuid_t uuid)
+{
+	pfm_buffer_fmt_t *found;
+
+	spin_lock(&pfm_buffer_fmt_lock);
+	found = __pfm_find_buffer_fmt(uuid);
+	spin_unlock(&pfm_buffer_fmt_lock);
+
+	return found;
+}
+
+/*
+ * add a sampling buffer format to the list of known formats
+ *
+ * returns 0 on success, -EINVAL when fmt, its name or its handler is
+ * missing, -EBUSY when a format with the same uuid is already registered.
+ *
+ * XXX: need check validity of fmt_arg_size
+ */
+int
+pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt)
+{
+	int ret;
+
+	/* sanity checks: a name and at least a handler are mandatory */
+	if (fmt == NULL || fmt->fmt_name == NULL || fmt->fmt_handler == NULL)
+		return -EINVAL;
+
+	spin_lock(&pfm_buffer_fmt_lock);
+
+	if (__pfm_find_buffer_fmt(fmt->fmt_uuid) == NULL) {
+		list_add(&fmt->fmt_list, &pfm_buffer_fmt_list);
+		printk(KERN_INFO "perfmon: added sampling format %s\n", fmt->fmt_name);
+		ret = 0;
+	} else {
+		printk(KERN_ERR "perfmon: duplicate sampling format: %s\n", fmt->fmt_name);
+		ret = -EBUSY;
+	}
+
+	spin_unlock(&pfm_buffer_fmt_lock);
+	return ret;
+}
+EXPORT_SYMBOL(pfm_register_buffer_fmt);
+
+/*
+ * remove the sampling buffer format identified by uuid from the list of
+ * known formats
+ *
+ * returns 0 on success, -EINVAL when no such format is registered
+ */
+int
+pfm_unregister_buffer_fmt(pfm_uuid_t uuid)
+{
+	pfm_buffer_fmt_t *victim;
+	int ret;
+
+	spin_lock(&pfm_buffer_fmt_lock);
+
+	victim = __pfm_find_buffer_fmt(uuid);
+	if (victim) {
+		list_del_init(&victim->fmt_list);
+		printk(KERN_INFO "perfmon: removed sampling format: %s\n", victim->fmt_name);
+		ret = 0;
+	} else {
+		printk(KERN_ERR "perfmon: cannot unregister format, not found\n");
+		ret = -EINVAL;
+	}
+
+	spin_unlock(&pfm_buffer_fmt_lock);
+	return ret;
+}
+EXPORT_SYMBOL(pfm_unregister_buffer_fmt);
+
+/*
+ * reserve a perfmon session in the global pfm_sessions bookkeeping
+ *
+ * @task: task recorded as owner of the system-wide session (only used
+ *        when is_syswide is non-zero)
+ * @is_syswide: non-zero for a system-wide session, zero for a per-task one
+ * @cpu: CPU of the system-wide session, index into pfs_sys_session[]
+ *
+ * System-wide and per-task sessions are mutually exclusive, and a CPU can
+ * host at most one system-wide session.
+ *
+ * returns 0 on success, -EBUSY when the request conflicts with an existing
+ * session.
+ */
+static int
+pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
+{
+	unsigned long flags;
+	/*
+	 * validity checks on cpu_mask have been done upstream
+	 */
+	LOCK_PFS(flags);
+
+	DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
+		pfm_sessions.pfs_sys_sessions,
+		pfm_sessions.pfs_task_sessions,
+		pfm_sessions.pfs_sys_use_dbregs,
+		is_syswide,
+		cpu));
+
+	if (is_syswide) {
+		/*
+		 * cannot mix system wide and per-task sessions
+		 */
+		if (pfm_sessions.pfs_task_sessions > 0UL) {
+			DPRINT(("system wide not possible, %u conflicting task_sessions\n",
+				pfm_sessions.pfs_task_sessions));
+			goto abort;
+		}
+
+		if (pfm_sessions.pfs_sys_session[cpu]) goto error_conflict;
+
+		DPRINT(("reserving system wide session on CPU%u currently on CPU%u\n", cpu, smp_processor_id()));
+
+		pfm_sessions.pfs_sys_session[cpu] = task;
+
+		pfm_sessions.pfs_sys_sessions++ ;
+
+	} else {
+		if (pfm_sessions.pfs_sys_sessions) goto abort;
+		pfm_sessions.pfs_task_sessions++;
+	}
+
+	DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
+		pfm_sessions.pfs_sys_sessions,
+		pfm_sessions.pfs_task_sessions,
+		pfm_sessions.pfs_sys_use_dbregs,
+		is_syswide,
+		cpu));
+
+	UNLOCK_PFS(flags);
+
+	return 0;
+
+error_conflict:
+	/*
+	 * report the CPU whose slot is already taken, which is not
+	 * necessarily the CPU this code is running on
+	 */
+	DPRINT(("system wide not possible, conflicting session [%d] on CPU%u\n",
+		pfm_sessions.pfs_sys_session[cpu]->pid,
+		cpu));
+abort:
+	UNLOCK_PFS(flags);
+
+	return -EBUSY;
+
+}
+
+/*
+ * release a session previously accounted by pfm_reserve_session()
+ *
+ * For a system-wide session the per-CPU owner slot is cleared, the
+ * system-wide debug register use count is dropped when the context was
+ * using debug registers (ctx may be NULL, in which case this step is
+ * skipped), and the system-wide session count is decremented. For a
+ * per-task session only the task session count is decremented.
+ *
+ * always returns 0
+ */
+static int
+pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu)
+{
+ unsigned long flags;
+ /*
+ * validity checks on cpu_mask have been done upstream
+ */
+ LOCK_PFS(flags);
+
+ DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
+ pfm_sessions.pfs_sys_sessions,
+ pfm_sessions.pfs_task_sessions,
+ pfm_sessions.pfs_sys_use_dbregs,
+ is_syswide,
+ cpu));
+
+
+ if (is_syswide) {
+ pfm_sessions.pfs_sys_session[cpu] = NULL;
+ /*
+ * would not work with perfmon+more than one bit in cpu_mask
+ */
+ if (ctx && ctx->ctx_fl_using_dbreg) {
+ if (pfm_sessions.pfs_sys_use_dbregs == 0) {
+ printk(KERN_ERR "perfmon: invalid release for ctx %p sys_use_dbregs=0\n", ctx);
+ } else {
+ pfm_sessions.pfs_sys_use_dbregs--;
+ }
+ }
+ pfm_sessions.pfs_sys_sessions--;
+ } else {
+ pfm_sessions.pfs_task_sessions--;
+ }
+ DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
+ pfm_sessions.pfs_sys_sessions,
+ pfm_sessions.pfs_task_sessions,
+ pfm_sessions.pfs_sys_use_dbregs,
+ is_syswide,
+ cpu));
+
+ UNLOCK_PFS(flags);
+
+ return 0;
+}
+
+/*
+ * removes virtual mapping of the sampling buffer.
+ * IMPORTANT: cannot be called with interrupts disabled, e.g. inside
+ * a PROTECT_CTX() section.
+ *
+ * The unmapping is done under task->mm->mmap_sem held for writing.
+ *
+ * returns -EINVAL when task has no mm, size is 0 or vaddr is NULL;
+ * returns 0 otherwise.
+ * NOTE(review): a failing unmap is only logged, the function still
+ * returns 0 in that case - confirm callers do not rely on the result.
+ */
+static int
+pfm_remove_smpl_mapping(struct task_struct *task, void *vaddr, unsigned long size)
+{
+ int r;
+
+ /* sanity checks */
+ if (task->mm == NULL || size == 0UL || vaddr == NULL) {
+ printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task->pid, task->mm);
+ return -EINVAL;
+ }
+
+ DPRINT(("smpl_vaddr=%p size=%lu\n", vaddr, size));
+
+ /*
+ * does the actual unmapping
+ */
+ down_write(&task->mm->mmap_sem);
+
+ DPRINT(("down_write done smpl_vaddr=%p size=%lu\n", vaddr, size));
+
+ r = pfm_do_munmap(task->mm, (unsigned long)vaddr, size, 0);
+
+ up_write(&task->mm->mmap_sem);
+ if (r !=0) {
+ printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task->pid, vaddr, size);
+ }
+
+ DPRINT(("do_unmap(%p, %lu)=%d\n", vaddr, size, r));
+
+ return 0;
+}
+
+/*
+ * free actual physical storage used by sampling buffer
+ */
+#if 0
+static int
+pfm_free_smpl_buffer(pfm_context_t *ctx)
+{
+ pfm_buffer_fmt_t *fmt;
+
+ if (ctx->ctx_smpl_hdr == NULL) goto invalid_free;
+
+ /*
+ * we won't use the buffer format anymore
+ */
+ fmt = ctx->ctx_buf_fmt;
+
+ DPRINT(("sampling buffer @%p size %lu vaddr=%p\n",
+ ctx->ctx_smpl_hdr,
+ ctx->ctx_smpl_size,
+ ctx->ctx_smpl_vaddr));
+
+ pfm_buf_fmt_exit(fmt, current, NULL, NULL);
+
+ /*
+ * free the buffer
+ */
+ pfm_rvfree(ctx->ctx_smpl_hdr, ctx->ctx_smpl_size);
+
+ ctx->ctx_smpl_hdr = NULL;
+ ctx->ctx_smpl_size = 0UL;
+
+ return 0;
+
+invalid_free:
+ printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", current->pid);
+ return -EINVAL;
+}
+#endif
+
+/*
+ * run the exit callback of a buffer format on behalf of the current task;
+ * a NULL fmt is ignored. The callback result is discarded.
+ */
+static inline void
+pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt)
+{
+	if (fmt != NULL)
+		pfm_buf_fmt_exit(fmt, current, NULL, NULL);
+}
+
+/*
+ * pfmfs should _never_ be mounted by userland: that would be a security
+ * hassle with no real gain. As a consequence, no operations are needed
+ * on the root directory. A non-trivial d_name is still required; a
+ * "pfm:" prefix was suggested because it avoids special-casing in procfs.
+ *
+ * pfmfs_mnt is the kernel internal mount set up by init_pfm_fs().
+ */
+static struct vfsmount *pfmfs_mnt;
+
+/*
+ * register the pfmfs filesystem type and mount it internally.
+ * Returns 0 on success. If the registration fails, its error code is
+ * returned. If the mount fails, the filesystem type is unregistered
+ * again and the error encoded in the mount pointer is returned.
+ */
+static int __init
+init_pfm_fs(void)
+{
+	int err;
+
+	err = register_filesystem(&pfm_fs_type);
+	if (err)
+		return err;
+
+	pfmfs_mnt = kern_mount(&pfm_fs_type);
+	if (IS_ERR(pfmfs_mnt)) {
+		err = PTR_ERR(pfmfs_mnt);
+		unregister_filesystem(&pfm_fs_type);
+		return err;
+	}
+	return 0;
+}
+
+/*
+ * counterpart of init_pfm_fs(): unregister the filesystem type first,
+ * then drop the reference held on the internal mount.
+ */
+static void __exit
+exit_pfm_fs(void)
+{
+	unregister_filesystem(&pfm_fs_type);
+	mntput(pfmfs_mnt);
+}
+
+/*
+ * read() handler of a perfmon context file: delivers exactly one
+ * notification message (pfm_msg_t) taken from the context message queue.
+ *
+ * Returns:
+ *	sizeof(pfm_msg_t) when a message was copied to the user buffer
+ *	-EINVAL	filp is not a perfmon file, has no context, the user
+ *		buffer is smaller than one message, or the queue turned
+ *		out to be empty when extracting the message
+ *	-EAGAIN	the queue is empty and the file is in O_NONBLOCK mode
+ *	-EINTR	a signal is pending while waiting for a message
+ *	-EFAULT	the user buffer could not be written
+ */
+static ssize_t
+pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
+{
+	pfm_context_t *ctx;
+	pfm_msg_t *msg;
+	pfm_msg_t msg_copy;
+	ssize_t ret;
+	unsigned long flags;
+	DECLARE_WAITQUEUE(wait, current);
+
+	if (PFM_IS_FILE(filp) == 0) {
+		printk(KERN_ERR "perfmon: pfm_read: bad magic [%d]\n", current->pid);
+		return -EINVAL;
+	}
+
+	ctx = (pfm_context_t *)filp->private_data;
+	if (ctx == NULL) {
+		printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", current->pid);
+		return -EINVAL;
+	}
+
+	/*
+	 * check even when there is no message
+	 */
+	if (size < sizeof(pfm_msg_t)) {
+		DPRINT(("message is too small ctx=%p (>=%ld)\n", ctx, sizeof(pfm_msg_t)));
+		return -EINVAL;
+	}
+
+	PROTECT_CTX(ctx, flags);
+
+	/*
+	 * put ourselves on the wait queue
+	 */
+	add_wait_queue(&ctx->ctx_msgq_wait, &wait);
+
+	for(;;) {
+		/*
+		 * check wait queue
+		 */
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		DPRINT(("head=%d tail=%d\n", ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
+
+		/* a message is available: leave the loop with the context protected */
+		ret = 0;
+		if(PFM_CTXQ_EMPTY(ctx) == 0) break;
+
+		UNPROTECT_CTX(ctx, flags);
+
+		/*
+		 * check non-blocking read
+		 */
+		ret = -EAGAIN;
+		if(filp->f_flags & O_NONBLOCK) break;
+
+		/*
+		 * check pending signals
+		 */
+		if(signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+		/*
+		 * no message, so wait
+		 */
+		schedule();
+
+		PROTECT_CTX(ctx, flags);
+	}
+	DPRINT(("[%d] back to running ret=%ld\n", current->pid, ret));
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&ctx->ctx_msgq_wait, &wait);
+
+	/* every error exit of the loop happens with the context unprotected */
+	if (ret < 0) goto abort;
+
+	ret = -EINVAL;
+	msg = pfm_get_next_msg(ctx);
+	if (msg == NULL) {
+		printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, current->pid);
+		goto abort_locked;
+	}
+
+	DPRINT(("[%d] fd=%d type=%d\n", current->pid, msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type));
+
+	/*
+	 * snapshot the message while the context is protected, then release
+	 * the context BEFORE touching user memory: copy_to_user() may fault
+	 * and sleep, which is not allowed inside a PROTECT_CTX() section.
+	 */
+	msg_copy = *msg;
+
+	UNPROTECT_CTX(ctx, flags);
+
+	if (copy_to_user(buf, &msg_copy, sizeof(pfm_msg_t)) != 0) return -EFAULT;
+
+	return sizeof(pfm_msg_t);
+
+abort_locked:
+	UNPROTECT_CTX(ctx, flags);
+abort:
+	return ret;
+}
+
+/*
+ * write() is not supported on a perfmon context file: the call is
+ * traced and always fails with -EINVAL.
+ */
+static ssize_t
+pfm_write(struct file *file, const char __user *ubuf,
+	  size_t size, loff_t *ppos)
+{
+	DPRINT(("pfm_write called\n"));
+
+	return -EINVAL;
+}
+
+/*
+ * poll() handler of a perfmon context file.
+ * Registers the caller on the message wait queue of the context and
+ * reports POLLIN | POLLRDNORM when the message queue is not empty.
+ * Returns 0 (no event) when filp is not a perfmon file or has no context.
+ */
+static unsigned int
+pfm_poll(struct file *filp, poll_table * wait)
+{
+	pfm_context_t *ctx;
+	unsigned long flags;
+	unsigned int events;
+
+	if (PFM_IS_FILE(filp) == 0) {
+		printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid);
+		return 0;
+	}
+
+	ctx = (pfm_context_t *)filp->private_data;
+	if (ctx == NULL) {
+		printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", current->pid);
+		return 0;
+	}
+
+	DPRINT(("pfm_poll ctx_fd=%d before poll_wait\n", ctx->ctx_fd));
+
+	poll_wait(filp, &ctx->ctx_msgq_wait, wait);
+
+	/* the queue state is sampled with the context protected */
+	PROTECT_CTX(ctx, flags);
+	events = PFM_CTXQ_EMPTY(ctx) ? 0 : (POLLIN | POLLRDNORM);
+	UNPROTECT_CTX(ctx, flags);
+
+	DPRINT(("pfm_poll ctx_fd=%d mask=0x%x\n", ctx->ctx_fd, events));
+
+	return events;
+}
+
+/*
+ * ioctl() is not supported on a perfmon context file: the call is
+ * traced and always fails with -EINVAL.
+ */
+static int
+pfm_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+{
+	DPRINT(("pfm_ioctl called\n"));
+
+	return -EINVAL;
+}
+
+/*
+ * forward the request to fasync_helper() using the async queue of the
+ * context and return its result.
+ *
+ * interrupt cannot be masked when coming here
+ */
+static inline int
+pfm_do_fasync(int fd, struct file *filp, pfm_context_t *ctx, int on)
+{
+	int status = fasync_helper (fd, filp, on, &ctx->ctx_async_queue);
+
+	DPRINT(("pfm_fasync called by [%d] on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
+		current->pid, fd, on, ctx->ctx_async_queue, status));
+
+	return status;
+}
+
+/*
+ * fasync() handler of a perfmon context file.
+ * Returns -EBADF when filp is not a perfmon file or has no context,
+ * otherwise the result of pfm_do_fasync().
+ */
+static int
+pfm_fasync(int fd, struct file *filp, int on)
+{
+	pfm_context_t *ctx;
+	int status;
+
+	if (PFM_IS_FILE(filp) == 0) {
+		printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", current->pid);
+		return -EBADF;
+	}
+
+	ctx = (pfm_context_t *)filp->private_data;
+	if (ctx == NULL) {
+		printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", current->pid);
+		return -EBADF;
+	}
+
+	/*
+	 * Interrupts cannot be masked during this call because it may go
+	 * to sleep if memory is not readily available.
+	 *
+	 * We are protected from the context disappearing by the
+	 * get_fd()/put_fd() done in the caller, which also ensures the
+	 * serialization of this function.
+	 */
+	status = pfm_do_fasync(fd, filp, ctx, on);
+
+	DPRINT(("pfm_fasync called on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
+		fd, on, ctx->ctx_async_queue, status));
+
+	return status;
+}
+
+#ifdef CONFIG_SMP
+/*
+ * this function is exclusively called from pfm_close().
+ * The context is not protected at that time, nor are interrupts
+ * on the remote CPU. That's necessary to avoid deadlocks.
+ */
+static void
+pfm_syswide_force_stop(void *info)
+{
+	pfm_context_t *ctx = (pfm_context_t *)info;
+	struct pt_regs *regs = ia64_task_regs(current);
+	struct task_struct *owner;
+	unsigned long flags;
+	int ret;
+
+	/* info is the context to unload; it must be bound to this CPU */
+	if (ctx->ctx_cpu != smp_processor_id()) {
+		printk(KERN_ERR "perfmon: pfm_syswide_force_stop for CPU%d but on CPU%d\n",
+			ctx->ctx_cpu,
+			smp_processor_id());
+		return;
+	}
+	owner = GET_PMU_OWNER();
+	if (owner != ctx->ctx_task) {
+		/*
+		 * the two pointers differ, so either of them may be NULL:
+		 * never dereference them blindly just to print a pid
+		 */
+		printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected owner [%d] instead of [%d]\n",
+			smp_processor_id(),
+			owner ? owner->pid : -1,
+			ctx->ctx_task ? ctx->ctx_task->pid : -1);
+		return;
+	}
+	if (GET_PMU_CTX() != ctx) {
+		printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected ctx %p instead of %p\n",
+			smp_processor_id(),
+			GET_PMU_CTX(), ctx);
+		return;
+	}
+
+	DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), ctx->ctx_task->pid));
+	/*
+	 * the context is already protected in pfm_close(), we simply
+	 * need to mask interrupts to avoid a PMU interrupt race on
+	 * this CPU
+	 */
+	local_irq_save(flags);
+
+	/* a failure to unload is only traced, nothing can be returned from here */
+	ret = pfm_context_unload(ctx, NULL, 0, regs);
+	if (ret) {
+		DPRINT(("context_unload returned %d\n", ret));
+	}
+
+	/*
+	 * unmask interrupts, PMU interrupts are now spurious here
+	 */
+	local_irq_restore(flags);
+}
+
+/*
+ * run pfm_syswide_force_stop() on the CPU the context is bound to
+ * (ctx->ctx_cpu). The result of the cross-call is only traced.
+ */
+static void
+pfm_syswide_cleanup_other_cpu(pfm_context_t *ctx)
+{
+	int status;
+
+	DPRINT(("calling CPU%d for cleanup\n", ctx->ctx_cpu));
+
+	status = smp_call_function_single(ctx->ctx_cpu, pfm_syswide_force_stop, ctx, 0, 1);
+
+	DPRINT(("called CPU%d for cleanup ret=%d\n", ctx->ctx_cpu, status));
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * called for each close(). Partially free resources.
+ * When caller is self-monitoring, the context is unloaded.
+ *
+ * Returns -EBADF when filp is not a perfmon file or has no context
+ * attached, 0 otherwise. The result of pfm_remove_smpl_mapping() is
+ * not propagated to the caller.
+ */
+static int
+pfm_flush(struct file *filp)
+{
+ pfm_context_t *ctx;
+ struct task_struct *task;
+ struct pt_regs *regs;
+ unsigned long flags;
+ unsigned long smpl_buf_size = 0UL;
+ void *smpl_buf_vaddr = NULL;
+ int state, is_system;
+
+ if (PFM_IS_FILE(filp) == 0) {
+ DPRINT(("bad magic for\n"));
+ return -EBADF;
+ }
+
+ ctx = (pfm_context_t *)filp->private_data;
+ if (ctx == NULL) {
+ printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", current->pid);
+ return -EBADF;
+ }
+
+ /*
+ * remove our file from the async queue, if we use this mode.
+ * This can be done without the context being protected. We come
+ * here when the context has become unreacheable by other tasks.
+ *
+ * We may still have active monitoring at this point and we may
+ * end up in pfm_overflow_handler(). However, fasync_helper()
+ * operates with interrupts disabled and it cleans up the
+ * queue. If the PMU handler is called prior to entering
+ * fasync_helper() then it will send a signal. If it is
+ * invoked after, it will find an empty queue and no
+ * signal will be sent. In both case, we are safe
+ */
+ if (filp->f_flags & FASYNC) {
+ DPRINT(("cleaning up async_queue=%p\n", ctx->ctx_async_queue));
+ pfm_do_fasync (-1, filp, ctx, 0);
+ }
+
+ PROTECT_CTX(ctx, flags);
+
+ state = ctx->ctx_state;
+ is_system = ctx->ctx_fl_system;
+
+ task = PFM_CTX_TASK(ctx);
+ regs = ia64_task_regs(task);
+
+ DPRINT(("ctx_state=%d is_current=%d\n",
+ state,
+ task == current ? 1 : 0));
+
+ /*
+ * if state == UNLOADED, then task is NULL
+ *
+ * regs is only passed to pfm_context_unload() in the task == current
+ * branch below, i.e., never when task is NULL.
+ */
+
+ /*
+ * we must stop and unload because we are losing access to the context.
+ */
+ if (task == current) {
+#ifdef CONFIG_SMP
+ /*
+ * the task IS the owner but it migrated to another CPU: that's bad
+ * but we must handle this cleanly. Unfortunately, the kernel does
+ * not provide a mechanism to block migration (while the context is loaded).
+ *
+ * We need to release the resource on the ORIGINAL cpu.
+ */
+ if (is_system && ctx->ctx_cpu != smp_processor_id()) {
+
+ DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
+ /*
+ * keep context protected but unmask interrupt for IPI
+ */
+ local_irq_restore(flags);
+
+ pfm_syswide_cleanup_other_cpu(ctx);
+
+ /*
+ * restore interrupt masking
+ */
+ local_irq_save(flags);
+
+ /*
+ * context is unloaded at this point
+ */
+ } else
+#endif /* CONFIG_SMP */
+ {
+
+ DPRINT(("forcing unload\n"));
+ /*
+ * stop and unload, returning with state UNLOADED
+ * and session unreserved.
+ */
+ pfm_context_unload(ctx, NULL, 0, regs);
+
+ DPRINT(("ctx_state=%d\n", ctx->ctx_state));
+ }
+ }
+
+ /*
+ * remove virtual mapping, if any, for the calling task.
+ * cannot reset ctx field until last user is calling close().
+ *
+ * ctx_smpl_vaddr must never be cleared because it is needed
+ * by every task with access to the context
+ *
+ * When called from do_exit(), the mm context is gone already, therefore
+ * mm is NULL, i.e., the VMA is already gone and we do not have to
+ * do anything here
+ *
+ * The address and size are only recorded here, under protection; the
+ * actual unmapping is done after UNPROTECT_CTX() below.
+ */
+ if (ctx->ctx_smpl_vaddr && current->mm) {
+ smpl_buf_vaddr = ctx->ctx_smpl_vaddr;
+ smpl_buf_size = ctx->ctx_smpl_size;
+ }
+
+ UNPROTECT_CTX(ctx, flags);
+
+ /*
+ * if there was a mapping, then we systematically remove it
+ * at this point. Cannot be done inside critical section
+ * because some VM function reenables interrupts.
+ *
+ */
+ if (smpl_buf_vaddr) pfm_remove_smpl_mapping(current, smpl_buf_vaddr, smpl_buf_size);
+
+ return 0;
+}
+/*
+ * called either on explicit close() or from exit_files().
+ * Only the LAST user of the file gets to this point, i.e., it is
+ * called only ONCE.
+ *
+ * IMPORTANT: we get called ONLY when the refcnt on the file gets to zero
+ * (fput()),i.e, last task to access the file. Nobody else can access the
+ * file at this point.
+ *
+ * When called from exit_files(), the VMA has been freed because exit_mm()
+ * is executed before exit_files().
+ *
+ * When called from exit_files(), the current task is not yet ZOMBIE but we
+ * flush the PMU state to the context.
+ *
+ * Returns -EBADF when filp is not a perfmon file or has no context
+ * attached, 0 otherwise. The sampling buffer, if any, is released with
+ * pfm_rvfree(). The context itself is released with pfm_context_free()
+ * unless free_possible was cleared, which only happens in the CONFIG_SMP
+ * branch that switches the context to the ZOMBIE state.
+ */
+static int
+pfm_close(struct inode *inode, struct file *filp)
+{
+ pfm_context_t *ctx;
+ struct task_struct *task;
+ struct pt_regs *regs;
+ DECLARE_WAITQUEUE(wait, current);
+ unsigned long flags;
+ unsigned long smpl_buf_size = 0UL;
+ void *smpl_buf_addr = NULL;
+ int free_possible = 1;
+ int state, is_system;
+
+ DPRINT(("pfm_close called private=%p\n", filp->private_data));
+
+ if (PFM_IS_FILE(filp) == 0) {
+ DPRINT(("bad magic\n"));
+ return -EBADF;
+ }
+
+ ctx = (pfm_context_t *)filp->private_data;
+ if (ctx == NULL) {
+ printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", current->pid);
+ return -EBADF;
+ }
+
+ PROTECT_CTX(ctx, flags);
+
+ state = ctx->ctx_state;
+ is_system = ctx->ctx_fl_system;
+
+ task = PFM_CTX_TASK(ctx);
+ regs = ia64_task_regs(task);
+
+ DPRINT(("ctx_state=%d is_current=%d\n",
+ state,
+ task == current ? 1 : 0));
+
+ /*
+ * if task == current, then pfm_flush() unloaded the context
+ */
+ if (state == PFM_CTX_UNLOADED) goto doit;
+
+ /*
+ * context is loaded/masked and task != current, we need to
+ * either force an unload or go zombie
+ */
+
+ /*
+ * The task is currently blocked or will block after an overflow.
+ * we must force it to wakeup to get out of the
+ * MASKED state and transition to the unloaded state by itself.
+ *
+ * This situation is only possible for per-task mode
+ */
+ if (state == PFM_CTX_MASKED && CTX_OVFL_NOBLOCK(ctx) == 0) {
+
+ /*
+ * set a "partial" zombie state to be checked
+ * upon return from down() in pfm_handle_work().
+ *
+ * We cannot use the ZOMBIE state, because it is checked
+ * by pfm_load_regs() which is called upon wakeup from down().
+ * In such case, it would free the context and then we would
+ * return to pfm_handle_work() which would access the
+ * stale context. Instead, we set a flag invisible to pfm_load_regs()
+ * but visible to pfm_handle_work().
+ *
+ * For some window of time, we have a zombie context with
+ * ctx_state = MASKED and not ZOMBIE
+ */
+ ctx->ctx_fl_going_zombie = 1;
+
+ /*
+ * force task to wake up from MASKED state
+ */
+ up(&ctx->ctx_restart_sem);
+
+ DPRINT(("waking up ctx_state=%d\n", state));
+
+ /*
+ * put ourself to sleep waiting for the other
+ * task to report completion
+ *
+ * the context is protected by mutex, therefore there
+ * is no risk of being notified of completion before
+ * begin actually on the waitq.
+ */
+ set_current_state(TASK_INTERRUPTIBLE);
+ add_wait_queue(&ctx->ctx_zombieq, &wait);
+
+ UNPROTECT_CTX(ctx, flags);
+
+ /*
+ * XXX: check for signals :
+ * - ok for explicit close
+ * - not ok when coming from exit_files()
+ */
+ schedule();
+
+
+ PROTECT_CTX(ctx, flags);
+
+
+ remove_wait_queue(&ctx->ctx_zombieq, &wait);
+ set_current_state(TASK_RUNNING);
+
+ /*
+ * context is unloaded at this point
+ *
+ * NOTE: the trace below prints the local copy of the state taken
+ * before sleeping, not the current ctx->ctx_state.
+ */
+ DPRINT(("after zombie wakeup ctx_state=%d for\n", state));
+ }
+ else if (task != current) {
+#ifdef CONFIG_SMP
+ /*
+ * switch context to zombie state
+ */
+ ctx->ctx_state = PFM_CTX_ZOMBIE;
+
+ DPRINT(("zombie ctx for [%d]\n", task->pid));
+ /*
+ * cannot free the context on the spot. deferred until
+ * the task notices the ZOMBIE state
+ */
+ free_possible = 0;
+#else
+ pfm_context_unload(ctx, NULL, 0, regs);
+#endif
+ }
+
+doit:
+ /* reload state, may have changed during opening of critical section */
+ state = ctx->ctx_state;
+
+ /*
+ * the context is still attached to a task (possibly current)
+ * we cannot destroy it right now
+ */
+
+ /*
+ * we must free the sampling buffer right here because
+ * we cannot rely on it being cleaned up later by the
+ * monitored task. It is not possible to free vmalloc'ed
+ * memory in pfm_load_regs(). Instead, we remove the buffer
+ * now. should there be subsequent PMU overflow originally
+ * meant for sampling, the will be converted to spurious
+ * and that's fine because the monitoring tools is gone anyway.
+ *
+ * Only the bookkeeping is done here, with the context protected;
+ * the memory itself is released after UNPROTECT_CTX() below.
+ */
+ if (ctx->ctx_smpl_hdr) {
+ smpl_buf_addr = ctx->ctx_smpl_hdr;
+ smpl_buf_size = ctx->ctx_smpl_size;
+ /* no more sampling */
+ ctx->ctx_smpl_hdr = NULL;
+ ctx->ctx_fl_is_sampling = 0;
+ }
+
+ DPRINT(("ctx_state=%d free_possible=%d addr=%p size=%lu\n",
+ state,
+ free_possible,
+ smpl_buf_addr,
+ smpl_buf_size));
+
+ if (smpl_buf_addr) pfm_exit_smpl_buffer(ctx->ctx_buf_fmt);
+
+ /*
+ * UNLOADED means that the session has already been unreserved.
+ * Only a ZOMBIE context still has to unreserve its session here.
+ */
+ if (state == PFM_CTX_ZOMBIE) {
+ pfm_unreserve_session(ctx, ctx->ctx_fl_system , ctx->ctx_cpu);
+ }
+
+ /*
+ * disconnect file descriptor from context must be done
+ * before we unlock.
+ */
+ filp->private_data = NULL;
+
+ /*
+ * if we free on the spot, the context is now completely unreacheable
+ * from the callers side. The monitored task side is also cut, so we
+ * can freely cut.
+ *
+ * If we have a deferred free, only the caller side is disconnected.
+ */
+ UNPROTECT_CTX(ctx, flags);
+
+ /*
+ * All memory free operations (especially for vmalloc'ed memory)
+ * MUST be done with interrupts ENABLED.
+ */
+ if (smpl_buf_addr) pfm_rvfree(smpl_buf_addr, smpl_buf_size);
+
+ /*
+ * return the memory used by the context
+ */
+ if (free_possible) pfm_context_free(ctx);
+
+ return 0;
+}
+
+/*
+ * open() handler installed in pfm_file_ops: a perfmon context file can
+ * never be opened, the call is traced and always fails with -ENXIO.
+ */
+static int
+pfm_no_open(struct inode *irrelevant, struct file *dontcare)
+{
+	DPRINT(("pfm_no_open called\n"));
+
+	return -ENXIO;
+}
+
+
+
+/*
+ * file operations installed by pfm_alloc_fd() on every perfmon context
+ * file. write() and ioctl() are stubs which always fail.
+ */
+static struct file_operations pfm_file_ops = {
+	.open    = pfm_no_open,	/* special open code to disallow open via /proc */
+	.release = pfm_close,
+	.flush   = pfm_flush,
+	.read    = pfm_read,
+	.write   = pfm_write,
+	.llseek  = no_llseek,
+	.poll    = pfm_poll,
+	.ioctl   = pfm_ioctl,
+	.fasync  = pfm_fasync
+};
+
+/*
+ * d_delete callback of pfmfs dentries: unconditionally returns 1.
+ */
+static int
+pfmfs_delete_dentry(struct dentry *dentry)
+{
+	return 1;
+}
+
+/*
+ * dentry operations attached by pfm_alloc_fd() to the dentry of each
+ * perfmon context file
+ */
+static struct dentry_operations pfmfs_dentry_operations = {
+	.d_delete = pfmfs_delete_dentry,
+};
+
+
+/*
+ * allocate a file descriptor together with a file structure, an inode
+ * and a dentry on the internal pfmfs mount, and install the file in the
+ * descriptor table of the current task.
+ *
+ * On success the new file descriptor is returned and *cfile points to
+ * the file structure. On failure a negative error code is returned
+ * (-ENFILE when no descriptor, file or inode is available, -ENOMEM when
+ * the dentry cannot be allocated), *cfile is left untouched and every
+ * resource acquired so far is released.
+ */
+static int
+pfm_alloc_fd(struct file **cfile)
+{
+	int fd, ret;
+	struct file *file = NULL;
+	struct inode *inode;
+	char name[32];
+	struct qstr this;
+
+	fd = get_unused_fd();
+	if (fd < 0) return -ENFILE;
+
+	ret = -ENFILE;
+
+	file = get_empty_filp();
+	if (!file) goto out;
+
+	/*
+	 * allocate a new inode
+	 */
+	inode = new_inode(pfmfs_mnt->mnt_sb);
+	if (!inode) goto out;
+
+	DPRINT(("new inode ino=%ld @%p\n", inode->i_ino, inode));
+
+	inode->i_mode = S_IFCHR|S_IRUGO;
+	inode->i_uid  = current->fsuid;
+	inode->i_gid  = current->fsgid;
+
+	/* "[" + at most 20 digits + "]" + NUL always fits in name[] */
+	sprintf(name, "[%lu]", inode->i_ino);
+	this.name = name;
+	this.len  = strlen(name);
+	this.hash = inode->i_ino;
+
+	ret = -ENOMEM;
+
+	/*
+	 * allocate a new dcache entry
+	 */
+	file->f_dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this);
+	if (!file->f_dentry) goto out_inode;
+
+	file->f_dentry->d_op = &pfmfs_dentry_operations;
+
+	/* from here on the inode is owned by the dentry */
+	d_add(file->f_dentry, inode);
+	file->f_vfsmnt  = mntget(pfmfs_mnt);
+	file->f_mapping = inode->i_mapping;
+
+	file->f_op    = &pfm_file_ops;
+	file->f_mode  = FMODE_READ;
+	file->f_flags = O_RDONLY;
+	file->f_pos   = 0;
+
+	/*
+	 * may have to delay until context is attached?
+	 */
+	fd_install(fd, file);
+
+	/*
+	 * the file structure we will use
+	 */
+	*cfile = file;
+
+	return fd;
+
+out_inode:
+	/*
+	 * the inode is not attached to any dentry yet: drop our reference
+	 * explicitly, otherwise it is leaked
+	 */
+	iput(inode);
+out:
+	if (file) put_filp(file);
+	put_unused_fd(fd);
+	return ret;
+}
+
+/*
+ * undo pfm_alloc_fd(): clear the slot in the descriptor table of the
+ * current task, release the file structure (if any) and give the
+ * descriptor number back.
+ */
+static void
+pfm_free_fd(int fd, struct file *file)
+{
+	struct files_struct *fdtable = current->files;
+
+	/*
+	 * there is no fd_uninstall(), so the slot is cleared by hand
+	 */
+	spin_lock(&fdtable->file_lock);
+	fdtable->fd[fd] = NULL;
+	spin_unlock(&fdtable->file_lock);
+
+	if (file != NULL)
+		put_filp(file);
+
+	put_unused_fd(fd);
+}
+
+/*
+ * map the kernel buffer starting at buf into the vma at user address
+ * addr, one page at a time and read-only.
+ * Returns 0 on success, -ENOMEM as soon as one page cannot be remapped.
+ * The loop steps by PAGE_SIZE; the caller in this file passes a
+ * page-aligned size.
+ */
+static int
+pfm_remap_buffer(struct vm_area_struct *vma, unsigned long buf, unsigned long addr, unsigned long size)
+{
+	unsigned long page_frame;
+
+	DPRINT(("CPU%d buf=0x%lx addr=0x%lx size=%ld\n", smp_processor_id(), buf, addr, size));
+
+	for (; size > 0; addr += PAGE_SIZE, buf += PAGE_SIZE, size -= PAGE_SIZE) {
+
+		page_frame = ia64_tpa(buf) >> PAGE_SHIFT;
+
+		if (remap_pfn_range(vma, addr, page_frame, PAGE_SIZE, PAGE_READONLY))
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+/*
+ * allocate a sampling buffer and remaps it into the user address space of the task
+ *
+ * rsize is the requested size in bytes; it is rounded up to a page
+ * boundary. On success 0 is returned, the kernel address and aligned
+ * size of the buffer are recorded in ctx->ctx_smpl_hdr/ctx_smpl_size,
+ * and the user level address is recorded in ctx->ctx_smpl_vaddr and
+ * stored in *user_vaddr.
+ *
+ * On failure -ENOMEM is returned, everything allocated here is released
+ * and the context does not reference the released buffer.
+ */
+static int
+pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned long rsize, void **user_vaddr)
+{
+	struct mm_struct *mm = task->mm;
+	struct vm_area_struct *vma = NULL;
+	unsigned long size;
+	void *smpl_buf;
+
+
+	/*
+	 * the fixed header + requested size and align to page boundary
+	 */
+	size = PAGE_ALIGN(rsize);
+
+	DPRINT(("sampling buffer rsize=%lu size=%lu bytes\n", rsize, size));
+
+	/*
+	 * check requested size to avoid Denial-of-service attacks
+	 * XXX: may have to refine this test
+	 * Check against address space limit.
+	 *
+	 * if ((mm->total_vm << PAGE_SHIFT) + len> task->rlim[RLIMIT_AS].rlim_cur)
+	 * 	return -ENOMEM;
+	 */
+	if (size > task->signal->rlim[RLIMIT_MEMLOCK].rlim_cur)
+		return -ENOMEM;
+
+	/*
+	 * We do the easy to undo allocations first.
+	 *
+	 * pfm_rvmalloc(), clears the buffer, so there is no leak
+	 */
+	smpl_buf = pfm_rvmalloc(size);
+	if (smpl_buf == NULL) {
+		DPRINT(("Can't allocate sampling buffer\n"));
+		return -ENOMEM;
+	}
+
+	DPRINT(("smpl_buf @%p\n", smpl_buf));
+
+	/* allocate vma */
+	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	if (!vma) {
+		DPRINT(("Cannot allocate vma\n"));
+		goto error_kmem;
+	}
+	memset(vma, 0, sizeof(*vma));
+
+	/*
+	 * partially initialize the vma for the sampling buffer
+	 */
+	vma->vm_mm	     = mm;
+	vma->vm_flags	     = VM_READ| VM_MAYREAD |VM_RESERVED;
+	vma->vm_page_prot    = PAGE_READONLY; /* XXX may need to change */
+
+	/*
+	 * Now we have everything we need and we can initialize
+	 * and connect all the data structures
+	 */
+
+	ctx->ctx_smpl_hdr   = smpl_buf;
+	ctx->ctx_smpl_size  = size; /* aligned size */
+
+	/*
+	 * Let's do the difficult operations next.
+	 *
+	 * now we atomically find some area in the address space and
+	 * remap the buffer in it.
+	 */
+	down_write(&task->mm->mmap_sem);
+
+	/* find some free area in address space, must have mmap sem held */
+	vma->vm_start = pfm_get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS, 0);
+	if (vma->vm_start == 0UL) {
+		DPRINT(("Cannot find unmapped area for size %ld\n", size));
+		up_write(&task->mm->mmap_sem);
+		goto error;
+	}
+	vma->vm_end = vma->vm_start + size;
+	vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
+
+	DPRINT(("aligned size=%ld, hdr=%p mapped @0x%lx\n", size, ctx->ctx_smpl_hdr, vma->vm_start));
+
+	/* can only be applied to current task, need to have the mm semaphore held when called */
+	if (pfm_remap_buffer(vma, (unsigned long)smpl_buf, vma->vm_start, size)) {
+		DPRINT(("Can't remap buffer\n"));
+		up_write(&task->mm->mmap_sem);
+		goto error;
+	}
+
+	/*
+	 * now insert the vma in the vm list for the process, must be
+	 * done with mmap lock held
+	 */
+	insert_vm_struct(mm, vma);
+
+	mm->total_vm  += size >> PAGE_SHIFT;
+	vm_stat_account(vma);
+	up_write(&task->mm->mmap_sem);
+
+	/*
+	 * keep track of user level virtual address
+	 */
+	ctx->ctx_smpl_vaddr = (void *)vma->vm_start;
+	*(unsigned long *)user_vaddr = vma->vm_start;
+
+	return 0;
+
+error:
+	/*
+	 * the context was already connected to the buffer: disconnect it
+	 * so that it does not keep a pointer to the memory freed below
+	 */
+	ctx->ctx_smpl_hdr  = NULL;
+	ctx->ctx_smpl_size = 0UL;
+
+	kmem_cache_free(vm_area_cachep, vma);
+error_kmem:
+	pfm_rvfree(smpl_buf, size);
+
+	return -ENOMEM;
+}
+
+/*
+ * XXX: do something better here
+ *
+ * Returns non-zero when the current task is not allowed to operate on
+ * task: its uid or gid does not match every corresponding id of the
+ * target and it lacks CAP_SYS_PTRACE. Returns 0 otherwise.
+ * capable() is only consulted when an id mismatch was found.
+ */
+static int
+pfm_bad_permissions(struct task_struct *task)
+{
+	int uid_mismatch, gid_mismatch;
+
+	/* inspired by ptrace_attach() */
+	DPRINT(("cur: uid=%d gid=%d task: euid=%d suid=%d uid=%d egid=%d sgid=%d\n",
+		current->uid, current->gid,
+		task->euid, task->suid, task->uid,
+		task->egid, task->sgid));
+
+	uid_mismatch = current->uid != task->euid
+		    || current->uid != task->suid
+		    || current->uid != task->uid;
+
+	gid_mismatch = current->gid != task->egid
+		    || current->gid != task->sgid
+		    || current->gid != task->gid;
+
+	if (!uid_mismatch && !gid_mismatch)
+		return 0;
+
+	return !capable(CAP_SYS_PTRACE);
+}
+
+/*
+ * sanity check of the context creation arguments.
+ * Returns -EINVAL when a system-wide context also requests blocking
+ * notification, 0 otherwise. The task argument is currently unused.
+ */
+static int
+pfarg_is_sane(struct task_struct *task, pfarg_context_t *pfx)
+{
+	int ctx_flags = pfx->ctx_flags;
+
+	/*
+	 * cannot block in system-wide mode
+	 */
+	if ((ctx_flags & PFM_FL_SYSTEM_WIDE) && (ctx_flags & PFM_FL_NOTIFY_BLOCK)) {
+		DPRINT(("cannot use blocking mode when in system wide monitoring\n"));
+		return -EINVAL;
+	}
+
+	/* probably more to add here */
+	return 0;
+}
+
+/*
+ * look up the sampling buffer format requested in arg, validate the
+ * request against it, link it to the context and, when the format asks
+ * for a non-zero buffer size, allocate the sampling buffer and map it
+ * into the address space of the current task. Finally the format init
+ * routine is called.
+ *
+ * Returns 0 on success, -EINVAL when the format cannot be found, or the
+ * error returned by the first step that failed.
+ */
+static int
+pfm_setup_buffer_fmt(struct task_struct *task, pfm_context_t *ctx, unsigned int ctx_flags,
+		     unsigned int cpu, pfarg_context_t *arg)
+{
+	pfm_buffer_fmt_t *fmt;
+	unsigned long buf_size = 0UL;
+	void *user_addr = NULL;
+	void *fmt_arg = NULL;
+	int ret;
+#define PFM_CTXARG_BUF_ARG(a) (pfm_buffer_fmt_t *)(a+1)
+
+	fmt = pfm_find_buffer_fmt(arg->ctx_smpl_buf_id);
+	if (fmt == NULL) {
+		DPRINT(("[%d] cannot find buffer format\n", task->pid));
+		return -EINVAL;
+	}
+
+	/*
+	 * the format specific argument, if any, MUST immediately follow
+	 * the pfarg_context_t structure
+	 */
+	if (fmt->fmt_arg_size) fmt_arg = PFM_CTXARG_BUF_ARG(arg);
+
+	ret = pfm_buf_fmt_validate(fmt, task, ctx_flags, cpu, fmt_arg);
+
+	DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task->pid, ctx_flags, cpu, fmt_arg, ret));
+
+	if (ret) return ret;
+
+	/* from now on the context references the buffer format */
+	ctx->ctx_buf_fmt = fmt;
+
+	/*
+	 * a non-zero size means the format wants to use the perfmon
+	 * buffer allocation/mapping service
+	 */
+	ret = pfm_buf_fmt_getsize(fmt, task, ctx_flags, cpu, fmt_arg, &buf_size);
+	if (ret) return ret;
+
+	if (buf_size) {
+		/*
+		 * the buffer is always remapped into the caller's address space
+		 */
+		ret = pfm_smpl_buffer_alloc(current, ctx, buf_size, &user_addr);
+		if (ret) return ret;
+
+		/* report the user address of the buffer to the caller */
+		arg->ctx_smpl_vaddr = user_addr;
+	}
+
+	return pfm_buf_fmt_init(fmt, task, ctx->ctx_smpl_hdr, ctx_flags, cpu, fmt_arg);
+}
+
+/*
+ * initialize the software PMU state of a context: default values for
+ * every implemented PMC (starting at index 1), bitmasks of accessible
+ * PMCs and PMDs, and cleared bitmasks of used debug registers.
+ */
+static void
+pfm_reset_pmu_state(pfm_context_t *ctx)
+{
+	int pmc;
+
+	/*
+	 * install reset values for PMC.
+	 */
+	for (pmc = 1; PMC_IS_LAST(pmc) == 0; pmc++) {
+
+		if (PMC_IS_IMPL(pmc) == 0) continue;
+
+		ctx->ctx_pmcs[pmc] = PMC_DFL_VAL(pmc);
+
+		DPRINT(("pmc[%d]=0x%lx\n", pmc, ctx->ctx_pmcs[pmc]));
+	}
+
+	/*
+	 * PMD registers are set to 0UL when the context in memset()
+	 */
+
+	/*
+	 * On context switched restore, ALL pmc and ALL pmd must be restored,
+	 * even those not actively used by the task. In UP, the incoming
+	 * process may otherwise pick up left over PMC, PMD state from the
+	 * previous process. As opposed to PMD, stale PMC can cause harm to
+	 * the incoming process because they may change what is being
+	 * measured. Therefore, the entire PMC state must systematically be
+	 * reinstalled. In SMP, the same thing is possible on the same CPU
+	 * but also between 2 CPUs.
+	 *
+	 * The problem with PMD is information leaking especially to user
+	 * level when psr.sp=0
+	 *
+	 * There is unfortunately no easy way to avoid this problem on
+	 * either UP or SMP. This definitively slows down the
+	 * pfm_load_regs() function.
+	 */
+
+	/*
+	 * bitmask of all PMCs accessible to this context
+	 *
+	 * PMC0 is treated differently, hence bit 0 is masked out.
+	 */
+	ctx->ctx_all_pmcs[0] = pmu_conf->impl_pmcs[0] & ~0x1;
+
+	/*
+	 * bitmask of all PMDs that are accessible to this context
+	 */
+	ctx->ctx_all_pmds[0] = pmu_conf->impl_pmds[0];
+
+	DPRINT(("<%d> all_pmcs=0x%lx all_pmds=0x%lx\n", ctx->ctx_fd, ctx->ctx_all_pmcs[0],ctx->ctx_all_pmds[0]));
+
+	/*
+	 * useful in case of re-enable after disable
+	 */
+	ctx->ctx_used_dbrs[0] = 0UL;
+	ctx->ctx_used_ibrs[0] = 0UL;
+}
+
+/*
+ * compute the size of the buffer format specific argument following
+ * the pfarg_context_t pointed to by arg.
+ * *sz is set to 0 when no buffer format is requested (null uuid),
+ * otherwise to the fmt_arg_size of the requested format.
+ * Returns 0 on success, -EINVAL when the format cannot be found (*sz is
+ * then 0).
+ */
+static int
+pfm_ctx_getsize(void *arg, size_t *sz)
+{
+	pfarg_context_t *req = (pfarg_context_t *)arg;
+	pfm_buffer_fmt_t *fmt;
+
+	*sz = 0;
+
+	/* no sampling buffer format requested: nothing more to copy in */
+	if (!pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) return 0;
+
+	fmt = pfm_find_buffer_fmt(req->ctx_smpl_buf_id);
+	if (fmt == NULL) {
+		DPRINT(("cannot find buffer format\n"));
+		return -EINVAL;
+	}
+
+	/* get just enough to copy in user parameters */
+	*sz = fmt->fmt_arg_size;
+
+	DPRINT(("arg_size=%lu\n", *sz));
+
+	return 0;
+}
+
+
+
+/*
+ * check whether a context can be attached to a task.
+ *
+ * Returns:
+ *	-EPERM	task has no memory context (kernel thread) or the caller
+ *		has no permission on it
+ *	-EINVAL	blocking context loaded on self
+ *	-EBUSY	task is a zombie, or is another task which is neither
+ *		stopped nor traced
+ *	0	otherwise; for a task other than current,
+ *		wait_task_inactive() has been called first
+ */
+static int
+pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task)
+{
+	int is_self = (task == current);
+
+	/*
+	 * no kernel task
+	 */
+	if (task->mm == NULL) {
+		DPRINT(("task [%d] has not memory context (kernel thread)\n", task->pid));
+		return -EPERM;
+	}
+
+	/*
+	 * no task not owned by caller
+	 */
+	if (pfm_bad_permissions(task)) {
+		DPRINT(("no permission to attach to [%d]\n", task->pid));
+		return -EPERM;
+	}
+
+	/*
+	 * cannot block in self-monitoring mode
+	 */
+	if (CTX_OVFL_NOBLOCK(ctx) == 0 && is_self) {
+		DPRINT(("cannot load a blocking context on self for [%d]\n", task->pid));
+		return -EINVAL;
+	}
+
+	if (task->exit_state == EXIT_ZOMBIE) {
+		DPRINT(("cannot attach to zombie task [%d]\n", task->pid));
+		return -EBUSY;
+	}
+
+	/*
+	 * always ok for self
+	 */
+	if (is_self) return 0;
+
+	/* another task must be stopped or traced */
+	if (!(task->state == TASK_STOPPED || task->state == TASK_TRACED)) {
+		DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task->pid, task->state));
+		return -EBUSY;
+	}
+
+	/*
+	 * make sure the task is off any CPU
+	 */
+	wait_task_inactive(task);
+
+	/* more to come... */
+
+	return 0;
+}
+
+/*
+ * translate a pid into a task a context can be attached to.
+ *
+ * pids below 2 are rejected with -EPERM, an unknown pid with -ESRCH.
+ * For a pid other than the caller's, a reference is taken on the task
+ * with get_task_struct(); it is dropped again with pfm_put_task() when
+ * pfm_task_incompatible() rejects the task. On success 0 is returned
+ * and *task is set; on failure *task is left untouched.
+ */
+static int
+pfm_get_task(pfm_context_t *ctx, pid_t pid, struct task_struct **task)
+{
+	struct task_struct *target = current;
+	int ret;
+
+	/* XXX: need to add more checks here */
+	if (pid < 2) return -EPERM;
+
+	if (pid != current->pid) {
+
+		read_lock(&tasklist_lock);
+
+		target = find_task_by_pid(pid);
+
+		/* make sure task cannot go away while we operate on it */
+		if (target) get_task_struct(target);
+
+		read_unlock(&tasklist_lock);
+
+		if (target == NULL) return -ESRCH;
+	}
+
+	ret = pfm_task_incompatible(ctx, target);
+	if (ret == 0) {
+		*task = target;
+		return 0;
+	}
+
+	/* only a task looked up by pid holds a reference to give back */
+	if (target != current)
+		pfm_put_task(target);
+
+	return ret;
+}
+
+
+
+static int
+pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+ pfarg_context_t *req = (pfarg_context_t *)arg;
+ struct file *filp;
+ int ctx_flags;
+ int ret;
+
+ /*
+ * Overview: validates the request, allocates a context and a file
+ * descriptor, optionally sets up the sampling buffer format, then
+ * initializes the context in the UNLOADED state. Returns 0 on success
+ * (the new descriptor is stored in req->ctx_fd) or a negative error
+ * code. The ctx value passed in by the caller is not used: it is
+ * overwritten by the newly allocated context.
+ *
+ * let's check the arguments first
+ */
+ ret = pfarg_is_sane(current, req);
+ if (ret < 0) return ret;
+
+ ctx_flags = req->ctx_flags;
+
+ ret = -ENOMEM;
+
+ ctx = pfm_context_alloc();
+ if (!ctx) goto error;
+
+ ret = pfm_alloc_fd(&filp);
+ if (ret < 0) goto error_file;
+
+ req->ctx_fd = ctx->ctx_fd = ret;
+
+ /*
+ * attach context to file
+ */
+ filp->private_data = ctx;
+
+ /*
+ * does the user want to sample?
+ *
+ * NOTE(review): if pfm_setup_buffer_fmt() fails after the sampling
+ * buffer has been allocated and mapped, the buffer_error path below
+ * does not appear to release ctx->ctx_smpl_hdr nor the user mapping
+ * - to be confirmed against pfm_context_free().
+ */
+ if (pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) {
+ ret = pfm_setup_buffer_fmt(current, ctx, ctx_flags, 0, req);
+ if (ret) goto buffer_error;
+ }
+
+ /*
+ * init context protection lock
+ */
+ spin_lock_init(&ctx->ctx_lock);
+
+ /*
+ * context is unloaded
+ */
+ ctx->ctx_state = PFM_CTX_UNLOADED;
+
+ /*
+ * initialization of context's flags
+ */
+ ctx->ctx_fl_block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0;
+ ctx->ctx_fl_system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0;
+ ctx->ctx_fl_is_sampling = ctx->ctx_buf_fmt ? 1 : 0; /* assume record() is defined */
+ ctx->ctx_fl_no_msg = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0;
+ /*
+ * will move to set properties
+ * ctx->ctx_fl_excl_idle = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0;
+ */
+
+ /*
+ * init restart semaphore to locked
+ */
+ sema_init(&ctx->ctx_restart_sem, 0);
+
+ /*
+ * activation is used in SMP only
+ */
+ ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
+ SET_LAST_CPU(ctx, -1);
+
+ /*
+ * initialize notification message queue
+ */
+ ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0;
+ init_waitqueue_head(&ctx->ctx_msgq_wait);
+ init_waitqueue_head(&ctx->ctx_zombieq);
+
+ DPRINT(("ctx=%p flags=0x%x system=%d notify_block=%d excl_idle=%d no_msg=%d ctx_fd=%d \n",
+ ctx,
+ ctx_flags,
+ ctx->ctx_fl_system,
+ ctx->ctx_fl_block,
+ ctx->ctx_fl_excl_idle,
+ ctx->ctx_fl_no_msg,
+ ctx->ctx_fd));
+
+ /*
+ * initialize soft PMU state
+ */
+ pfm_reset_pmu_state(ctx);
+
+ return 0;
+
+buffer_error:
+ pfm_free_fd(ctx->ctx_fd, filp);
+
+ if (ctx->ctx_buf_fmt) {
+ pfm_buf_fmt_exit(ctx->ctx_buf_fmt, current, NULL, regs);
+ }
+error_file:
+ pfm_context_free(ctx);
+
+error:
+ return ret;
+}
+
+/*
+ * compute the value a counter is reset to: the long or the short reset
+ * value of the register, optionally randomized (PFM_REGFL_RANDOM) by
+ * subtracting the current seed restricted to the register mask. In that
+ * case a new seed is generated with carta_random32() and stored back.
+ * The value is recorded in reg->lval and returned.
+ */
+static inline unsigned long
+pfm_new_counter_value (pfm_counter_t *reg, int is_long_reset)
+{
+	extern unsigned long carta_random32 (unsigned long seed);
+	unsigned long value, cur_seed, next_seed, rnd_mask;
+
+	if (is_long_reset)
+		value = reg->long_reset;
+	else
+		value = reg->short_reset;
+
+	if (reg->flags & PFM_REGFL_RANDOM) {
+		cur_seed = reg->seed;
+		rnd_mask = reg->mask;
+
+		next_seed = carta_random32(cur_seed);
+
+		/* counter values are negative numbers! */
+		value -= (cur_seed & rnd_mask);
+
+		/* a mask wider than 32 bits needs a full 64-bit random value */
+		if ((rnd_mask >> 32) != 0)
+			next_seed |= carta_random32(cur_seed >> 32) << 32;
+
+		reg->seed = next_seed;
+	}
+
+	reg->lval = value;
+
+	return value;
+}
+
+/*
+ * reset the overflowed counters listed in ovfl_regs[0], and the PMDs
+ * they ask to be reset along with them, in the software state of the
+ * context only (ctx_pmds[].val). No PMU register is written here.
+ */
+static void
+pfm_reset_regs_masked(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset)
+{
+	unsigned long pending = ovfl_regs[0] >> PMU_FIRST_COUNTER;
+	unsigned long others = 0UL;
+	unsigned long val;
+	int i;
+
+	/*
+	 * first pass: the counters which overflowed
+	 */
+	for (i = PMU_FIRST_COUNTER; pending; i++, pending >>= 1) {
+
+		if (pending & 0x1UL) {
+			val = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset);
+			ctx->ctx_pmds[i].val = val;
+
+			others |= ctx->ctx_pmds[i].reset_pmds[0];
+
+			DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val));
+		}
+	}
+
+	/*
+	 * second pass: the registers to reset on behalf of the counters
+	 */
+	for (i = 0; others; i++, others >>= 1) {
+
+		if (others & 0x1) {
+			val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset);
+			ctx->ctx_pmds[i].val = val;
+
+			DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n",
+				  is_long_reset ? "long" : "short", i, val));
+		}
+	}
+}
+
+/*
+ * Reset the overflowed counters described by ovfl_regs[0], together with
+ * every PMD listed in their reset_pmds[0] masks.
+ *
+ * When the context is in PFM_CTX_MASKED state the work is delegated to
+ * pfm_reset_regs_masked(). Otherwise counters are updated through
+ * pfm_write_soft_counter() and non-counting PMDs are written with
+ * ia64_set_pmd(), followed by a data serialization.
+ */
+static void
+pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset)
+{
+	unsigned long pending;
+	unsigned long dependents = 0UL;
+	unsigned long val;
+	int i;
+
+	DPRINT_ovfl(("ovfl_regs=0x%lx is_long_reset=%d\n", ovfl_regs[0], is_long_reset));
+
+	if (ctx->ctx_state == PFM_CTX_MASKED) {
+		pfm_reset_regs_masked(ctx, ovfl_regs, is_long_reset);
+		return;
+	}
+
+	/*
+	 * first pass: restore the reset value of the overflowed counters
+	 */
+	pending = ovfl_regs[0] >> PMU_FIRST_COUNTER;
+	for (i = PMU_FIRST_COUNTER; pending != 0UL; pending >>= 1, i++) {
+
+		if ((pending & 0x1UL) == 0UL)
+			continue;
+
+		val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset);
+		dependents |= ctx->ctx_pmds[i].reset_pmds[0];
+
+		DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val));
+
+		pfm_write_soft_counter(ctx, i, val);
+	}
+
+	/*
+	 * second pass: reset the other registers the counters depend on
+	 */
+	for (i = 0; dependents != 0UL; dependents >>= 1, i++) {
+
+		if ((dependents & 0x1) == 0)
+			continue;
+
+		val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset);
+
+		if (PMD_IS_COUNTING(i)) {
+			pfm_write_soft_counter(ctx, i, val);
+		} else {
+			ia64_set_pmd(i, val);
+		}
+		DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n",
+			is_long_reset ? "long" : "short", i, val));
+	}
+	ia64_srlz_d();
+}
+
+/*
+ * Program one or more PMC registers for a context.
+ *
+ * arg points to an array of count pfarg_reg_t requests. Each request is
+ * validated and then committed to the software state (ctx_pmcs[], the
+ * per-PMD overflow information) and, when the context is loaded, to the
+ * thread state and/or the hardware register.
+ *
+ * Returns 0 on success, -EBUSY when a loaded system-wide context is
+ * accessed from a CPU other than ctx_cpu, -EINVAL for a zombie context or
+ * an invalid request, or the non-zero value returned by a register write
+ * checker. On a per-register failure the offending request is flagged with
+ * PFM_REG_RETFL_EINVAL; requests processed before it remain committed.
+ */
+static int
+pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+	struct thread_struct *thread = NULL;
+	struct task_struct *task;
+	pfarg_reg_t *req = (pfarg_reg_t *)arg;
+	unsigned long value, pmc_pm;
+	unsigned long smpl_pmds, reset_pmds, impl_pmds;
+	unsigned int cnum, reg_flags, flags, pmc_type;
+	int i, can_access_pmu = 0, is_loaded, is_system, expert_mode;
+	int is_monitor, is_counting, state;
+	int ret = -EINVAL;
+	pfm_reg_check_t wr_func;
+#define PFM_CHECK_PMC_PM(x, y, z) ((x)->ctx_fl_system ^ PMC_PM(y, z))
+
+	state     = ctx->ctx_state;
+	is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
+	is_system = ctx->ctx_fl_system;
+	task      = ctx->ctx_task;
+	impl_pmds = pmu_conf->impl_pmds[0];
+
+	if (state == PFM_CTX_ZOMBIE) return -EINVAL;
+
+	if (is_loaded) {
+		thread = &task->thread;
+		/*
+		 * In system wide and when the context is loaded, access can only happen
+		 * when the caller is running on the CPU being monitored by the session.
+		 * It does not have to be the owner (ctx_task) of the context per se.
+		 */
+		if (is_system && ctx->ctx_cpu != smp_processor_id()) {
+			DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
+			return -EBUSY;
+		}
+		can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
+	}
+	expert_mode = pfm_sysctl.expert_mode;
+
+	for (i = 0; i < count; i++, req++) {
+
+		cnum       = req->reg_num;
+		reg_flags  = req->reg_flags;
+		value      = req->reg_value;
+		smpl_pmds  = req->reg_smpl_pmds[0];
+		reset_pmds = req->reg_reset_pmds[0];
+		flags      = 0;
+
+
+		if (cnum >= PMU_MAX_PMCS) {
+			DPRINT(("pmc%u is invalid\n", cnum));
+			goto error;
+		}
+
+		pmc_type   = pmu_conf->pmc_desc[cnum].type;
+		pmc_pm     = (value >> pmu_conf->pmc_desc[cnum].pm_pos) & 0x1;
+		is_counting = (pmc_type & PFM_REG_COUNTING) == PFM_REG_COUNTING ? 1 : 0;
+		is_monitor  = (pmc_type & PFM_REG_MONITOR) == PFM_REG_MONITOR ? 1 : 0;
+
+		/*
+		 * we reject all non implemented PMC as well
+		 * as attempts to modify PMC[0-3] which are used
+		 * as status registers by the PMU
+		 */
+		if ((pmc_type & PFM_REG_IMPL) == 0 || (pmc_type & PFM_REG_CONTROL) == PFM_REG_CONTROL) {
+			DPRINT(("pmc%u is unimplemented or no-access pmc_type=%x\n", cnum, pmc_type));
+			goto error;
+		}
+		wr_func = pmu_conf->pmc_desc[cnum].write_check;
+		/*
+		 * If the PMC is a monitor, then if the value is not the default:
+		 * 	- system-wide session: PMCx.pm=1 (privileged monitor)
+		 * 	- per-task           : PMCx.pm=0 (user monitor)
+		 */
+		if (is_monitor && value != PMC_DFL_VAL(cnum) && is_system ^ pmc_pm) {
+			DPRINT(("pmc%u pmc_pm=%lu is_system=%d\n",
+				cnum,
+				pmc_pm,
+				is_system));
+			goto error;
+		}
+
+		if (is_counting) {
+			/*
+			 * enforce generation of overflow interrupt. Necessary on all
+			 * CPUs.
+			 */
+			value |= 1 << PMU_PMC_OI;
+
+			if (reg_flags & PFM_REGFL_OVFL_NOTIFY) {
+				flags |= PFM_REGFL_OVFL_NOTIFY;
+			}
+
+			if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM;
+
+			/* verify validity of smpl_pmds */
+			if ((smpl_pmds & impl_pmds) != smpl_pmds) {
+				DPRINT(("invalid smpl_pmds 0x%lx for pmc%u\n", smpl_pmds, cnum));
+				goto error;
+			}
+
+			/* verify validity of reset_pmds */
+			if ((reset_pmds & impl_pmds) != reset_pmds) {
+				DPRINT(("invalid reset_pmds 0x%lx for pmc%u\n", reset_pmds, cnum));
+				goto error;
+			}
+		} else {
+			if (reg_flags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) {
+				DPRINT(("cannot set ovfl_notify or random on pmc%u\n", cnum));
+				goto error;
+			}
+			/* eventid on non-counting monitors are ignored */
+		}
+
+		/*
+		 * execute write checker, if any
+		 */
+		if (likely(expert_mode == 0 && wr_func)) {
+			ret = (*wr_func)(task, ctx, cnum, &value, regs);
+			if (ret) goto error;
+			/* re-arm the default error code for the next request */
+			ret = -EINVAL;
+		}
+
+		/*
+		 * no error on this register
+		 */
+		PFM_REG_RETFLAG_SET(req->reg_flags, 0);
+
+		/*
+		 * Now we commit the changes to the software state
+		 */
+
+		/*
+		 * update overflow information
+		 */
+		if (is_counting) {
+			/*
+			 * full flag update each time a register is programmed
+			 */
+			ctx->ctx_pmds[cnum].flags = flags;
+
+			ctx->ctx_pmds[cnum].reset_pmds[0] = reset_pmds;
+			ctx->ctx_pmds[cnum].smpl_pmds[0]  = smpl_pmds;
+			ctx->ctx_pmds[cnum].eventid       = req->reg_smpl_eventid;
+
+			/*
+			 * Mark all PMDS to be accessed as used.
+			 *
+			 * We do not keep track of PMC because we have to
+			 * systematically restore ALL of them.
+			 *
+			 * We do not update the used_monitors mask, because
+			 * if we have not programmed them, then will be in
+			 * a quiescent state, therefore we will not need to
+			 * mask/restore then when context is MASKED.
+			 */
+			CTX_USED_PMD(ctx, reset_pmds);
+			CTX_USED_PMD(ctx, smpl_pmds);
+			/*
+			 * make sure we do not try to reset on
+			 * restart because we have established new values.
+			 *
+			 * Only the bit of this register must be cleared: the
+			 * complement has to apply to the shifted bit, not to
+			 * 1UL before the shift (which would also clear every
+			 * lower-numbered bit).
+			 */
+			if (state == PFM_CTX_MASKED) ctx->ctx_ovfl_regs[0] &= ~(1UL << cnum);
+		}
+		/*
+		 * Needed in case the user does not initialize the equivalent
+		 * PMD. Clearing is done indirectly via pfm_reset_pmu_state() so there is no
+		 * possible leak here.
+		 */
+		CTX_USED_PMD(ctx, pmu_conf->pmc_desc[cnum].dep_pmd[0]);
+
+		/*
+		 * keep track of the monitor PMC that we are using.
+		 * we save the value of the pmc in ctx_pmcs[] and if
+		 * the monitoring is not stopped for the context we also
+		 * place it in the saved state area so that it will be
+		 * picked up later by the context switch code.
+		 *
+		 * The value in ctx_pmcs[] can only be changed in pfm_write_pmcs().
+		 *
+		 * The value in thread->pmcs[] may be modified on overflow, i.e.,  when
+		 * monitoring needs to be stopped.
+		 */
+		if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum);
+
+		/*
+		 * update context state
+		 */
+		ctx->ctx_pmcs[cnum] = value;
+
+		if (is_loaded) {
+			/*
+			 * write thread state
+			 */
+			if (is_system == 0) thread->pmcs[cnum] = value;
+
+			/*
+			 * write hardware register if we can
+			 */
+			if (can_access_pmu) {
+				ia64_set_pmc(cnum, value);
+			}
+#ifdef CONFIG_SMP
+			else {
+				/*
+				 * per-task SMP only here
+				 *
+			 	 * we are guaranteed that the task is not running on the other CPU,
+			 	 * we indicate that this PMC will need to be reloaded if the task
+			 	 * is rescheduled on the CPU it ran last on.
+			 	 */
+				ctx->ctx_reload_pmcs[0] |= 1UL << cnum;
+			}
+#endif
+		}
+
+		DPRINT(("pmc[%u]=0x%lx ld=%d apmu=%d flags=0x%x all_pmcs=0x%lx used_pmds=0x%lx eventid=%ld smpl_pmds=0x%lx reset_pmds=0x%lx reloads_pmcs=0x%lx used_monitors=0x%lx ovfl_regs=0x%lx\n",
+			  cnum,
+			  value,
+			  is_loaded,
+			  can_access_pmu,
+			  flags,
+			  ctx->ctx_all_pmcs[0],
+			  ctx->ctx_used_pmds[0],
+			  ctx->ctx_pmds[cnum].eventid,
+			  smpl_pmds,
+			  reset_pmds,
+			  ctx->ctx_reload_pmcs[0],
+			  ctx->ctx_used_monitors[0],
+			  ctx->ctx_ovfl_regs[0]));
+	}
+
+	/*
+	 * make sure the changes are visible
+	 */
+	if (can_access_pmu) ia64_srlz_d();
+
+	return 0;
+error:
+	PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
+	return ret;
+}
+
+/*
+ * Write one or more PMD registers for a context.
+ *
+ * arg points to an array of count pfarg_reg_t requests. For each request
+ * the value, the long/short reset values and the randomization parameters
+ * are stored in ctx_pmds[]; when the context is loaded the value is also
+ * written to the thread state and/or the hardware register. For counting
+ * PMDs of a loaded context the value is split using pmu_conf->ovfl_val:
+ * the masked part goes to the hardware, the rest to ctx_pmds[].val.
+ *
+ * Returns 0 on success, -EBUSY when a loaded system-wide context is
+ * accessed from a CPU other than ctx_cpu, -EINVAL for a zombie context or
+ * an unimplemented PMD, or the non-zero value returned by a register write
+ * checker. On a per-register failure the offending request is flagged with
+ * PFM_REG_RETFL_EINVAL; requests processed before it remain committed.
+ */
+static int
+pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+	struct thread_struct *thread = NULL;
+	struct task_struct *task;
+	pfarg_reg_t *req = (pfarg_reg_t *)arg;
+	unsigned long value, hw_value, ovfl_mask;
+	unsigned int cnum;
+	int i, can_access_pmu = 0, state;
+	int is_counting, is_loaded, is_system, expert_mode;
+	int ret = -EINVAL;
+	pfm_reg_check_t wr_func;
+
+
+	state     = ctx->ctx_state;
+	is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
+	is_system = ctx->ctx_fl_system;
+	ovfl_mask = pmu_conf->ovfl_val;
+	task      = ctx->ctx_task;
+
+	if (unlikely(state == PFM_CTX_ZOMBIE)) return -EINVAL;
+
+	/*
+	 * on both UP and SMP, we can only write to the PMD when the task is
+	 * the owner of the local PMU.
+	 */
+	if (likely(is_loaded)) {
+		thread = &task->thread;
+		/*
+		 * In system wide and when the context is loaded, access can only happen
+		 * when the caller is running on the CPU being monitored by the session.
+		 * It does not have to be the owner (ctx_task) of the context per se.
+		 */
+		if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) {
+			DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
+			return -EBUSY;
+		}
+		can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
+	}
+	expert_mode = pfm_sysctl.expert_mode;
+
+	for (i = 0; i < count; i++, req++) {
+
+		cnum  = req->reg_num;
+		value = req->reg_value;
+
+		if (!PMD_IS_IMPL(cnum)) {
+			DPRINT(("pmd[%u] is unimplemented or invalid\n", cnum));
+			goto abort_mission;
+		}
+		is_counting = PMD_IS_COUNTING(cnum);
+		wr_func     = pmu_conf->pmd_desc[cnum].write_check;
+
+		/*
+		 * execute write checker, if any
+		 */
+		if (unlikely(expert_mode == 0 && wr_func)) {
+			unsigned long v = value;
+
+			ret = (*wr_func)(task, ctx, cnum, &v, regs);
+			if (ret) goto abort_mission;
+
+			value = v;
+			/* re-arm the default error code for the next request */
+			ret   = -EINVAL;
+		}
+
+		/*
+		 * no error on this register
+		 */
+		PFM_REG_RETFLAG_SET(req->reg_flags, 0);
+
+		/*
+		 * now commit changes to software state
+		 */
+		hw_value = value;
+
+		/*
+		 * update virtualized (64bits) counter
+		 */
+		if (is_counting) {
+			/*
+			 * write context state
+			 */
+			ctx->ctx_pmds[cnum].lval = value;
+
+			/*
+			 * when context is loaded we use the split value
+			 */
+			if (is_loaded) {
+				hw_value = value &  ovfl_mask;
+				value    = value & ~ovfl_mask;
+			}
+		}
+		/*
+		 * update reset values (not just for counters)
+		 */
+		ctx->ctx_pmds[cnum].long_reset  = req->reg_long_reset;
+		ctx->ctx_pmds[cnum].short_reset = req->reg_short_reset;
+
+		/*
+		 * update randomization parameters (not just for counters)
+		 */
+		ctx->ctx_pmds[cnum].seed = req->reg_random_seed;
+		ctx->ctx_pmds[cnum].mask = req->reg_random_mask;
+
+		/*
+		 * update context value
+		 */
+		ctx->ctx_pmds[cnum].val  = value;
+
+		/*
+		 * Keep track of what we use
+		 *
+		 * We do not keep track of PMC because we have to
+		 * systematically restore ALL of them.
+		 */
+		CTX_USED_PMD(ctx, PMD_PMD_DEP(cnum));
+
+		/*
+		 * mark this PMD register used as well
+		 */
+		CTX_USED_PMD(ctx, RDEP(cnum));
+
+		/*
+		 * make sure we do not try to reset on
+		 * restart because we have established new values.
+		 *
+		 * Only the bit of this register must be cleared: the
+		 * complement has to apply to the shifted bit, not to
+		 * 1UL before the shift (which would also clear every
+		 * lower-numbered bit).
+		 */
+		if (is_counting && state == PFM_CTX_MASKED) {
+			ctx->ctx_ovfl_regs[0] &= ~(1UL << cnum);
+		}
+
+		if (is_loaded) {
+			/*
+		 	 * write thread state
+		 	 */
+			if (is_system == 0) thread->pmds[cnum] = hw_value;
+
+			/*
+			 * write hardware register if we can
+			 */
+			if (can_access_pmu) {
+				ia64_set_pmd(cnum, hw_value);
+			} else {
+#ifdef CONFIG_SMP
+				/*
+			 	 * we are guaranteed that the task is not running on the other CPU,
+			 	 * we indicate that this PMD will need to be reloaded if the task
+			 	 * is rescheduled on the CPU it ran last on.
+			 	 */
+				ctx->ctx_reload_pmds[0] |= 1UL << cnum;
+#endif
+			}
+		}
+
+		DPRINT(("pmd[%u]=0x%lx ld=%d apmu=%d, hw_value=0x%lx ctx_pmd=0x%lx short_reset=0x%lx "
+			  "long_reset=0x%lx notify=%c seed=0x%lx mask=0x%lx used_pmds=0x%lx reset_pmds=0x%lx reload_pmds=0x%lx all_pmds=0x%lx ovfl_regs=0x%lx\n",
+			cnum,
+			value,
+			is_loaded,
+			can_access_pmu,
+			hw_value,
+			ctx->ctx_pmds[cnum].val,
+			ctx->ctx_pmds[cnum].short_reset,
+			ctx->ctx_pmds[cnum].long_reset,
+			PMC_OVFL_NOTIFY(ctx, cnum) ? 'Y':'N',
+			ctx->ctx_pmds[cnum].seed,
+			ctx->ctx_pmds[cnum].mask,
+			ctx->ctx_used_pmds[0],
+			ctx->ctx_pmds[cnum].reset_pmds[0],
+			ctx->ctx_reload_pmds[0],
+			ctx->ctx_all_pmds[0],
+			ctx->ctx_ovfl_regs[0]));
+	}
+
+	/*
+	 * make changes visible
+	 */
+	if (can_access_pmu) ia64_srlz_d();
+
+	return 0;
+
+abort_mission:
+	/*
+	 * for now, we have only one possibility for error
+	 */
+	PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
+	return ret;
+}
+
+/*
+ * Read the current value of one or more PMD registers for a context.
+ *
+ * arg points to an array of count pfarg_reg_t requests. For each request, reg_value,
+ * reg_flags and reg_last_reset_val are filled in. Returns 0 on success, -EBUSY when a
+ * loaded system-wide context is accessed from a CPU other than ctx_cpu, -EINVAL for a
+ * zombie context or for a PMD that is unimplemented or not marked as used by the
+ * context, or the non-zero value returned by a register read checker. On a per-register
+ * failure the offending request is flagged with PFM_REG_RETFL_EINVAL and the remaining
+ * requests are not processed.
+ *
+ * By way of PROTECT_CONTEXT(), interrupts are masked while we are in this function.
+ * Therefore we know we do not have to worry about the PMU overflow interrupt. If an
+ * interrupt is delivered during the call, it will be kept pending until we leave, making
+ * it appear as if it had been generated at the UNPROTECT_CONTEXT(). At least we are
+ * guaranteed to return consistent data to the user, it may simply be old. It is not
+ * trivial to treat the overflow while inside the call because you may end up in
+ * some module sampling buffer code causing deadlocks.
+ */
+static int
+pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+ struct thread_struct *thread = NULL;
+ struct task_struct *task;
+ unsigned long val = 0UL, lval, ovfl_mask, sval;
+ pfarg_reg_t *req = (pfarg_reg_t *)arg;
+ unsigned int cnum, reg_flags = 0;
+ int i, can_access_pmu = 0, state;
+ int is_loaded, is_system, is_counting, expert_mode;
+ int ret = -EINVAL;
+ pfm_reg_check_t rd_func;
+
+ /*
+ * access is possible when loaded only for
+ * self-monitoring tasks or in UP mode
+ */
+
+ state = ctx->ctx_state;
+ is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
+ is_system = ctx->ctx_fl_system;
+ ovfl_mask = pmu_conf->ovfl_val;
+ task = ctx->ctx_task;
+
+ if (state == PFM_CTX_ZOMBIE) return -EINVAL;
+
+ if (likely(is_loaded)) {
+ thread = &task->thread;
+ /*
+ * In system wide and when the context is loaded, access can only happen
+ * when the caller is running on the CPU being monitored by the session.
+ * It does not have to be the owner (ctx_task) of the context per se.
+ */
+ if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) {
+ DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
+ return -EBUSY;
+ }
+ /*
+ * this can be true when not self-monitoring only in UP
+ */
+ can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
+
+ if (can_access_pmu) ia64_srlz_d();
+ }
+ expert_mode = pfm_sysctl.expert_mode;
+
+ DPRINT(("ld=%d apmu=%d ctx_state=%d\n",
+ is_loaded,
+ can_access_pmu,
+ state));
+
+ /*
+ * on both UP and SMP, we can only read the PMD from the hardware register when
+ * the task is the owner of the local PMU.
+ */
+
+ for (i = 0; i < count; i++, req++) {
+
+ cnum = req->reg_num;
+ reg_flags = req->reg_flags;
+
+ if (unlikely(!PMD_IS_IMPL(cnum))) goto error;
+ /*
+ * we can only read the registers that we use. That includes
+ * the ones we explicitly initialize AND the ones we want included
+ * in the sampling buffer (smpl_regs).
+ *
+ * Having this restriction allows optimization in the ctxsw routine
+ * without compromising security (leaks)
+ */
+ if (unlikely(!CTX_IS_USED_PMD(ctx, cnum))) goto error;
+
+ sval = ctx->ctx_pmds[cnum].val;
+ lval = ctx->ctx_pmds[cnum].lval;
+ is_counting = PMD_IS_COUNTING(cnum);
+
+ /*
+ * If the task is not the current one, then we check if the
+ * PMU state is still in the local live register due to lazy ctxsw.
+ * If true, then we read directly from the registers.
+ */
+ if (can_access_pmu){
+ val = ia64_get_pmd(cnum);
+ } else {
+ /*
+ * context has been saved: when the context is loaded, use the
+ * value saved in the thread state; otherwise start from zero
+ * (for counters the software value sval is added below).
+ * A zombie context never gets here, it is rejected on entry.
+ */
+ val = is_loaded ? thread->pmds[cnum] : 0UL;
+ }
+ rd_func = pmu_conf->pmd_desc[cnum].read_check;
+
+ if (is_counting) {
+ /*
+ * keep only the bits covered by ovfl_mask from the raw value and
+ * add the software-maintained part of the counter.
+ *
+ * XXX: need to check for overflow when loaded
+ */
+ val &= ovfl_mask;
+ val += sval;
+ }
+
+ /*
+ * execute read checker, if any
+ */
+ if (unlikely(expert_mode == 0 && rd_func)) {
+ unsigned long v = val;
+ ret = (*rd_func)(ctx->ctx_task, ctx, cnum, &v, regs);
+ if (ret) goto error;
+ val = v;
+ ret = -EINVAL;
+ }
+
+ PFM_REG_RETFLAG_SET(reg_flags, 0);
+
+ DPRINT(("pmd[%u]=0x%lx\n", cnum, val));
+
+ /*
+ * update register return value, abort all if problem during copy.
+ * besides the value, we only modify the reg_flags and
+ * reg_last_reset_val fields. no check mode is fine because
+ * access has been verified upfront in sys_perfmonctl().
+ */
+ req->reg_value = val;
+ req->reg_flags = reg_flags;
+ req->reg_last_reset_val = lval;
+ }
+
+ return 0;
+
+error:
+ PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
+ return ret;
+}
+
+/*
+ * Exported wrapper around pfm_write_pmcs() operating on the context
+ * returned by GET_PMU_CTX().
+ *
+ * Returns -EINVAL when req is NULL or no such context exists, -EBUSY when
+ * task is not the current task and the context is not system-wide, and
+ * the result of pfm_write_pmcs() otherwise.
+ */
+int
+pfm_mod_write_pmcs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
+{
+	pfm_context_t *pmu_ctx;
+
+	if (!req)
+		return -EINVAL;
+
+	pmu_ctx = GET_PMU_CTX();
+	if (!pmu_ctx)
+		return -EINVAL;
+
+	/*
+	 * for now limit to current task, which is enough when calling
+	 * from overflow handler
+	 */
+	if (pmu_ctx->ctx_fl_system == 0 && task != current)
+		return -EBUSY;
+
+	return pfm_write_pmcs(pmu_ctx, req, nreq, regs);
+}
+EXPORT_SYMBOL(pfm_mod_write_pmcs);
+
+/*
+ * Exported wrapper around pfm_read_pmds() operating on the context
+ * returned by GET_PMU_CTX().
+ *
+ * Returns -EINVAL when req is NULL or no such context exists, -EBUSY when
+ * task is not the current task and the context is not system-wide, and
+ * the result of pfm_read_pmds() otherwise.
+ */
+int
+pfm_mod_read_pmds(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
+{
+	pfm_context_t *pmu_ctx;
+
+	if (!req)
+		return -EINVAL;
+
+	pmu_ctx = GET_PMU_CTX();
+	if (!pmu_ctx)
+		return -EINVAL;
+
+	/*
+	 * for now limit to current task, which is enough when calling
+	 * from overflow handler
+	 */
+	if (pmu_ctx->ctx_fl_system == 0 && task != current)
+		return -EBUSY;
+
+	return pfm_read_pmds(pmu_ctx, req, nreq, regs);
+}
+EXPORT_SYMBOL(pfm_mod_read_pmds);
+
+/*
+ * Only call this function when a process is trying to
+ * write the debug registers (reading is always allowed).
+ *
+ * Returns 0 when the task may use the debug registers and -1 when they
+ * are already claimed for monitoring, either by the task's own perfmon
+ * context or by a system-wide session. On success for a task that does
+ * not have IA64_THREAD_DBG_VALID set yet, the count of ptrace users of
+ * the debug registers is incremented.
+ */
+int
+pfm_use_debug_registers(struct task_struct *task)
+{
+	pfm_context_t *ctx = task->thread.pfm_context;
+	unsigned long flags;
+	int ret;
+
+	if (pmu_conf->use_rr_dbregs == 0)
+		return 0;
+
+	DPRINT(("called for [%d]\n", task->pid));
+
+	/*
+	 * do it only once
+	 */
+	if (task->thread.flags & IA64_THREAD_DBG_VALID)
+		return 0;
+
+	/*
+	 * Even on SMP, we do not need to use an atomic here because
+	 * the only way in is via ptrace() and this is possible only when the
+	 * process is stopped. Even in the case where the ctxsw out is not totally
+	 * completed by the time we come here, there is no way the 'stopped' process
+	 * could be in the middle of fiddling with the pfm_write_ibr_dbr() routine.
+	 * So this is always safe.
+	 */
+	if (ctx != NULL && ctx->ctx_fl_using_dbreg == 1)
+		return -1;
+
+	LOCK_PFS(flags);
+
+	/*
+	 * We cannot allow setting breakpoints when system wide monitoring
+	 * sessions are using the debug registers.
+	 */
+	if (pfm_sessions.pfs_sys_use_dbregs > 0) {
+		ret = -1;
+	} else {
+		pfm_sessions.pfs_ptrace_use_dbregs++;
+		ret = 0;
+	}
+
+	DPRINT(("ptrace_use_dbregs=%u sys_use_dbregs=%u by [%d] ret = %d\n",
+		  pfm_sessions.pfs_ptrace_use_dbregs,
+		  pfm_sessions.pfs_sys_use_dbregs,
+		  task->pid, ret));
+
+	UNLOCK_PFS(flags);
+
+	return ret;
+}
+
+/*
+ * This function is called for every task that exits with
+ * IA64_THREAD_DBG_VALID set. This indicates a task which was
+ * able to use the debug registers for debugging purposes via
+ * ptrace(). Therefore we know it was not using them for
+ * performance monitoring, so we only decrement the number
+ * of "ptraced" debug register users to keep the count up to date.
+ *
+ * Returns 0 on success and -1 (after logging an error) when the
+ * count is already zero.
+ */
+int
+pfm_release_debug_registers(struct task_struct *task)
+{
+	unsigned long flags;
+	int ret = 0;
+
+	if (pmu_conf->use_rr_dbregs == 0)
+		return 0;
+
+	LOCK_PFS(flags);
+	if (pfm_sessions.pfs_ptrace_use_dbregs != 0) {
+		pfm_sessions.pfs_ptrace_use_dbregs--;
+	} else {
+		printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task->pid);
+		ret = -1;
+	}
+	UNLOCK_PFS(flags);
+
+	return ret;
+}
+
+static int
+pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{ /*
+ * Restart monitoring for a context.
+ *
+ * Accepted when the context is PFM_CTX_MASKED, or PFM_CTX_LOADED with a sampling
+ * buffer format that provides fmt_restart_active. Returns -EBUSY for the other
+ * known states or when a system-wide context is accessed from a CPU other than
+ * ctx_cpu, and -EINVAL for an unknown state, a missing task, or a second restart
+ * issued to another task before the previous one was seen.
+ *
+ * When the target is the current task or the context is system-wide, the restart
+ * is carried out here; otherwise the target task is either unblocked or flagged so
+ * that it performs the reset itself. arg and count are not used.
+ */
+ struct task_struct *task;
+ pfm_buffer_fmt_t *fmt;
+ pfm_ovfl_ctrl_t rst_ctrl;
+ int state, is_system;
+ int ret = 0;
+
+ state = ctx->ctx_state;
+ fmt = ctx->ctx_buf_fmt;
+ is_system = ctx->ctx_fl_system;
+ task = PFM_CTX_TASK(ctx);
+
+ switch(state) {
+ case PFM_CTX_MASKED:
+ break;
+ case PFM_CTX_LOADED:
+ if (CTX_HAS_SMPL(ctx) && fmt->fmt_restart_active) break;
+ /* fall through */
+ case PFM_CTX_UNLOADED:
+ case PFM_CTX_ZOMBIE:
+ DPRINT(("invalid state=%d\n", state));
+ return -EBUSY;
+ default:
+ DPRINT(("state=%d, cannot operate (no active_restart handler)\n", state));
+ return -EINVAL;
+ }
+
+ /*
+ * In system wide and when the context is loaded, access can only happen
+ * when the caller is running on the CPU being monitored by the session.
+ * It does not have to be the owner (ctx_task) of the context per se.
+ */
+ if (is_system && ctx->ctx_cpu != smp_processor_id()) {
+ DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
+ return -EBUSY;
+ }
+
+ /* sanity check */
+ if (unlikely(task == NULL)) {
+ printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", current->pid);
+ return -EINVAL;
+ }
+
+ if (task == current || is_system) {
+
+ fmt = ctx->ctx_buf_fmt;
+
+ DPRINT(("restarting self %d ovfl=0x%lx\n",
+ task->pid,
+ ctx->ctx_ovfl_regs[0]));
+
+ if (CTX_HAS_SMPL(ctx)) {
+
+ prefetch(ctx->ctx_smpl_hdr);
+
+ rst_ctrl.bits.mask_monitoring = 0;
+ rst_ctrl.bits.reset_ovfl_pmds = 0;
+
+ if (state == PFM_CTX_LOADED)
+ ret = pfm_buf_fmt_restart_active(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
+ else
+ ret = pfm_buf_fmt_restart(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
+ } else {
+ rst_ctrl.bits.mask_monitoring = 0;
+ rst_ctrl.bits.reset_ovfl_pmds = 1;
+ }
+
+ if (ret == 0) {
+ if (rst_ctrl.bits.reset_ovfl_pmds)
+ pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET);
+
+ if (rst_ctrl.bits.mask_monitoring == 0) {
+ DPRINT(("resuming monitoring for [%d]\n", task->pid));
+
+ if (state == PFM_CTX_MASKED) pfm_restore_monitoring(task);
+ } else {
+ DPRINT(("keeping monitoring stopped for [%d]\n", task->pid));
+
+ // cannot use pfm_stop_monitoring(task, regs);
+ }
+ }
+ /*
+ * clear overflowed PMD mask to remove any stale information
+ */
+ ctx->ctx_ovfl_regs[0] = 0UL;
+
+ /*
+ * back to LOADED state
+ */
+ ctx->ctx_state = PFM_CTX_LOADED;
+
+ /*
+ * XXX: not really useful for self monitoring
+ *
+ * NOTE(review): a non-zero ret from the buffer format restart callback only
+ * skips the reset/resume step above; it is not propagated, 0 is returned.
+ */
+ ctx->ctx_fl_can_restart = 0;
+
+ return 0;
+ }
+
+ /*
+ * restart another task
+ */
+
+ /*
+ * When PFM_CTX_MASKED, we cannot issue a restart before the previous
+ * one is seen by the task.
+ */
+ if (state == PFM_CTX_MASKED) {
+ if (ctx->ctx_fl_can_restart == 0) return -EINVAL;
+ /*
+ * will prevent subsequent restart before this one is
+ * seen by other task
+ */
+ ctx->ctx_fl_can_restart = 0;
+ }
+
+ /*
+ * if blocking, then post the semaphore if PFM_CTX_MASKED, i.e.
+ * the task is blocked or on its way to block. That's the normal
+ * restart path. If the monitoring is not masked, then the task
+ * can be actively monitoring and we cannot directly intervene.
+ * Therefore we use the trap mechanism to catch the task and
+ * force it to reset the buffer/reset PMDs.
+ *
+ * if non-blocking, then we ensure that the task will go into
+ * pfm_handle_work() before returning to user mode.
+ *
+ * We cannot explicitly reset another task, it MUST always
+ * be done by the task itself. This works for system wide because
+ * the tool that is controlling the session is logically doing
+ * "self-monitoring".
+ */
+ if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) {
+ DPRINT(("unblocking [%d] \n", task->pid));
+ up(&ctx->ctx_restart_sem);
+ } else {
+ DPRINT(("[%d] armed exit trap\n", task->pid));
+
+ ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET;
+
+ PFM_SET_WORK_PENDING(task, 1);
+
+ pfm_set_task_notify(task);
+
+ /*
+ * XXX: send reschedule if task runs on another CPU
+ */
+ }
+ return 0;
+}
+
+/*
+ * Turn perfmon debugging on or off.
+ *
+ * arg points to an unsigned int: zero disables debugging, any other value
+ * enables it. Disabling also clears pfm_stats and re-initializes the
+ * per-CPU pfm_ovfl_intr_cycles_min fields to ~0UL. Always returns 0.
+ */
+static int
+pfm_debug(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+	unsigned int enable = *(unsigned int *)arg;
+	unsigned int cpu;
+
+	if (enable == 0)
+		pfm_sysctl.debug = 0;
+	else
+		pfm_sysctl.debug = 1;
+
+	pfm_debug_var = pfm_sysctl.debug;
+
+	printk(KERN_INFO "perfmon debugging %s (timing reset)\n", pfm_sysctl.debug ? "on" : "off");
+
+	if (enable != 0)
+		return 0;
+
+	memset(pfm_stats, 0, sizeof(pfm_stats));
+	for (cpu = 0; cpu < NR_CPUS; cpu++)
+		pfm_stats[cpu].pfm_ovfl_intr_cycles_min = ~0UL;
+
+	return 0;
+}
+
+/*
+ * Write instruction (mode == PFM_CODE_RR) or data (mode == PFM_DATA_RR)
+ * debug registers on behalf of a perfmon context.
+ *
+ * arg points to an array of count pfarg_dbreg_t requests; arg can be NULL
+ * and count can be zero for this function. Values are always recorded in
+ * ctx_ibrs[]/ctx_dbrs[] and written to the hardware only when the PMU can
+ * be accessed. For odd register numbers the enable bits (ibr.x, dbr.r,
+ * dbr.w) are cleared so that no real breakpoint can be installed.
+ *
+ * Returns 0 on success, -EINVAL when the PMU does not use debug registers,
+ * the context is a zombie, or a register number is out of range, and
+ * -EBUSY when the context is accessed from the wrong CPU, the task is
+ * being debugged, or ptrace already uses the debug registers.
+ *
+ * The global count of system-wide debug register users is only bumped
+ * here when the context is loaded, so the error path must only undo it in
+ * that case; sys_dbregs_counted records whether the bump happened.
+ */
+static int
+pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+	struct thread_struct *thread = NULL;
+	struct task_struct *task;
+	pfarg_dbreg_t *req = (pfarg_dbreg_t *)arg;
+	unsigned long flags;
+	dbreg_t dbreg;
+	unsigned int rnum;
+	int first_time;
+	int ret = 0, state;
+	int i, can_access_pmu = 0;
+	int is_system, is_loaded;
+	int sys_dbregs_counted = 0;
+
+	if (pmu_conf->use_rr_dbregs == 0) return -EINVAL;
+
+	state     = ctx->ctx_state;
+	is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
+	is_system = ctx->ctx_fl_system;
+	task      = ctx->ctx_task;
+
+	if (state == PFM_CTX_ZOMBIE) return -EINVAL;
+
+	/*
+	 * on both UP and SMP, we can only write to the hardware when the task is
+	 * the owner of the local PMU.
+	 */
+	if (is_loaded) {
+		thread = &task->thread;
+		/*
+		 * In system wide and when the context is loaded, access can only happen
+		 * when the caller is running on the CPU being monitored by the session.
+		 * It does not have to be the owner (ctx_task) of the context per se.
+		 */
+		if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) {
+			DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
+			return -EBUSY;
+		}
+		can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
+	}
+
+	/*
+	 * we do not need to check for ipsr.db because we do clear ibr.x, dbr.r, and dbr.w
+	 * ensuring that no real breakpoint can be installed via this call.
+	 *
+	 * IMPORTANT: regs can be NULL in this function
+	 */
+
+	first_time = ctx->ctx_fl_using_dbreg == 0;
+
+	/*
+	 * don't bother if we are loaded and task is being debugged
+	 */
+	if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) {
+		DPRINT(("debug registers already in use for [%d]\n", task->pid));
+		return -EBUSY;
+	}
+
+	/*
+	 * check for debug registers in system wide mode
+	 *
+	 * Even though a check is done in pfm_context_load(),
+	 * we must repeat it here, in case the registers are
+	 * written after the context is loaded
+	 */
+	if (is_loaded) {
+		LOCK_PFS(flags);
+
+		if (first_time && is_system) {
+			if (pfm_sessions.pfs_ptrace_use_dbregs)
+				ret = -EBUSY;
+			else {
+				pfm_sessions.pfs_sys_use_dbregs++;
+				sys_dbregs_counted = 1;
+			}
+		}
+		UNLOCK_PFS(flags);
+	}
+
+	if (ret != 0) return ret;
+
+	/*
+	 * mark ourself as user of the debug registers for
+	 * perfmon purposes.
+	 */
+	ctx->ctx_fl_using_dbreg = 1;
+
+	/*
+ 	 * clear hardware registers to make sure we don't
+ 	 * pick up stale state.
+	 *
+	 * for a system wide session, we do not use
+	 * thread.dbr, thread.ibr because this process
+	 * never leaves the current CPU and the state
+	 * is shared by all processes running on it
+ 	 */
+	if (first_time && can_access_pmu) {
+		DPRINT(("[%d] clearing ibrs, dbrs\n", task->pid));
+		for (i=0; i < pmu_conf->num_ibrs; i++) {
+			ia64_set_ibr(i, 0UL);
+			ia64_dv_serialize_instruction();
+		}
+		ia64_srlz_i();
+		for (i=0; i < pmu_conf->num_dbrs; i++) {
+			ia64_set_dbr(i, 0UL);
+			ia64_dv_serialize_data();
+		}
+		ia64_srlz_d();
+	}
+
+	/*
+	 * Now install the values into the registers
+	 */
+	for (i = 0; i < count; i++, req++) {
+
+		rnum      = req->dbreg_num;
+		dbreg.val = req->dbreg_value;
+
+		ret = -EINVAL;
+
+		if ((mode == PFM_CODE_RR && rnum >= PFM_NUM_IBRS) || ((mode == PFM_DATA_RR) && rnum >= PFM_NUM_DBRS)) {
+			DPRINT(("invalid register %u val=0x%lx mode=%d i=%d count=%d\n",
+				  rnum, dbreg.val, mode, i, count));
+
+			goto abort_mission;
+		}
+
+		/*
+		 * make sure we do not install enabled breakpoint
+		 */
+		if (rnum & 0x1) {
+			if (mode == PFM_CODE_RR)
+				dbreg.ibr.ibr_x = 0;
+			else
+				dbreg.dbr.dbr_r = dbreg.dbr.dbr_w = 0;
+		}
+
+		PFM_REG_RETFLAG_SET(req->dbreg_flags, 0);
+
+		/*
+		 * Debug registers, just like PMC, can only be modified
+		 * by a kernel call. Moreover, perfmon() access to those
+		 * registers are centralized in this routine. The hardware
+		 * does not modify the value of these registers, therefore,
+		 * if we save them as they are written, we can avoid having
+		 * to save them on context switch out. This is made possible
+		 * by the fact that when perfmon uses debug registers, ptrace()
+		 * won't be able to modify them concurrently.
+		 */
+		if (mode == PFM_CODE_RR) {
+			CTX_USED_IBR(ctx, rnum);
+
+			if (can_access_pmu) {
+				ia64_set_ibr(rnum, dbreg.val);
+				ia64_dv_serialize_instruction();
+			}
+
+			ctx->ctx_ibrs[rnum] = dbreg.val;
+
+			DPRINT(("write ibr%u=0x%lx used_ibrs=0x%x ld=%d apmu=%d\n",
+				rnum, dbreg.val, ctx->ctx_used_ibrs[0], is_loaded, can_access_pmu));
+		} else {
+			CTX_USED_DBR(ctx, rnum);
+
+			if (can_access_pmu) {
+				ia64_set_dbr(rnum, dbreg.val);
+				ia64_dv_serialize_data();
+			}
+			ctx->ctx_dbrs[rnum] = dbreg.val;
+
+			DPRINT(("write dbr%u=0x%lx used_dbrs=0x%x ld=%d apmu=%d\n",
+				rnum, dbreg.val, ctx->ctx_used_dbrs[0], is_loaded, can_access_pmu));
+		}
+	}
+
+	return 0;
+
+abort_mission:
+	/*
+	 * in case it was our first attempt, we undo the global modifications.
+	 * The session count is only decremented when it was incremented above,
+	 * i.e. for a loaded system-wide context; otherwise an unloaded context
+	 * hitting this path would drive the counter below its real value.
+	 */
+	if (first_time) {
+		if (sys_dbregs_counted) {
+			LOCK_PFS(flags);
+			pfm_sessions.pfs_sys_use_dbregs--;
+			UNLOCK_PFS(flags);
+		}
+		ctx->ctx_fl_using_dbreg = 0;
+	}
+	/*
+	 * install error return flag
+	 */
+	PFM_REG_RETFLAG_SET(req->dbreg_flags, PFM_REG_RETFL_EINVAL);
+
+	return ret;
+}
+
+/*
+ * Write instruction debug registers: pfm_write_ibr_dbr() in
+ * PFM_CODE_RR mode.
+ */
+static int
+pfm_write_ibrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+	int ret;
+
+	ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, arg, count, regs);
+
+	return ret;
+}
+
+/*
+ * Write data debug registers: pfm_write_ibr_dbr() in
+ * PFM_DATA_RR mode.
+ */
+static int
+pfm_write_dbrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+	int ret;
+
+	ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, arg, count, regs);
+
+	return ret;
+}
+
+/*
+ * Exported wrapper around pfm_write_ibrs() operating on the context
+ * returned by GET_PMU_CTX().
+ *
+ * Returns -EINVAL when req is NULL or no such context exists, -EBUSY when
+ * task is not the current task and the context is not system-wide, and
+ * the result of pfm_write_ibrs() otherwise.
+ */
+int
+pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
+{
+	pfm_context_t *pmu_ctx;
+
+	if (!req)
+		return -EINVAL;
+
+	pmu_ctx = GET_PMU_CTX();
+	if (!pmu_ctx)
+		return -EINVAL;
+
+	/*
+	 * for now limit to current task, which is enough when calling
+	 * from overflow handler
+	 */
+	if (pmu_ctx->ctx_fl_system == 0 && task != current)
+		return -EBUSY;
+
+	return pfm_write_ibrs(pmu_ctx, req, nreq, regs);
+}
+EXPORT_SYMBOL(pfm_mod_write_ibrs);
+
+/*
+ * Exported wrapper around pfm_write_dbrs() operating on the context
+ * returned by GET_PMU_CTX().
+ *
+ * Returns -EINVAL when req is NULL or no such context exists, -EBUSY when
+ * task is not the current task and the context is not system-wide, and
+ * the result of pfm_write_dbrs() otherwise.
+ */
+int
+pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
+{
+	pfm_context_t *pmu_ctx;
+
+	if (!req)
+		return -EINVAL;
+
+	pmu_ctx = GET_PMU_CTX();
+	if (!pmu_ctx)
+		return -EINVAL;
+
+	/*
+	 * for now limit to current task, which is enough when calling
+	 * from overflow handler
+	 */
+	if (pmu_ctx->ctx_fl_system == 0 && task != current)
+		return -EBUSY;
+
+	return pfm_write_dbrs(pmu_ctx, req, nreq, regs);
+}
+EXPORT_SYMBOL(pfm_mod_write_dbrs);
+
+
+/*
+ * Report the perfmon version: stores PFM_VERSION in the ft_version field
+ * of the pfarg_features_t pointed to by arg. Always returns 0.
+ */
+static int
+pfm_get_features(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+	pfarg_features_t *features = (pfarg_features_t *)arg;
+
+	features->ft_version = PFM_VERSION;
+
+	return 0;
+}
+
+/*
+ * Stop monitoring for a context.
+ *
+ * Returns -EINVAL when the context is PFM_CTX_UNLOADED, -EBUSY when a
+ * system-wide context is accessed from a CPU other than ctx_cpu, and 0
+ * otherwise.
+ *
+ * System-wide: dcr.pp and psr.pp are cleared on the local CPU and psr.pp
+ * is cleared in the caller's saved registers. Per-task: psr.up is cleared,
+ * either directly when the monitored task is the caller, or in the saved
+ * registers of the other task.
+ */
+static int
+pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+	struct task_struct *task = PFM_CTX_TASK(ctx);
+	struct pt_regs *tregs;
+	int state = ctx->ctx_state;
+	int is_system = ctx->ctx_fl_system;
+
+	/*
+	 * context must be attached to issue the stop command (includes LOADED,MASKED,ZOMBIE)
+	 */
+	if (state == PFM_CTX_UNLOADED)
+		return -EINVAL;
+
+	/*
+ 	 * In system wide and when the context is loaded, access can only happen
+ 	 * when the caller is running on the CPU being monitored by the session.
+ 	 * It does not have to be the owner (ctx_task) of the context per se.
+ 	 */
+	if (is_system && ctx->ctx_cpu != smp_processor_id()) {
+		DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
+		return -EBUSY;
+	}
+	DPRINT(("task [%d] ctx_state=%d is_system=%d\n",
+		PFM_CTX_TASK(ctx)->pid,
+		state,
+		is_system));
+
+	/*
+	 * in system mode, we need to update the PMU directly
+	 * and the user level state of the caller, which may not
+	 * necessarily be the creator of the context.
+	 */
+	if (is_system) {
+		/* update local PMU first: disable dcr pp */
+		ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP);
+		ia64_srlz_i();
+
+		/* update local cpuinfo */
+		PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);
+
+		/* stop monitoring, does srlz.i */
+		pfm_clear_psr_pp();
+
+		/* stop monitoring in the caller */
+		ia64_psr(regs)->pp = 0;
+
+		return 0;
+	}
+
+	/*
+	 * per-task mode, monitoring some other task
+	 */
+	if (task != current) {
+		tregs = ia64_task_regs(task);
+
+		/* stop monitoring at the user level */
+		ia64_psr(tregs)->up = 0;
+
+		/* monitoring disabled in kernel at next reschedule */
+		ctx->ctx_saved_psr_up = 0;
+		DPRINT(("task=[%d]\n", task->pid));
+
+		return 0;
+	}
+
+	/*
+	 * per-task mode, self-monitoring
+	 */
+
+	/* stop monitoring at kernel level */
+	pfm_clear_psr_up();
+
+	/* stop monitoring at the user level */
+	ia64_psr(regs)->up = 0;
+
+	return 0;
+}
+
+
+/*
+ * Command handler: start monitoring for a loaded context. arg and count
+ * are unused.
+ *
+ * Returns -EINVAL unless the context is in PFM_CTX_LOADED state and
+ * -EBUSY when a system-wide context is accessed from a CPU other than
+ * ctx->ctx_cpu; returns 0 otherwise.
+ *
+ * System-wide: sets psr.pp in the caller's saved registers, the per-CPU
+ * DCR_PP cpuinfo flag, psr.pp on the running CPU and finally dcr.pp.
+ * Per-task: sets psr.up, either live (ctx_task == current) or in the saved
+ * registers of the monitored task, in which case ctx_saved_psr_up is set
+ * to IA64_PSR_UP.
+ */
+static int
+pfm_start(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+ struct pt_regs *tregs;
+ int state, is_system;
+
+ state = ctx->ctx_state;
+ is_system = ctx->ctx_fl_system;
+
+ if (state != PFM_CTX_LOADED) return -EINVAL;
+
+ /*
+ * In system wide and when the context is loaded, access can only happen
+ * when the caller is running on the CPU being monitored by the session.
+ * It does not have to be the owner (ctx_task) of the context per se.
+ */
+ if (is_system && ctx->ctx_cpu != smp_processor_id()) {
+ DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
+ return -EBUSY;
+ }
+
+ /*
+ * in system mode, we need to update the PMU directly
+ * and the user level state of the caller, which may not
+ * necessarily be the creator of the context.
+ */
+ if (is_system) {
+
+ /*
+ * set user level psr.pp for the caller
+ */
+ ia64_psr(regs)->pp = 1;
+
+ /*
+ * now update the local PMU and cpuinfo
+ */
+ PFM_CPUINFO_SET(PFM_CPUINFO_DCR_PP);
+
+ /*
+ * start monitoring at kernel level
+ */
+ pfm_set_psr_pp();
+
+ /* enable dcr pp */
+ ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP);
+ ia64_srlz_i();
+
+ return 0;
+ }
+
+ /*
+ * per-process mode
+ */
+
+ if (ctx->ctx_task == current) {
+
+ /* start monitoring at kernel level */
+ pfm_set_psr_up();
+
+ /*
+ * activate monitoring at user level
+ */
+ ia64_psr(regs)->up = 1;
+
+ } else {
+ /* the monitored task is not running here: edit its saved registers */
+ tregs = ia64_task_regs(ctx->ctx_task);
+
+ /*
+ * start monitoring at the kernel level the next
+ * time the task is scheduled
+ */
+ ctx->ctx_saved_psr_up = IA64_PSR_UP;
+
+ /*
+ * activate monitoring at user level
+ */
+ ia64_psr(tregs)->up = 1;
+ }
+ return 0;
+}
+
+/*
+ * Command handler: for each of the count pfarg_reg_t entries in arg,
+ * store the default value of the PMC named by reg_num into reg_value
+ * and clear the entry's return flag.
+ *
+ * Processing stops at the first register that is not implemented: that
+ * entry is flagged with PFM_REG_RETFL_EINVAL and -EINVAL is returned.
+ * Returns 0 when every entry was filled in. ctx and regs are not used.
+ */
+static int
+pfm_get_pmc_reset(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+	pfarg_reg_t *entries = (pfarg_reg_t *)arg;
+	int idx = 0;
+
+	while (idx < count) {
+		pfarg_reg_t *slot = &entries[idx];
+		unsigned int cnum = slot->reg_num;
+
+		if (!PMC_IS_IMPL(cnum)) {
+			/* report the offending entry back to the caller */
+			PFM_REG_RETFLAG_SET(slot->reg_flags, PFM_REG_RETFL_EINVAL);
+			return -EINVAL;
+		}
+
+		slot->reg_value = PMC_DFL_VAL(cnum);
+
+		PFM_REG_RETFLAG_SET(slot->reg_flags, 0);
+
+		DPRINT(("pmc_reset_val pmc[%u]=0x%lx\n", cnum, slot->reg_value));
+
+		idx++;
+	}
+	return 0;
+}
+
+/*
+ * Check whether any thread in the system still has ctx attached as its
+ * thread.pfm_context.
+ *
+ * The task list is walked under read_lock(&tasklist_lock).
+ * Returns 0 when such a thread is found, -ESRCH otherwise.
+ */
+static int
+pfm_check_task_exist(pfm_context_t *ctx)
+{
+	struct task_struct *g, *t;
+	int ret = -ESRCH;
+
+	read_lock(&tasklist_lock);
+
+	do_each_thread (g, t) {
+		if (t->thread.pfm_context == ctx) {
+			ret = 0;
+			/*
+			 * do_each_thread()/while_each_thread() form two nested
+			 * loops; a plain break would only leave the inner
+			 * per-thread loop and keep scanning every remaining
+			 * process with tasklist_lock held. Jump out instead.
+			 */
+			goto out;
+		}
+	} while_each_thread (g, t);
+out:
+	read_unlock(&tasklist_lock);
+
+	DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", ret, ctx));
+
+	return ret;
+}
+
+/*
+ * Command handler: attach (load) an unloaded context onto the task named
+ * by req->load_pid. count is unused.
+ *
+ * Returns 0 on success, otherwise a negative errno:
+ *  -EINVAL  context not UNLOADED, blocking mode requested on self, or a
+ *           system-wide context loaded onto a task other than current
+ *  -EBUSY   target task is being debugged or debug registers are in use
+ *           while the context uses range restrictions, or the target task
+ *           already has a context attached
+ *  other    whatever pfm_get_task(), pfm_reserve_session() or
+ *           pfm_check_task_exist() report
+ *
+ * On every failure path the session reservation and the system-wide
+ * debug register use count taken here are released again.
+ */
+static int
+pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+	struct task_struct *task;
+	struct thread_struct *thread;
+	pfm_context_t *old;
+	unsigned long flags;
+#ifndef CONFIG_SMP
+	struct task_struct *owner_task = NULL;
+#endif
+	pfarg_load_t *req = (pfarg_load_t *)arg;
+	unsigned long *pmcs_source, *pmds_source;
+	int the_cpu;
+	int ret = 0;
+	int state, is_system, set_dbregs = 0;
+
+	state = ctx->ctx_state;
+	is_system = ctx->ctx_fl_system;
+	/*
+	 * can only load from unloaded or terminated state
+	 */
+	if (state != PFM_CTX_UNLOADED) {
+		DPRINT(("cannot load to [%d], invalid ctx_state=%d\n",
+			req->load_pid,
+			ctx->ctx_state));
+		return -EINVAL;
+	}
+
+	DPRINT(("load_pid [%d] using_dbreg=%d\n", req->load_pid, ctx->ctx_fl_using_dbreg));
+
+	if (CTX_OVFL_NOBLOCK(ctx) == 0 && req->load_pid == current->pid) {
+		DPRINT(("cannot use blocking mode on self\n"));
+		return -EINVAL;
+	}
+
+	ret = pfm_get_task(ctx, req->load_pid, &task);
+	if (ret) {
+		DPRINT(("load_pid [%d] get_task=%d\n", req->load_pid, ret));
+		return ret;
+	}
+
+	ret = -EINVAL;
+
+	/*
+	 * system wide is self monitoring only
+	 */
+	if (is_system && task != current) {
+		DPRINT(("system wide is self monitoring only load_pid=%d\n",
+			req->load_pid));
+		goto error;
+	}
+
+	thread = &task->thread;
+
+	ret = 0;
+	/*
+	 * cannot load a context which is using range restrictions,
+	 * into a task that is being debugged.
+	 */
+	if (ctx->ctx_fl_using_dbreg) {
+		if (thread->flags & IA64_THREAD_DBG_VALID) {
+			ret = -EBUSY;
+			DPRINT(("load_pid [%d] task is debugged, cannot load range restrictions\n", req->load_pid));
+			goto error;
+		}
+		LOCK_PFS(flags);
+
+		if (is_system) {
+			if (pfm_sessions.pfs_ptrace_use_dbregs) {
+				DPRINT(("cannot load [%d] dbregs in use\n", task->pid));
+				ret = -EBUSY;
+			} else {
+				pfm_sessions.pfs_sys_use_dbregs++;
+				DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task->pid, pfm_sessions.pfs_sys_use_dbregs));
+				set_dbregs = 1;
+			}
+		}
+
+		UNLOCK_PFS(flags);
+
+		if (ret) goto error;
+	}
+
+	/*
+	 * SMP system-wide monitoring implies self-monitoring.
+	 *
+	 * The programming model expects the task to
+	 * be pinned on a CPU throughout the session.
+	 * Here we take note of the current CPU at the
+	 * time the context is loaded. No call from
+	 * another CPU will be allowed.
+	 *
+	 * The pinning via sched_setaffinity()
+	 * must be done by the calling task prior
+	 * to this call.
+	 *
+	 * systemwide: keep track of CPU this session is supposed to run on
+	 */
+	the_cpu = ctx->ctx_cpu = smp_processor_id();
+
+	/*
+	 * now reserve the session
+	 */
+	ret = pfm_reserve_session(current, is_system, the_cpu);
+	if (ret) goto error;
+
+	/*
+	 * task is necessarily stopped at this point.
+	 *
+	 * If the previous context was zombie, then it got removed in
+	 * pfm_save_regs(). Therefore we should not see it here.
+	 * If we see a context, then this is an active context
+	 *
+	 * XXX: needs to be atomic
+	 */
+	DPRINT(("before cmpxchg() old_ctx=%p new_ctx=%p\n",
+		thread->pfm_context, ctx));
+
+	/*
+	 * ret is 0 after a successful reservation; it must carry an error
+	 * here so that a lost cmpxchg race unreserves the session, undoes
+	 * the dbregs accounting and is reported to the caller as a failure.
+	 */
+	ret = -EBUSY;
+	old = ia64_cmpxchg(acq, &thread->pfm_context, NULL, ctx, sizeof(pfm_context_t *));
+	if (old != NULL) {
+		DPRINT(("load_pid [%d] already has a context\n", req->load_pid));
+		goto error_unres;
+	}
+
+	pfm_reset_msgq(ctx);
+
+	ctx->ctx_state = PFM_CTX_LOADED;
+
+	/*
+	 * link context to task
+	 */
+	ctx->ctx_task = task;
+
+	if (is_system) {
+		/*
+		 * we load as stopped
+		 */
+		PFM_CPUINFO_SET(PFM_CPUINFO_SYST_WIDE);
+		PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);
+
+		if (ctx->ctx_fl_excl_idle) PFM_CPUINFO_SET(PFM_CPUINFO_EXCL_IDLE);
+	} else {
+		thread->flags |= IA64_THREAD_PM_VALID;
+	}
+
+	/*
+	 * propagate into thread-state
+	 */
+	pfm_copy_pmds(task, ctx);
+	pfm_copy_pmcs(task, ctx);
+
+	pmcs_source = thread->pmcs;
+	pmds_source = thread->pmds;
+
+	/*
+	 * always the case for system-wide
+	 */
+	if (task == current) {
+
+		if (is_system == 0) {
+
+			/* allow user level control */
+			ia64_psr(regs)->sp = 0;
+			DPRINT(("clearing psr.sp for [%d]\n", task->pid));
+
+			SET_LAST_CPU(ctx, smp_processor_id());
+			INC_ACTIVATION();
+			SET_ACTIVATION(ctx);
+#ifndef CONFIG_SMP
+			/*
+			 * push the other task out, if any
+			 */
+			owner_task = GET_PMU_OWNER();
+			if (owner_task) pfm_lazy_save_regs(owner_task);
+#endif
+		}
+		/*
+		 * load all PMD from ctx to PMU (as opposed to thread state)
+		 * restore all PMC from ctx to PMU
+		 */
+		pfm_restore_pmds(pmds_source, ctx->ctx_all_pmds[0]);
+		pfm_restore_pmcs(pmcs_source, ctx->ctx_all_pmcs[0]);
+
+		ctx->ctx_reload_pmcs[0] = 0UL;
+		ctx->ctx_reload_pmds[0] = 0UL;
+
+		/*
+		 * guaranteed safe by earlier check against DBG_VALID
+		 */
+		if (ctx->ctx_fl_using_dbreg) {
+			pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
+			pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
+		}
+		/*
+		 * set new ownership
+		 */
+		SET_PMU_OWNER(task, ctx);
+
+		DPRINT(("context loaded on PMU for [%d]\n", task->pid));
+	} else {
+		/*
+		 * when not current, task MUST be stopped, so this is safe
+		 */
+		regs = ia64_task_regs(task);
+
+		/* force a full reload */
+		ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
+		SET_LAST_CPU(ctx, -1);
+
+		/* initial saved psr (stopped) */
+		ctx->ctx_saved_psr_up = 0UL;
+		ia64_psr(regs)->up = ia64_psr(regs)->pp = 0;
+	}
+
+	ret = 0;
+
+error_unres:
+	if (ret) pfm_unreserve_session(ctx, ctx->ctx_fl_system, the_cpu);
+error:
+	/*
+	 * we must undo the dbregs setting (for system-wide)
+	 */
+	if (ret && set_dbregs) {
+		LOCK_PFS(flags);
+		pfm_sessions.pfs_sys_use_dbregs--;
+		UNLOCK_PFS(flags);
+	}
+	/*
+	 * release task, there is now a link with the context
+	 */
+	if (is_system == 0 && task != current) {
+		pfm_put_task(task);
+
+		if (ret == 0) {
+			/*
+			 * the task may have gone away while we were
+			 * attaching; if so roll the context back to unloaded
+			 */
+			ret = pfm_check_task_exist(ctx);
+			if (ret) {
+				ctx->ctx_state = PFM_CTX_UNLOADED;
+				ctx->ctx_task = NULL;
+			}
+		}
+	}
+	return ret;
+}
+
+/*
+ * in this function, we do not need to increase the use count
+ * for the task via get_task_struct(), because we hold the
+ * context lock. If the task were to disappear while having
+ * a context attached, it would go through pfm_exit_thread()
+ * which also grabs the context lock and would therefore be blocked
+ * until we are here.
+ */
+static void pfm_flush_pmds(struct task_struct *, pfm_context_t *ctx);
+
+/*
+ * Command handler: detach (unload) a context from its task. arg and count
+ * are unused.
+ *
+ * Returns 0 when the context is already UNLOADED or once the unload is
+ * done; returns the error from pfm_stop() if monitoring cannot be stopped.
+ *
+ * After stopping, the state becomes PFM_CTX_UNLOADED, PMDs are saved via
+ * pfm_flush_pmds(), the session is unreserved (unless the previous state
+ * was ZOMBIE) and the task<->context links are cleared.
+ */
+static int
+pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+ struct task_struct *task = PFM_CTX_TASK(ctx);
+ struct pt_regs *tregs;
+ int prev_state, is_system;
+ int ret;
+
+ DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task->pid : -1));
+
+ prev_state = ctx->ctx_state;
+ is_system = ctx->ctx_fl_system;
+
+ /*
+ * unload only when necessary
+ */
+ if (prev_state == PFM_CTX_UNLOADED) {
+ DPRINT(("ctx_state=%d, nothing to do\n", prev_state));
+ return 0;
+ }
+
+ /*
+ * clear psr and dcr bits
+ */
+ ret = pfm_stop(ctx, NULL, 0, regs);
+ if (ret) return ret;
+
+ ctx->ctx_state = PFM_CTX_UNLOADED;
+
+ /*
+ * in system mode, we need to update the PMU directly
+ * and the user level state of the caller, which may not
+ * necessarily be the creator of the context.
+ */
+ if (is_system) {
+
+ /*
+ * Update cpuinfo
+ *
+ * local PMU is taken care of in pfm_stop()
+ */
+ PFM_CPUINFO_CLEAR(PFM_CPUINFO_SYST_WIDE);
+ PFM_CPUINFO_CLEAR(PFM_CPUINFO_EXCL_IDLE);
+
+ /*
+ * save PMDs in context
+ * release ownership
+ */
+ pfm_flush_pmds(current, ctx);
+
+ /*
+ * at this point we are done with the PMU
+ * so we can unreserve the resource.
+ */
+ if (prev_state != PFM_CTX_ZOMBIE)
+ pfm_unreserve_session(ctx, 1 , ctx->ctx_cpu);
+
+ /*
+ * disconnect context from task
+ */
+ task->thread.pfm_context = NULL;
+ /*
+ * disconnect task from context
+ */
+ ctx->ctx_task = NULL;
+
+ /*
+ * There is nothing more to cleanup here.
+ */
+ return 0;
+ }
+
+ /*
+ * per-task mode
+ */
+ /* NOTE(review): tregs is assigned here but not read again in this function */
+ tregs = task == current ? regs : ia64_task_regs(task);
+
+ if (task == current) {
+ /*
+ * cancel user level control
+ */
+ ia64_psr(regs)->sp = 1;
+
+ DPRINT(("setting psr.sp for [%d]\n", task->pid));
+ }
+ /*
+ * save PMDs to context
+ * release ownership
+ */
+ pfm_flush_pmds(task, ctx);
+
+ /*
+ * at this point we are done with the PMU
+ * so we can unreserve the resource.
+ *
+ * when state was ZOMBIE, we have already unreserved.
+ */
+ if (prev_state != PFM_CTX_ZOMBIE)
+ pfm_unreserve_session(ctx, 0 , ctx->ctx_cpu);
+
+ /*
+ * reset activation counter and psr
+ */
+ ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
+ SET_LAST_CPU(ctx, -1);
+
+ /*
+ * PMU state will not be restored
+ */
+ task->thread.flags &= ~IA64_THREAD_PM_VALID;
+
+ /*
+ * break links between context and task
+ */
+ task->thread.pfm_context = NULL;
+ ctx->ctx_task = NULL;
+
+ PFM_SET_WORK_PENDING(task, 0);
+
+ ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE;
+ ctx->ctx_fl_can_restart = 0;
+ ctx->ctx_fl_going_zombie = 0;
+
+ DPRINT(("disconnected [%d] from context\n", task->pid));
+
+ return 0;
+}
+
+
+/*
+ * called only from exit_thread(): task == current
+ * we come here only if current has a context attached (loaded or masked)
+ *
+ * Unloads the context attached to task under the context lock. For
+ * LOADED/MASKED contexts an end-of-session message is posted through
+ * pfm_end_notify_user(); a ZOMBIE context is freed once the lock has been
+ * dropped. Unload failures and unexpected states are only logged.
+ */
+void
+pfm_exit_thread(struct task_struct *task)
+{
+ pfm_context_t *ctx;
+ unsigned long flags;
+ struct pt_regs *regs = ia64_task_regs(task);
+ int ret, state;
+ int free_ok = 0;
+
+ ctx = PFM_GET_CTX(task);
+
+ PROTECT_CTX(ctx, flags);
+
+ DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task->pid));
+
+ state = ctx->ctx_state;
+ switch(state) {
+ case PFM_CTX_UNLOADED:
+ /*
+ * only comes to this function if pfm_context is not NULL, i.e., cannot
+ * be in unloaded state
+ */
+ printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task->pid);
+ break;
+ case PFM_CTX_LOADED:
+ case PFM_CTX_MASKED:
+ ret = pfm_context_unload(ctx, NULL, 0, regs);
+ if (ret) {
+ printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret);
+ }
+ DPRINT(("ctx unloaded for current state was %d\n", state));
+
+ pfm_end_notify_user(ctx);
+ break;
+ case PFM_CTX_ZOMBIE:
+ ret = pfm_context_unload(ctx, NULL, 0, regs);
+ if (ret) {
+ printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret);
+ }
+ /* the context is released below, after the lock is dropped */
+ free_ok = 1;
+ break;
+ default:
+ printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task->pid, state);
+ break;
+ }
+ UNPROTECT_CTX(ctx, flags);
+
+ /* sanity checks: no monitoring bit and no PMU owner may remain at this point */
+ { u64 psr = pfm_get_psr();
+ BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));
+ BUG_ON(GET_PMU_OWNER());
+ BUG_ON(ia64_psr(regs)->up);
+ BUG_ON(ia64_psr(regs)->pp);
+ }
+
+ /*
+ * All memory free operations (especially for vmalloc'ed memory)
+ * MUST be done with interrupts ENABLED.
+ */
+ if (free_ok) pfm_context_free(ctx);
+}
+
+/*
+ * functions MUST be listed in the increasing order of their index (see perfmon.h)
+ *
+ * The table is indexed directly by the command number passed to
+ * sys_perfmonctl(); unused slots hold PFM_CMD_NONE (NULL handler).
+ *
+ * PFM_CMD(name, flags, arg_count, arg_type, getsz) builds an entry with
+ * the handler, its stringified name, the command flags, the expected
+ * argument count, sizeof(arg_type) and an optional size callback.
+ * PFM_CMD_S(name, flags) builds an entry for a command without arguments.
+ */
+#define PFM_CMD(name, flags, arg_count, arg_type, getsz) { name, #name, flags, arg_count, sizeof(arg_type), getsz }
+#define PFM_CMD_S(name, flags) { name, #name, flags, 0, 0, NULL }
+#define PFM_CMD_PCLRWS (PFM_CMD_FD|PFM_CMD_ARG_RW|PFM_CMD_STOP)
+#define PFM_CMD_PCLRW (PFM_CMD_FD|PFM_CMD_ARG_RW)
+#define PFM_CMD_NONE { NULL, "no-cmd", 0, 0, 0, NULL}
+
+static pfm_cmd_desc_t pfm_cmd_tab[]={
+/* 0 */PFM_CMD_NONE,
+/* 1 */PFM_CMD(pfm_write_pmcs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
+/* 2 */PFM_CMD(pfm_write_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
+/* 3 */PFM_CMD(pfm_read_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
+/* 4 */PFM_CMD_S(pfm_stop, PFM_CMD_PCLRWS),
+/* 5 */PFM_CMD_S(pfm_start, PFM_CMD_PCLRWS),
+/* 6 */PFM_CMD_NONE,
+/* 7 */PFM_CMD_NONE,
+/* 8 */PFM_CMD(pfm_context_create, PFM_CMD_ARG_RW, 1, pfarg_context_t, pfm_ctx_getsize),
+/* 9 */PFM_CMD_NONE,
+/* 10 */PFM_CMD_S(pfm_restart, PFM_CMD_PCLRW),
+/* 11 */PFM_CMD_NONE,
+/* 12 */PFM_CMD(pfm_get_features, PFM_CMD_ARG_RW, 1, pfarg_features_t, NULL),
+/* 13 */PFM_CMD(pfm_debug, 0, 1, unsigned int, NULL),
+/* 14 */PFM_CMD_NONE,
+/* 15 */PFM_CMD(pfm_get_pmc_reset, PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
+/* 16 */PFM_CMD(pfm_context_load, PFM_CMD_PCLRWS, 1, pfarg_load_t, NULL),
+/* 17 */PFM_CMD_S(pfm_context_unload, PFM_CMD_PCLRWS),
+/* 18 */PFM_CMD_NONE,
+/* 19 */PFM_CMD_NONE,
+/* 20 */PFM_CMD_NONE,
+/* 21 */PFM_CMD_NONE,
+/* 22 */PFM_CMD_NONE,
+/* 23 */PFM_CMD_NONE,
+/* 24 */PFM_CMD_NONE,
+/* 25 */PFM_CMD_NONE,
+/* 26 */PFM_CMD_NONE,
+/* 27 */PFM_CMD_NONE,
+/* 28 */PFM_CMD_NONE,
+/* 29 */PFM_CMD_NONE,
+/* 30 */PFM_CMD_NONE,
+/* 31 */PFM_CMD_NONE,
+/* 32 */PFM_CMD(pfm_write_ibrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL),
+/* 33 */PFM_CMD(pfm_write_dbrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL)
+};
+/* number of slots in pfm_cmd_tab; command numbers at or above it are rejected */
+#define PFM_CMD_COUNT (sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t))
+
+/*
+ * Decide whether command cmd may operate on ctx given the state of the
+ * task the context is attached to.
+ *
+ * Returns 0 when the command may proceed, -EINVAL when the context is a
+ * zombie, and -EBUSY when the command requires a stopped task
+ * (PFM_CMD_STOPPED(cmd)) but the task is neither TASK_STOPPED nor
+ * TASK_TRACED.
+ *
+ * The context protection is dropped and retaken around
+ * wait_task_inactive(); if the context state changed meanwhile, all the
+ * checks are run again.
+ *
+ * NOTE(review): flags is received by value; presumably PROTECT_CTX()
+ * stores into it, in which case the caller never sees the value saved
+ * by the re-lock below -- confirm this is intended.
+ */
+static int
+pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags)
+{
+ struct task_struct *task;
+ int state, old_state;
+
+recheck:
+ state = ctx->ctx_state;
+ task = ctx->ctx_task;
+
+ if (task == NULL) {
+ DPRINT(("context %d no task, state=%d\n", ctx->ctx_fd, state));
+ return 0;
+ }
+
+ DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n",
+ ctx->ctx_fd,
+ state,
+ task->pid,
+ task->state, PFM_CMD_STOPPED(cmd)));
+
+ /*
+ * self-monitoring always ok.
+ *
+ * for system-wide the caller can either be the creator of the
+ * context (to one to which the context is attached to) OR
+ * a task running on the same CPU as the session.
+ */
+ if (task == current || ctx->ctx_fl_system) return 0;
+
+ /*
+ * if context is UNLOADED we are safe to go
+ */
+ if (state == PFM_CTX_UNLOADED) return 0;
+
+ /*
+ * no command can operate on a zombie context
+ */
+ if (state == PFM_CTX_ZOMBIE) {
+ DPRINT(("cmd %d state zombie cannot operate on context\n", cmd));
+ return -EINVAL;
+ }
+
+ /*
+ * context is LOADED or MASKED. Some commands may need to have
+ * the task stopped.
+ *
+ * We could lift this restriction for UP but it would mean that
+ * the user has no guarantee the task would not run between
+ * two successive calls to perfmonctl(). That's probably OK.
+ * If this user wants to ensure the task does not run, then
+ * the task must be stopped.
+ */
+ if (PFM_CMD_STOPPED(cmd)) {
+ if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) {
+ DPRINT(("[%d] task not in stopped state\n", task->pid));
+ return -EBUSY;
+ }
+ /*
+ * task is now stopped, wait for ctxsw out
+ *
+ * This is an interesting point in the code.
+ * We need to unprotect the context because
+ * the pfm_save_regs() routines needs to grab
+ * the same lock. There are danger in doing
+ * this because it leaves a window open for
+ * another task to get access to the context
+ * and possibly change its state. The one thing
+ * that is not possible is for the context to disappear
+ * because we are protected by the VFS layer, i.e.,
+ * get_fd()/put_fd().
+ */
+ old_state = state;
+
+ UNPROTECT_CTX(ctx, flags);
+
+ wait_task_inactive(task);
+
+ PROTECT_CTX(ctx, flags);
+
+ /*
+ * we must recheck to verify if state has changed
+ */
+ if (ctx->ctx_state != old_state) {
+ DPRINT(("old_state=%d new_state=%d\n", old_state, ctx->ctx_state));
+ goto recheck;
+ }
+ }
+ return 0;
+}
+
+/*
+ * system-call entry point (must return long)
+ *
+ * fd    perfmon file descriptor, only used by commands flagged PFM_CMD_FD
+ * cmd   index into pfm_cmd_tab[]
+ * arg   user pointer to count argument structures of the command's type
+ * count number of argument structures
+ *
+ * Returns the handler's result, or:
+ *  -ENOSYS  perfmon was disabled at initialization
+ *  -EINVAL  unknown command or argument count mismatch
+ *  -E2BIG   arguments exceed PFM_MAX_ARGSIZE bytes
+ *  -ENOMEM  the kernel argument buffer cannot be allocated
+ *  -EFAULT  arguments cannot be copied from/to user space
+ *  -EBADF   fd is invalid, is not a perfmon file, or carries no context
+ *
+ * The file reference taken with fget() is dropped on every exit path.
+ */
+asmlinkage long
+sys_perfmonctl (int fd, int cmd, void __user *arg, int count)
+{
+	struct file *file = NULL;
+	pfm_context_t *ctx = NULL;
+	unsigned long flags = 0UL;
+	void *args_k = NULL;
+	long ret; /* will expand int return types */
+	size_t base_sz, sz, xtra_sz = 0;
+	int narg, completed_args = 0, call_made = 0, cmd_flags;
+	int (*func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
+	int (*getsize)(void *arg, size_t *sz);
+#define PFM_MAX_ARGSIZE 4096
+
+	/*
+	 * reject any call if perfmon was disabled at initialization
+	 */
+	if (unlikely(pmu_conf == NULL)) return -ENOSYS;
+
+	if (unlikely(cmd < 0 || cmd >= PFM_CMD_COUNT)) {
+		DPRINT(("invalid cmd=%d\n", cmd));
+		return -EINVAL;
+	}
+
+	func = pfm_cmd_tab[cmd].cmd_func;
+	narg = pfm_cmd_tab[cmd].cmd_narg;
+	base_sz = pfm_cmd_tab[cmd].cmd_argsize;
+	getsize = pfm_cmd_tab[cmd].cmd_getsize;
+	cmd_flags = pfm_cmd_tab[cmd].cmd_flags;
+
+	if (unlikely(func == NULL)) {
+		DPRINT(("invalid cmd=%d\n", cmd));
+		return -EINVAL;
+	}
+
+	DPRINT(("cmd=%s idx=%d narg=0x%x argsz=%lu count=%d\n",
+		PFM_CMD_NAME(cmd),
+		cmd,
+		narg,
+		base_sz,
+		count));
+
+	/*
+	 * check if number of arguments matches what the command expects
+	 */
+	if (unlikely((narg == PFM_CMD_ARG_MANY && count <= 0) || (narg > 0 && narg != count)))
+		return -EINVAL;
+
+restart_args:
+	sz = xtra_sz + base_sz*count;
+	/*
+	 * limit abuse to min page size
+	 */
+	if (unlikely(sz > PFM_MAX_ARGSIZE)) {
+		printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", current->pid, sz);
+		return -E2BIG;
+	}
+
+	/*
+	 * allocate default-sized argument buffer
+	 */
+	if (likely(count && args_k == NULL)) {
+		args_k = kmalloc(PFM_MAX_ARGSIZE, GFP_KERNEL);
+		if (args_k == NULL) return -ENOMEM;
+	}
+
+	ret = -EFAULT;
+
+	/*
+	 * copy arguments
+	 *
+	 * assume sz = 0 for command without parameters
+	 */
+	if (sz && copy_from_user(args_k, arg, sz)) {
+		DPRINT(("cannot copy_from_user %lu bytes @%p\n", sz, arg));
+		goto error_args;
+	}
+
+	/*
+	 * check if command supports extra parameters
+	 */
+	if (completed_args == 0 && getsize) {
+		/*
+		 * get extra parameters size (based on main argument)
+		 */
+		ret = (*getsize)(args_k, &xtra_sz);
+		if (ret) goto error_args;
+
+		completed_args = 1;
+
+		DPRINT(("restart_args sz=%lu xtra_sz=%lu\n", sz, xtra_sz));
+
+		/* retry if necessary */
+		if (likely(xtra_sz)) goto restart_args;
+	}
+
+	if (unlikely((cmd_flags & PFM_CMD_FD) == 0)) goto skip_fd;
+
+	ret = -EBADF;
+
+	file = fget(fd);
+	if (unlikely(file == NULL)) {
+		DPRINT(("invalid fd %d\n", fd));
+		goto error_args;
+	}
+	if (unlikely(PFM_IS_FILE(file) == 0)) {
+		DPRINT(("fd %d not related to perfmon\n", fd));
+		goto error_args;
+	}
+
+	ctx = (pfm_context_t *)file->private_data;
+	if (unlikely(ctx == NULL)) {
+		DPRINT(("no context for fd %d\n", fd));
+		goto error_args;
+	}
+	prefetch(&ctx->ctx_state);
+
+	PROTECT_CTX(ctx, flags);
+
+	/*
+	 * check task is stopped
+	 */
+	ret = pfm_check_task_state(ctx, cmd, flags);
+	if (unlikely(ret)) goto abort_locked;
+
+skip_fd:
+	ret = (*func)(ctx, args_k, count, ia64_task_regs(current));
+
+	call_made = 1;
+
+abort_locked:
+	if (likely(ctx)) {
+		DPRINT(("context unlocked\n"));
+		UNPROTECT_CTX(ctx, flags);
+	}
+
+	/* copy argument back to user, if needed */
+	if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT;
+
+error_args:
+	/*
+	 * drop the reference taken by fget() here, in the common exit, so
+	 * that the "not a perfmon file" and "no context" failures above do
+	 * not leak it
+	 */
+	if (file) fput(file);
+
+	/* kfree() accepts NULL */
+	kfree(args_k);
+
+	DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret));
+
+	return ret;
+}
+
+/*
+ * Resume a context after an overflow has been handled.
+ *
+ * With a sampling buffer (CTX_HAS_SMPL), the buffer format's restart
+ * routine is called and fills in rst_ctrl; without one, the default is to
+ * reset the overflowed PMDs and not mask monitoring.
+ *
+ * When the restart routine reports success (ret == 0): the PMDs in
+ * ovfl_regs are reset with their long reset values if requested,
+ * monitoring is restored when the context is MASKED and masking was not
+ * requested, and the context state is set to PFM_CTX_LOADED.
+ */
+static void
+pfm_resume_after_ovfl(pfm_context_t *ctx, unsigned long ovfl_regs, struct pt_regs *regs)
+{
+ pfm_buffer_fmt_t *fmt = ctx->ctx_buf_fmt;
+ pfm_ovfl_ctrl_t rst_ctrl;
+ int state;
+ int ret = 0;
+
+ state = ctx->ctx_state;
+ /*
+ * Unlock sampling buffer and reset index atomically
+ * XXX: not really needed when blocking
+ */
+ if (CTX_HAS_SMPL(ctx)) {
+
+ rst_ctrl.bits.mask_monitoring = 0;
+ rst_ctrl.bits.reset_ovfl_pmds = 0;
+
+ /* pick the restart variant according to whether the context is currently loaded */
+ if (state == PFM_CTX_LOADED)
+ ret = pfm_buf_fmt_restart_active(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
+ else
+ ret = pfm_buf_fmt_restart(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
+ } else {
+ rst_ctrl.bits.mask_monitoring = 0;
+ rst_ctrl.bits.reset_ovfl_pmds = 1;
+ }
+
+ if (ret == 0) {
+ if (rst_ctrl.bits.reset_ovfl_pmds) {
+ pfm_reset_regs(ctx, &ovfl_regs, PFM_PMD_LONG_RESET);
+ }
+ if (rst_ctrl.bits.mask_monitoring == 0) {
+ DPRINT(("resuming monitoring\n"));
+ if (ctx->ctx_state == PFM_CTX_MASKED) pfm_restore_monitoring(current);
+ } else {
+ DPRINT(("stopping monitoring\n"));
+ //pfm_stop_monitoring(current, regs);
+ }
+ ctx->ctx_state = PFM_CTX_LOADED;
+ }
+}
+
+/*
+ * Forcibly detach ctx from the current task and wake whoever sleeps on
+ * the context's zombie wait queue.
+ *
+ * The caller MUST hold the context lock, and this may only be used on
+ * behalf of current. An unload failure is logged, not propagated.
+ */
+static void
+pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs)
+{
+	int err;
+
+	DPRINT(("entering for [%d]\n", current->pid));
+
+	err = pfm_context_unload(ctx, NULL, 0, regs);
+	if (err != 0) {
+		printk(KERN_ERR "pfm_context_force_terminate: [%d] unloaded failed with %d\n", current->pid, err);
+	}
+
+	/*
+	 * tell the controlling task we are disconnected; because the
+	 * context is still locked it can only proceed once we have
+	 * returned from pfm_handle_work()
+	 */
+	wake_up_interruptible(&ctx->ctx_zombieq);
+}
+
+static int pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds);
+
+/*
+ * Process the pending perfmon work of the current task.
+ *
+ * Clears the work-pending and notify flags, then, depending on the
+ * recorded trap reason and context state:
+ *  - zombie / going-zombie context: force-terminate the context;
+ *  - PFM_TRAP_REASON_RESET: resume monitoring right away;
+ *  - otherwise: drop the context lock, sleep on ctx_restart_sem with
+ *    interrupts enabled, then resume monitoring unless the sleep was
+ *    interrupted or the context turned zombie in the meantime.
+ */
+void
+pfm_handle_work(void)
+{
+ pfm_context_t *ctx;
+ struct pt_regs *regs;
+ unsigned long flags;
+ unsigned long ovfl_regs;
+ unsigned int reason;
+ int ret;
+
+ ctx = PFM_GET_CTX(current);
+ if (ctx == NULL) {
+ printk(KERN_ERR "perfmon: [%d] has no PFM context\n", current->pid);
+ return;
+ }
+
+ PROTECT_CTX(ctx, flags);
+
+ PFM_SET_WORK_PENDING(current, 0);
+
+ pfm_clear_task_notify();
+
+ regs = ia64_task_regs(current);
+
+ /*
+ * extract reason for being here and clear
+ */
+ reason = ctx->ctx_fl_trap_reason;
+ ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE;
+ ovfl_regs = ctx->ctx_ovfl_regs[0];
+
+ DPRINT(("reason=%d state=%d\n", reason, ctx->ctx_state));
+
+ /*
+ * must be done before we check for simple-reset mode
+ */
+ if (ctx->ctx_fl_going_zombie || ctx->ctx_state == PFM_CTX_ZOMBIE) goto do_zombie;
+
+
+ //if (CTX_OVFL_NOBLOCK(ctx)) goto skip_blocking;
+ if (reason == PFM_TRAP_REASON_RESET) goto skip_blocking;
+
+ UNPROTECT_CTX(ctx, flags);
+
+ /*
+ * pfm_handle_work() is currently called with interrupts disabled.
+ * The down_interruptible call may sleep, therefore we
+ * must re-enable interrupts to avoid deadlocks. It is
+ * safe to do so because this function is called ONLY
+ * when returning to user level (PUStk=1), in which case
+ * there is no risk of kernel stack overflow due to deep
+ * interrupt nesting.
+ */
+ BUG_ON(flags & IA64_PSR_I);
+ local_irq_enable();
+
+ DPRINT(("before block sleeping\n"));
+
+ /*
+ * may go through without blocking on SMP systems
+ * if restart has been received already by the time we call down()
+ */
+ ret = down_interruptible(&ctx->ctx_restart_sem);
+
+ DPRINT(("after block sleeping ret=%d\n", ret));
+
+ /*
+ * disable interrupts to restore state we had upon entering
+ * this function
+ */
+ local_irq_disable();
+
+ PROTECT_CTX(ctx, flags);
+
+ /*
+ * we need to read the ovfl_regs only after wake-up
+ * because we may have had pfm_write_pmds() in between
+ * and that can changed PMD values and therefore
+ * ovfl_regs is reset for these new PMD values.
+ */
+ ovfl_regs = ctx->ctx_ovfl_regs[0];
+
+ if (ctx->ctx_fl_going_zombie) {
+ /* also entered by the goto above, before any sleep, when the context is already zombie */
+do_zombie:
+ DPRINT(("context is zombie, bailing out\n"));
+ pfm_context_force_terminate(ctx, regs);
+ goto nothing_to_do;
+ }
+ /*
+ * in case of interruption of down() we don't restart anything
+ */
+ if (ret < 0) goto nothing_to_do;
+
+skip_blocking:
+ pfm_resume_after_ovfl(ctx, ovfl_regs, regs);
+ ctx->ctx_ovfl_regs[0] = 0UL;
+
+nothing_to_do:
+
+ UNPROTECT_CTX(ctx, flags);
+}
+
+/*
+ * Wake up user-level listeners of a context: sleepers on the message
+ * wait queue (only when msg is non-NULL) and SIGIO subscribers on the
+ * async queue. Nothing is signalled when the context is a zombie.
+ * Always returns 0.
+ */
+static int
+pfm_notify_user(pfm_context_t *ctx, pfm_msg_t *msg)
+{
+	if (ctx->ctx_state != PFM_CTX_ZOMBIE) {
+		DPRINT(("waking up somebody\n"));
+
+		if (msg != NULL) {
+			wake_up_interruptible(&ctx->ctx_msgq_wait);
+		}
+
+		/*
+		 * we get here neither from the interrupt handler nor from
+		 * the context switch path, so this call is safe
+		 */
+		kill_fasync (&ctx->ctx_async_queue, SIGIO, POLL_IN);
+	} else {
+		DPRINT(("ignoring overflow notification, owner is zombie\n"));
+	}
+
+	return 0;
+}
+
+/*
+ * Post an overflow notification for the PMDs in ovfl_pmds.
+ *
+ * Unless the context was created with messages disabled
+ * (ctx_fl_no_msg), a PFM_MSG_OVFL message is queued first. Returns -1
+ * when no message slot is available, otherwise the result of
+ * pfm_notify_user().
+ */
+static int
+pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds)
+{
+	pfm_msg_t *msg = NULL;
+	int word;
+
+	if (!ctx->ctx_fl_no_msg) {
+		msg = pfm_get_new_msg(ctx);
+		if (!msg) {
+			printk(KERN_ERR "perfmon: pfm_ovfl_notify_user no more notification msgs\n");
+			return -1;
+		}
+
+		msg->pfm_ovfl_msg.msg_type = PFM_MSG_OVFL;
+		msg->pfm_ovfl_msg.msg_ctx_fd = ctx->ctx_fd;
+		msg->pfm_ovfl_msg.msg_active_set = 0;
+		msg->pfm_ovfl_msg.msg_tstamp = 0UL;
+
+		/* only the first bitmask word carries overflow bits, the other three are cleared */
+		msg->pfm_ovfl_msg.msg_ovfl_pmds[0] = ovfl_pmds;
+		for (word = 1; word < 4; word++) {
+			msg->pfm_ovfl_msg.msg_ovfl_pmds[word] = 0UL;
+		}
+	}
+
+	DPRINT(("ovfl msg: msg=%p no_msg=%d fd=%d ovfl_pmds=0x%lx\n",
+		msg,
+		ctx->ctx_fl_no_msg,
+		ctx->ctx_fd,
+		ovfl_pmds));
+
+	return pfm_notify_user(ctx, msg);
+}
+
+/*
+ * Queue a PFM_MSG_END message on the context and wake up its listeners.
+ *
+ * Returns -1 when no message slot is available, otherwise the result of
+ * pfm_notify_user().
+ */
+static int
+pfm_end_notify_user(pfm_context_t *ctx)
+{
+	pfm_msg_t *msg = pfm_get_new_msg(ctx);
+
+	if (!msg) {
+		printk(KERN_ERR "perfmon: pfm_end_notify_user no more notification msgs\n");
+		return -1;
+	}
+
+	/* wipe the whole slot first so no stale data reaches the reader */
+	memset(msg, 0, sizeof(*msg));
+
+	msg->pfm_end_msg.msg_type = PFM_MSG_END;
+	msg->pfm_end_msg.msg_ctx_fd = ctx->ctx_fd;
+	msg->pfm_ovfl_msg.msg_tstamp = 0UL;
+
+	DPRINT(("end msg: msg=%p no_msg=%d ctx_fd=%d\n",
+		msg,
+		ctx->ctx_fl_no_msg,
+		ctx->ctx_fd));
+
+	return pfm_notify_user(ctx, msg);
+}
+
+/*
+ * main overflow processing routine.
+ * it can be called from the interrupt path or explicitely during the context switch code
+ */
+static void
+pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, struct pt_regs *regs)
+{
+ pfm_ovfl_arg_t *ovfl_arg;
+ unsigned long mask;
+ unsigned long old_val, ovfl_val, new_val;
+ unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL, smpl_pmds = 0UL, reset_pmds;
+ unsigned long tstamp;
+ pfm_ovfl_ctrl_t ovfl_ctrl;
+ unsigned int i, has_smpl;
+ int must_notify = 0;
+
+ if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) goto stop_monitoring;
+
+ /*
+ * sanity test. Should never happen
+ */
+ if (unlikely((pmc0 & 0x1) == 0)) goto sanity_check;
+
+ tstamp = ia64_get_itc();
+ mask = pmc0 >> PMU_FIRST_COUNTER;
+ ovfl_val = pmu_conf->ovfl_val;
+ has_smpl = CTX_HAS_SMPL(ctx);
+
+ DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s "
+ "used_pmds=0x%lx\n",
+ pmc0,
+ task ? task->pid: -1,
+ (regs ? regs->cr_iip : 0),
+ CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking",
+ ctx->ctx_used_pmds[0]));
+
+
+ /*
+ * first we update the virtual counters
+ * assume there was a prior ia64_srlz_d() issued
+ */
+ for (i = PMU_FIRST_COUNTER; mask ; i++, mask >>= 1) {
+
+ /* skip pmd which did not overflow */
+ if ((mask & 0x1) == 0) continue;
+
+ /*
+ * Note that the pmd is not necessarily 0 at this point as qualified events
+ * may have happened before the PMU was frozen. The residual count is not
+ * taken into consideration here but will be with any read of the pmd via
+ * pfm_read_pmds().
+ */
+ old_val = new_val = ctx->ctx_pmds[i].val;
+ new_val += 1 + ovfl_val;
+ ctx->ctx_pmds[i].val = new_val;
+
+ /*
+ * check for overflow condition
+ */
+ if (likely(old_val > new_val)) {
+ ovfl_pmds |= 1UL << i;
+ if (PMC_OVFL_NOTIFY(ctx, i)) ovfl_notify |= 1UL << i;
+ }
+
+ DPRINT_ovfl(("ctx_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx\n",
+ i,
+ new_val,
+ old_val,
+ ia64_get_pmd(i) & ovfl_val,
+ ovfl_pmds,
+ ovfl_notify));
+ }
+
+ /*
+ * there was no 64-bit overflow, nothing else to do
+ */
+ if (ovfl_pmds == 0UL) return;
+
+ /*
+ * reset all control bits
+ */
+ ovfl_ctrl.val = 0;
+ reset_pmds = 0UL;
+
+ /*
+ * if a sampling format module exists, then we "cache" the overflow by
+ * calling the module's handler() routine.
+ */
+ if (has_smpl) {
+ unsigned long start_cycles, end_cycles;
+ unsigned long pmd_mask;
+ int j, k, ret = 0;
+ int this_cpu = smp_processor_id();
+
+ pmd_mask = ovfl_pmds >> PMU_FIRST_COUNTER;
+ ovfl_arg = &ctx->ctx_ovfl_arg;
+
+ prefetch(ctx->ctx_smpl_hdr);
+
+ for(i=PMU_FIRST_COUNTER; pmd_mask && ret == 0; i++, pmd_mask >>=1) {
+
+ mask = 1UL << i;
+
+ if ((pmd_mask & 0x1) == 0) continue;
+
+ ovfl_arg->ovfl_pmd = (unsigned char )i;
+ ovfl_arg->ovfl_notify = ovfl_notify & mask ? 1 : 0;
+ ovfl_arg->active_set = 0;
+ ovfl_arg->ovfl_ctrl.val = 0; /* module must fill in all fields */
+ ovfl_arg->smpl_pmds[0] = smpl_pmds = ctx->ctx_pmds[i].smpl_pmds[0];
+
+ ovfl_arg->pmd_value = ctx->ctx_pmds[i].val;
+ ovfl_arg->pmd_last_reset = ctx->ctx_pmds[i].lval;
+ ovfl_arg->pmd_eventid = ctx->ctx_pmds[i].eventid;
+
+ /*
+ * copy values of pmds of interest. Sampling format may copy them
+ * into sampling buffer.
+ */
+ if (smpl_pmds) {
+ for(j=0, k=0; smpl_pmds; j++, smpl_pmds >>=1) {
+ if ((smpl_pmds & 0x1) == 0) continue;
+ ovfl_arg->smpl_pmds_values[k++] = PMD_IS_COUNTING(j) ? pfm_read_soft_counter(ctx, j) : ia64_get_pmd(j);
+ DPRINT_ovfl(("smpl_pmd[%d]=pmd%u=0x%lx\n", k-1, j, ovfl_arg->smpl_pmds_values[k-1]));
+ }
+ }
+
+ pfm_stats[this_cpu].pfm_smpl_handler_calls++;
+
+ start_cycles = ia64_get_itc();
+
+ /*
+ * call custom buffer format record (handler) routine
+ */
+ ret = (*ctx->ctx_buf_fmt->fmt_handler)(task, ctx->ctx_smpl_hdr, ovfl_arg, regs, tstamp);
+
+ end_cycles = ia64_get_itc();
+
+ /*
+ * For those controls, we take the union because they have
+ * an all or nothing behavior.
+ */
+ ovfl_ctrl.bits.notify_user |= ovfl_arg->ovfl_ctrl.bits.notify_user;
+ ovfl_ctrl.bits.block_task |= ovfl_arg->ovfl_ctrl.bits.block_task;
+ ovfl_ctrl.bits.mask_monitoring |= ovfl_arg->ovfl_ctrl.bits.mask_monitoring;
+ /*
+ * build the bitmask of pmds to reset now
+ */
+ if (ovfl_arg->ovfl_ctrl.bits.reset_ovfl_pmds) reset_pmds |= mask;
+
+ pfm_stats[this_cpu].pfm_smpl_handler_cycles += end_cycles - start_cycles;
+ }
+ /*
+ * when the module cannot handle the rest of the overflows, we abort right here
+ */
+ if (ret && pmd_mask) {
+ DPRINT(("handler aborts leftover ovfl_pmds=0x%lx\n",
+ pmd_mask<<PMU_FIRST_COUNTER));
+ }
+ /*
+ * remove the pmds we reset now from the set of pmds to reset in pfm_restart()
+ */
+ ovfl_pmds &= ~reset_pmds;
+ } else {
+ /*
+ * when no sampling module is used, then the default
+ * is to notify on overflow if requested by user
+ */
+ ovfl_ctrl.bits.notify_user = ovfl_notify ? 1 : 0;
+ ovfl_ctrl.bits.block_task = ovfl_notify ? 1 : 0;
+ ovfl_ctrl.bits.mask_monitoring = ovfl_notify ? 1 : 0; /* XXX: change for saturation */
+ ovfl_ctrl.bits.reset_ovfl_pmds = ovfl_notify ? 0 : 1;
+ /*
+ * if needed, we reset all overflowed pmds
+ */
+ if (ovfl_notify == 0) reset_pmds = ovfl_pmds;
+ }
+
+ DPRINT_ovfl(("ovfl_pmds=0x%lx reset_pmds=0x%lx\n", ovfl_pmds, reset_pmds));
+
+ /*
+ * reset the requested PMD registers using the short reset values
+ */
+ if (reset_pmds) {
+ unsigned long bm = reset_pmds;
+ pfm_reset_regs(ctx, &bm, PFM_PMD_SHORT_RESET);
+ }
+
+ if (ovfl_notify && ovfl_ctrl.bits.notify_user) {
+ /*
+ * keep track of what to reset when unblocking
+ */
+ ctx->ctx_ovfl_regs[0] = ovfl_pmds;
+
+ /*
+ * check for blocking context
+ */
+ if (CTX_OVFL_NOBLOCK(ctx) == 0 && ovfl_ctrl.bits.block_task) {
+
+ ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_BLOCK;
+
+ /*
+ * set the perfmon specific checking pending work for the task
+ */
+ PFM_SET_WORK_PENDING(task, 1);
+
+ /*
+ * when coming from ctxsw, current still points to the
+ * previous task, therefore we must work with task and not current.
+ */
+ pfm_set_task_notify(task);
+ }
+ /*
+ * defer until state is changed (shorten spin window). the context is locked
+ * anyway, so the signal receiver would come spin for nothing.
+ */
+ must_notify = 1;
+ }
+
+ DPRINT_ovfl(("owner [%d] pending=%ld reason=%u ovfl_pmds=0x%lx ovfl_notify=0x%lx masked=%d\n",
+ GET_PMU_OWNER() ? GET_PMU_OWNER()->pid : -1,
+ PFM_GET_WORK_PENDING(task),
+ ctx->ctx_fl_trap_reason,
+ ovfl_pmds,
+ ovfl_notify,
+ ovfl_ctrl.bits.mask_monitoring ? 1 : 0));
+ /*
+ * in case monitoring must be stopped, we toggle the psr bits
+ */
+ if (ovfl_ctrl.bits.mask_monitoring) {
+ pfm_mask_monitoring(task);
+ ctx->ctx_state = PFM_CTX_MASKED;
+ ctx->ctx_fl_can_restart = 1;
+ }
+
+ /*
+ * send notification now
+ */
+ if (must_notify) pfm_ovfl_notify_user(ctx, ovfl_notify);
+
+ return;
+
+sanity_check:
+ printk(KERN_ERR "perfmon: CPU%d overflow handler [%d] pmc0=0x%lx\n",
+ smp_processor_id(),
+ task ? task->pid : -1,
+ pmc0);
+ return;
+
+stop_monitoring:
+ /*
+ * in SMP, zombie context is never restored but reclaimed in pfm_load_regs().
+ * Moreover, zombies are also reclaimed in pfm_save_regs(). Therefore we can
+ * come here as zombie only if the task is the current task. In which case, we
+ * can access the PMU hardware directly.
+ *
+ * Note that zombies do have PM_VALID set. So here we do the minimal.
+ *
+ * In case the context was zombified it could not be reclaimed at the time
+ * the monitoring program exited. At this point, the PMU reservation has been
+ * returned, the sampling buffer has been freed. We must convert this call
+ * into a spurious interrupt. However, we must also avoid infinite overflows
+ * by stopping monitoring for this task. We can only come here for a per-task
+ * context. All we need to do is to stop monitoring using the psr bits which
+ * are always task private. By re-enabling secure monitoring, we ensure that
+ * the monitored task will not be able to re-activate monitoring.
+ * The task will eventually be context switched out, at which point the context
+ * will be reclaimed (that includes releasing ownership of the PMU).
+ *
+ * So there might be a window of time where the number of per-task sessions is zero
+ * yet one PMU might have an owner and get at most one overflow interrupt for a zombie
+ * context. This is safe because if a per-task session comes in, it will push this one
+ * out and by virtue of pfm_save_regs(), this one will disappear. If a system wide
+ * session is forced on that CPU, given that we use task pinning, pfm_save_regs() will
+ * also push our zombie context out.
+ *
+ * Overall pretty hairy stuff....
+ */
+ DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ? task->pid: -1));
+ pfm_clear_psr_up();
+ ia64_psr(regs)->up = 0;
+ ia64_psr(regs)->sp = 1;
+ return;
+}
+
+/*
+ * Body of the PMU overflow interrupt.
+ *
+ * Reads pmc0 and the current PMU owner/context for this CPU. When pmc0
+ * reports an overflow and the PMU has an owner with a usable context,
+ * pfm_overflow_handler() is run with the context protected.
+ *
+ * Returns 0 when the overflow handler was invoked, -1 when the interrupt
+ * was treated as spurious. The PMU is unfrozen on every path.
+ */
+static int
+pfm_do_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
+{
+	int cpu = smp_processor_id();
+	struct task_struct *owner;
+	pfm_context_t *owner_ctx;
+	unsigned long irq_flags;
+	u64 pmc0_val;
+
+	pfm_stats[cpu].pfm_ovfl_intr_count++;
+
+	/*
+	 * srlz.d done before arriving here
+	 */
+	pmc0_val = ia64_get_pmc(0);
+
+	owner     = GET_PMU_OWNER();
+	owner_ctx = GET_PMU_CTX();
+
+	/*
+	 * no pending overflow bit, or nobody owns the PMU: plain spurious
+	 * interrupt, only accounted for.
+	 * assumes : if any PMC0.bit[63-1] is set, then PMC0.fr = 1
+	 */
+	if (!PMC0_HAS_OVFL(pmc0_val) || owner == NULL) {
+		pfm_stats[cpu].pfm_spurious_ovfl_intr_count++;
+		pfm_unfreeze_pmu();
+		return -1;
+	}
+
+	/*
+	 * from here on we assume that pmc0.fr is set
+	 */
+
+	/* sanity check: an owner without a context */
+	if (owner_ctx == NULL) {
+		printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d has no PFM context\n",
+			cpu, owner->pid);
+		pfm_unfreeze_pmu();
+		return -1;
+	}
+
+	/* per-task context whose thread is not flagged PM_VALID */
+	if (owner_ctx->ctx_fl_system == 0 && (owner->thread.flags & IA64_THREAD_PM_VALID) == 0) {
+		printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d, invalid flag\n",
+			cpu,
+			owner->pid);
+		pfm_unfreeze_pmu();
+		return -1;
+	}
+
+	PROTECT_CTX_NOPRINT(owner_ctx, irq_flags);
+
+	pfm_overflow_handler(owner, owner_ctx, pmc0_val, regs);
+
+	UNPROTECT_CTX_NOPRINT(owner_ctx, irq_flags);
+
+	/*
+	 * keep it unfrozen at all times
+	 */
+	pfm_unfreeze_pmu();
+
+	return 0;
+}
+
+/*
+ * Interrupt entry point registered for the perfmon vector.
+ *
+ * Wraps pfm_do_interrupt_handler() with cycle accounting (ITC based):
+ * for non-spurious interrupts the elapsed cycles are added to the
+ * per-CPU total and the per-CPU min/max are updated. Always returns
+ * IRQ_HANDLED.
+ */
+static irqreturn_t
+pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
+{
+	unsigned long t_begin, elapsed;
+	unsigned long prev_min, prev_max;
+	int cpu;
+	int status;
+
+	cpu = get_cpu();
+
+	/* snapshot the extremes before running the handler */
+	prev_min = pfm_stats[cpu].pfm_ovfl_intr_cycles_min;
+	prev_max = pfm_stats[cpu].pfm_ovfl_intr_cycles_max;
+
+	t_begin = ia64_get_itc();
+
+	status = pfm_do_interrupt_handler(irq, arg, regs);
+
+	elapsed = ia64_get_itc() - t_begin;
+
+	/*
+	 * don't measure spurious interrupts
+	 */
+	if (likely(status == 0)) {
+		if (elapsed < prev_min)
+			pfm_stats[cpu].pfm_ovfl_intr_cycles_min = elapsed;
+		if (elapsed > prev_max)
+			pfm_stats[cpu].pfm_ovfl_intr_cycles_max = elapsed;
+
+		pfm_stats[cpu].pfm_ovfl_intr_cycles += elapsed;
+	}
+
+	put_cpu_no_resched();
+	return IRQ_HANDLED;
+}
+
+/*
+ * /proc/perfmon interface, for debug only
+ */
+
+/* iterator token meaning "print the header"; one past the largest per-CPU token (NR_CPUS) */
+#define PFM_PROC_SHOW_HEADER ((void *)(long)(NR_CPUS + 1))
+
+/*
+ * seq_file start callback.
+ *
+ * Position 0 yields the header token. Position p >= 1 stands for CPU
+ * (p - 1); *pos is advanced past offline CPUs and the position itself is
+ * returned as the iterator token. Returns NULL once *pos exceeds NR_CPUS.
+ */
+static void *
+pfm_proc_start(struct seq_file *m, loff_t *pos)
+{
+	if (*pos == 0)
+		return PFM_PROC_SHOW_HEADER;
+
+	for (; *pos <= NR_CPUS; ++*pos) {
+		if (cpu_online(*pos - 1))
+			return (void *)*pos;
+	}
+
+	return NULL;
+}
+
+/*
+ * seq_file next callback: step to the following position and let
+ * pfm_proc_start() find the next token from there.
+ */
+static void *
+pfm_proc_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	*pos += 1;
+
+	return pfm_proc_start(m, pos);
+}
+
+/*
+ * seq_file stop callback: intentionally empty.
+ */
+static void
+pfm_proc_stop(struct seq_file *m, void *v)
+{
+ /* pfm_proc_start() holds no lock and allocates nothing, so there is nothing to undo */
+}
+
+/*
+ * Emit the global (non per-CPU) part of /proc/perfmon:
+ *  - version, PMU model, sysctl settings, overflow mask and PMU flags
+ *  - session counters (read under the session lock)
+ *  - one line per registered sampling buffer format, with its 16-byte
+ *    UUID and name (list walked under pfm_buffer_fmt_lock)
+ */
+static void
+pfm_proc_show_header(struct seq_file *m)
+{
+	struct list_head *node;
+	unsigned long irq_flags;
+
+	seq_printf(m,
+		"perfmon version : %u.%u\n"
+		"model : %s\n"
+		"fastctxsw : %s\n"
+		"expert mode : %s\n"
+		"ovfl_mask : 0x%lx\n"
+		"PMU flags : 0x%x\n",
+		PFM_VERSION_MAJ, PFM_VERSION_MIN,
+		pmu_conf->pmu_name,
+		pfm_sysctl.fastctxsw > 0 ? "Yes": "No",
+		pfm_sysctl.expert_mode > 0 ? "Yes": "No",
+		pmu_conf->ovfl_val,
+		pmu_conf->flags);
+
+	/* session counters are printed as one consistent snapshot */
+	LOCK_PFS(irq_flags);
+
+	seq_printf(m,
+		"proc_sessions : %u\n"
+		"sys_sessions : %u\n"
+		"sys_use_dbregs : %u\n"
+		"ptrace_use_dbregs : %u\n",
+		pfm_sessions.pfs_task_sessions,
+		pfm_sessions.pfs_sys_sessions,
+		pfm_sessions.pfs_sys_use_dbregs,
+		pfm_sessions.pfs_ptrace_use_dbregs);
+
+	UNLOCK_PFS(irq_flags);
+
+	spin_lock(&pfm_buffer_fmt_lock);
+
+	list_for_each(node, &pfm_buffer_fmt_list) {
+		pfm_buffer_fmt_t *fmt = list_entry(node, pfm_buffer_fmt_t, fmt_list);
+
+		seq_printf(m, "format : %02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x %s\n",
+			fmt->fmt_uuid[0], fmt->fmt_uuid[1], fmt->fmt_uuid[2], fmt->fmt_uuid[3],
+			fmt->fmt_uuid[4], fmt->fmt_uuid[5], fmt->fmt_uuid[6], fmt->fmt_uuid[7],
+			fmt->fmt_uuid[8], fmt->fmt_uuid[9], fmt->fmt_uuid[10], fmt->fmt_uuid[11],
+			fmt->fmt_uuid[12], fmt->fmt_uuid[13], fmt->fmt_uuid[14], fmt->fmt_uuid[15],
+			fmt->fmt_name);
+	}
+
+	spin_unlock(&pfm_buffer_fmt_lock);
+}
+
+/*
+ * seq_file show callback.
+ *
+ * v is either the header token, in which case the global header is
+ * printed, or a position p standing for CPU (p - 1), in which case the
+ * per-CPU statistics and ownership information are printed. When a
+ * single CPU is online and the debug sysctl is positive, the live psr,
+ * pmc0 and every counting pmc/pmd pair are dumped as well.
+ *
+ * Always returns 0.
+ */
+static int
+pfm_proc_show(struct seq_file *m, void *v)
+{
+	unsigned long psr_val;
+	unsigned int reg;
+	int cpu;
+
+	if (v == PFM_PROC_SHOW_HEADER) {
+		pfm_proc_show_header(m);
+		return 0;
+	}
+
+	/* the token is (cpu number + 1) */
+	cpu = (long)v - 1;
+
+	seq_printf(m,
+		"CPU%-2d overflow intrs : %lu\n"
+		"CPU%-2d overflow cycles : %lu\n"
+		"CPU%-2d overflow min : %lu\n"
+		"CPU%-2d overflow max : %lu\n"
+		"CPU%-2d smpl handler calls : %lu\n"
+		"CPU%-2d smpl handler cycles : %lu\n"
+		"CPU%-2d spurious intrs : %lu\n"
+		"CPU%-2d replay intrs : %lu\n"
+		"CPU%-2d syst_wide : %d\n"
+		"CPU%-2d dcr_pp : %d\n"
+		"CPU%-2d exclude idle : %d\n"
+		"CPU%-2d owner : %d\n"
+		"CPU%-2d context : %p\n"
+		"CPU%-2d activations : %lu\n",
+		cpu, pfm_stats[cpu].pfm_ovfl_intr_count,
+		cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles,
+		cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_min,
+		cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_max,
+		cpu, pfm_stats[cpu].pfm_smpl_handler_calls,
+		cpu, pfm_stats[cpu].pfm_smpl_handler_cycles,
+		cpu, pfm_stats[cpu].pfm_spurious_ovfl_intr_count,
+		cpu, pfm_stats[cpu].pfm_replay_ovfl_intr_count,
+		cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_SYST_WIDE ? 1 : 0,
+		cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_DCR_PP ? 1 : 0,
+		cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_EXCL_IDLE ? 1 : 0,
+		cpu, pfm_get_cpu_data(pmu_owner, cpu) ? pfm_get_cpu_data(pmu_owner, cpu)->pid: -1,
+		cpu, pfm_get_cpu_data(pmu_ctx, cpu),
+		cpu, pfm_get_cpu_data(pmu_activation_number, cpu));
+
+	/* the register dump below reads the PMU of the CPU we are running on */
+	if (num_online_cpus() != 1 || pfm_sysctl.debug <= 0)
+		return 0;
+
+	psr_val = pfm_get_psr();
+
+	ia64_srlz_d();
+
+	seq_printf(m,
+		"CPU%-2d psr : 0x%lx\n"
+		"CPU%-2d pmc0 : 0x%lx\n",
+		cpu, psr_val,
+		cpu, ia64_get_pmc(0));
+
+	for (reg = 0; PMC_IS_LAST(reg) == 0; reg++) {
+		if (PMC_IS_COUNTING(reg) == 0)
+			continue;
+
+		seq_printf(m,
+			"CPU%-2d pmc%u : 0x%lx\n"
+			"CPU%-2d pmd%u : 0x%lx\n",
+			cpu, reg, ia64_get_pmc(reg),
+			cpu, reg, ia64_get_pmd(reg));
+	}
+
+	return 0;
+}
+
+/*
+ * seq_file iterator for /proc/perfmon, handed to seq_open() by
+ * pfm_proc_open(): a header record followed by one record per online CPU.
+ */
+struct seq_operations pfm_seq_ops = {
+ .start = pfm_proc_start,
+ .next = pfm_proc_next,
+ .stop = pfm_proc_stop,
+ .show = pfm_proc_show
+};
+
+/*
+ * open() for /proc/perfmon: attach the perfmon seq_file iterator to the
+ * file and hand back whatever seq_open() returns.
+ */
+static int
+pfm_proc_open(struct inode *inode, struct file *file)
+{
+	int err;
+
+	err = seq_open(file, &pfm_seq_ops);
+
+	return err;
+}
+
+
+/*
+ * we come here as soon as local_cpu_data->pfm_syst_wide is set. this happens
+ * during pfm_enable() hence before pfm_start(). We cannot assume monitoring
+ * is active or inactive based on mode. We must rely on the value in
+ * local_cpu_data->pfm_syst_info
+ */
+/*
+ * Adjust the psr.pp / dcr.pp monitoring bits for a task being switched
+ * in or out while a system-wide session is active on this CPU.
+ *
+ * task: task being switched
+ * info: per-CPU state bits (PFM_CPUINFO_DCR_PP, PFM_CPUINFO_EXCL_IDLE are tested)
+ * is_ctxswin: non-zero when task is being switched in, zero when switched out
+ */
+void
+pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin)
+{
+ struct pt_regs *regs;
+ unsigned long dcr;
+ unsigned long dcr_pp;
+
+ dcr_pp = info & PFM_CPUINFO_DCR_PP ? 1 : 0;
+
+ /*
+ * pid 0 is guaranteed to be the idle task. There is one such task with pid 0
+ * on every CPU, so we can rely on the pid to identify the idle task.
+ *
+ * Common case (idle not excluded, or task is not idle): only the pp bit
+ * in the task's saved psr is updated; it is cleared on switch out.
+ */
+ if ((info & PFM_CPUINFO_EXCL_IDLE) == 0 || task->pid) {
+ regs = ia64_task_regs(task);
+ ia64_psr(regs)->pp = is_ctxswin ? dcr_pp : 0;
+ return;
+ }
+ /*
+ * if monitoring has started
+ * (from here on: idle task with idle excluded from the measurement)
+ */
+ if (dcr_pp) {
+ dcr = ia64_getreg(_IA64_REG_CR_DCR);
+ /*
+ * context switching in?
+ */
+ if (is_ctxswin) {
+ /* mask monitoring for the idle task */
+ ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP);
+ pfm_clear_psr_pp();
+ ia64_srlz_i();
+ return;
+ }
+ /*
+ * context switching out
+ * restore monitoring for next task
+ *
+ * Due to inlining this odd if-then-else construction generates
+ * better code.
+ */
+ ia64_setreg(_IA64_REG_CR_DCR, dcr |IA64_DCR_PP);
+ pfm_set_psr_pp();
+ ia64_srlz_i();
+ }
+}
+
+#ifdef CONFIG_SMP
+
+/*
+ * Forcibly detach a context from the task it is attached to
+ * (ctx->ctx_task):
+ *  - clear psr.up and set psr.sp in the supplied register frame
+ *  - drop PMU ownership if that task currently owns the PMU
+ *  - clear the pending-work flag, the thread's context pointer and
+ *    its PM_VALID flag
+ *
+ * The context itself is not freed here.
+ */
+static void
+pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs)
+{
+	struct task_struct *victim = ctx->ctx_task;
+	struct thread_struct *th = &victim->thread;
+
+	ia64_psr(regs)->up = 0;
+	ia64_psr(regs)->sp = 1;
+
+	if (GET_PMU_OWNER() == victim) {
+		DPRINT(("cleared ownership for [%d]\n", victim->pid));
+		SET_PMU_OWNER(NULL, NULL);
+	}
+
+	/*
+	 * disconnect the task from the context and vice-versa
+	 */
+	PFM_SET_WORK_PENDING(victim, 0);
+
+	th->pfm_context = NULL;
+	th->flags      &= ~IA64_THREAD_PM_VALID;
+
+	DPRINT(("force cleanup for [%d]\n", victim->pid));
+}
+
+
+/*
+ * in 2.6, interrupts are masked when we come here and the runqueue lock is held
+ */
+/*
+ * SMP version: save the PMU state of task as it is switched out.
+ *
+ * Does nothing when the task has no perfmon context. A zombie context is
+ * torn down and freed here instead of being saved. Otherwise monitoring is
+ * stopped (psr.up cleared), psr.up is remembered in the context, PMU
+ * ownership is released and the used PMDs plus pmc0 are saved into the
+ * thread structure.
+ */
+void
+pfm_save_regs(struct task_struct *task)
+{
+ pfm_context_t *ctx;
+ struct thread_struct *t;
+ unsigned long flags;
+ u64 psr;
+
+
+ ctx = PFM_GET_CTX(task);
+ if (ctx == NULL) return;
+ t = &task->thread;
+
+ /*
+ * we always come here with interrupts ALREADY disabled by
+ * the scheduler. So we simply need to protect against concurrent
+ * access, not CPU concurrency.
+ */
+ flags = pfm_protect_ctx_ctxsw(ctx);
+
+ if (ctx->ctx_state == PFM_CTX_ZOMBIE) {
+ struct pt_regs *regs = ia64_task_regs(task);
+
+ pfm_clear_psr_up();
+
+ pfm_force_cleanup(ctx, regs);
+
+ BUG_ON(ctx->ctx_smpl_hdr);
+
+ pfm_unprotect_ctx_ctxsw(ctx, flags);
+
+ pfm_context_free(ctx);
+ return;
+ }
+
+ /*
+ * save current PSR: needed because we modify it
+ */
+ ia64_srlz_d();
+ psr = pfm_get_psr();
+
+ BUG_ON(psr & (IA64_PSR_I));
+
+ /*
+ * stop monitoring:
+ * This is the last instruction which may generate an overflow
+ *
+ * We do not need to set psr.sp because, it is irrelevant in kernel.
+ * It will be restored from ipsr when going back to user level
+ */
+ pfm_clear_psr_up();
+
+ /*
+ * keep a copy of psr.up (for reload)
+ */
+ ctx->ctx_saved_psr_up = psr & IA64_PSR_UP;
+
+ /*
+ * release ownership of this PMU.
+ * PM interrupts are masked, so nothing
+ * can happen.
+ */
+ SET_PMU_OWNER(NULL, NULL);
+
+ /*
+ * we systematically save the PMD as we have no
+ * guarantee we will be scheduled on that same
+ * CPU again.
+ */
+ pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]);
+
+ /*
+ * save pmc0 ia64_srlz_d() done in pfm_save_pmds()
+ * we will need it on the restore path to check
+ * for pending overflow.
+ */
+ t->pmcs[0] = ia64_get_pmc(0);
+
+ /*
+ * unfreeze PMU if had pending overflows
+ * (any bit other than bit 0 set in the saved pmc0)
+ */
+ if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
+
+ /*
+ * finally, allow context access.
+ * interrupts will still be masked after this call.
+ */
+ pfm_unprotect_ctx_ctxsw(ctx, flags);
+}
+
+#else /* !CONFIG_SMP */
+/*
+ * UP version: only stop monitoring for task and remember its psr.up bit.
+ * No PMU register is saved here; on UP that is done lazily by
+ * pfm_lazy_save_regs() when another task needs the PMU.
+ */
+void
+pfm_save_regs(struct task_struct *task)
+{
+ pfm_context_t *ctx;
+ u64 psr;
+
+ ctx = PFM_GET_CTX(task);
+ if (ctx == NULL) return;
+
+ /*
+ * save current PSR: needed because we modify it
+ */
+ psr = pfm_get_psr();
+
+ BUG_ON(psr & (IA64_PSR_I));
+
+ /*
+ * stop monitoring:
+ * This is the last instruction which may generate an overflow
+ *
+ * We do not need to set psr.sp because, it is irrelevant in kernel.
+ * It will be restored from ipsr when going back to user level
+ */
+ pfm_clear_psr_up();
+
+ /*
+ * keep a copy of psr.up (for reload)
+ */
+ ctx->ctx_saved_psr_up = psr & IA64_PSR_UP;
+}
+
+/*
+ * UP only: push the PMU state of task (the current PMU owner) out to its
+ * thread structure so that another task can take over the PMU.
+ * Must be entered with psr.up clear (checked with BUG_ON).
+ */
+static void
+pfm_lazy_save_regs (struct task_struct *task)
+{
+ pfm_context_t *ctx;
+ struct thread_struct *t;
+ unsigned long flags;
+
+ { u64 psr = pfm_get_psr();
+ BUG_ON(psr & IA64_PSR_UP);
+ }
+
+ ctx = PFM_GET_CTX(task);
+ t = &task->thread;
+
+ /*
+ * we need to mask PMU overflow here to
+ * make sure that we maintain pmc0 until
+ * we save it. overflow interrupts are
+ * treated as spurious if there is no
+ * owner.
+ *
+ * XXX: I don't think this is necessary
+ */
+ PROTECT_CTX(ctx,flags);
+
+ /*
+ * release ownership of this PMU.
+ * must be done before we save the registers.
+ *
+ * after this call any PMU interrupt is treated
+ * as spurious.
+ */
+ SET_PMU_OWNER(NULL, NULL);
+
+ /*
+ * save all the pmds we use
+ */
+ pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]);
+
+ /*
+ * save pmc0 ia64_srlz_d() done in pfm_save_pmds()
+ * it is needed to check for pended overflow
+ * on the restore path
+ */
+ t->pmcs[0] = ia64_get_pmc(0);
+
+ /*
+ * unfreeze PMU if had pending overflows
+ */
+ if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
+
+ /*
+ * now we can unmask PMU interrupts, they will
+ * be treated as purely spurious and we will not
+ * lose any information
+ */
+ UNPROTECT_CTX(ctx,flags);
+}
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_SMP
+/*
+ * in 2.6, interrupts are masked when we come here and the runqueue lock is held
+ */
+/*
+ * SMP version: reload the PMU state of task as it is switched in.
+ *
+ * Returns early when the task has no context or its thread is not flagged
+ * PM_VALID. A zombie context is torn down and freed instead of being
+ * loaded. Otherwise debug registers (if used), PMDs and PMCs are restored,
+ * a pending overflow recorded in the saved pmc0 is replayed, ownership is
+ * established and psr.up is restored last.
+ */
+void
+pfm_load_regs (struct task_struct *task)
+{
+ pfm_context_t *ctx;
+ struct thread_struct *t;
+ unsigned long pmc_mask = 0UL, pmd_mask = 0UL;
+ unsigned long flags;
+ u64 psr, psr_up;
+ int need_irq_resend;
+
+ ctx = PFM_GET_CTX(task);
+ if (unlikely(ctx == NULL)) return;
+
+ BUG_ON(GET_PMU_OWNER());
+
+ t = &task->thread;
+ /*
+ * possible on unload
+ */
+ if (unlikely((t->flags & IA64_THREAD_PM_VALID) == 0)) return;
+
+ /*
+ * we always come here with interrupts ALREADY disabled by
+ * the scheduler. So we simply need to protect against concurrent
+ * access, not CPU concurrency.
+ */
+ flags = pfm_protect_ctx_ctxsw(ctx);
+ psr = pfm_get_psr();
+
+ need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND;
+
+ BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));
+ BUG_ON(psr & IA64_PSR_I);
+
+ if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) {
+ struct pt_regs *regs = ia64_task_regs(task);
+
+ BUG_ON(ctx->ctx_smpl_hdr);
+
+ pfm_force_cleanup(ctx, regs);
+
+ pfm_unprotect_ctx_ctxsw(ctx, flags);
+
+ /*
+ * this one (kmalloc'ed) is fine with interrupts disabled
+ */
+ pfm_context_free(ctx);
+
+ return;
+ }
+
+ /*
+ * we restore ALL the debug registers to avoid picking up
+ * stale state.
+ */
+ if (ctx->ctx_fl_using_dbreg) {
+ pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
+ pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
+ }
+ /*
+ * retrieve saved psr.up
+ */
+ psr_up = ctx->ctx_saved_psr_up;
+
+ /*
+ * if we were the last user of the PMU on that CPU,
+ * then nothing to do except restore psr
+ */
+ if (GET_LAST_CPU(ctx) == smp_processor_id() && ctx->ctx_last_activation == GET_ACTIVATION()) {
+
+ /*
+ * retrieve partial reload masks (due to user modifications)
+ */
+ pmc_mask = ctx->ctx_reload_pmcs[0];
+ pmd_mask = ctx->ctx_reload_pmds[0];
+
+ } else {
+ /*
+ * To avoid leaking information to the user level when psr.sp=0,
+ * we must reload ALL implemented pmds (even the ones we don't use).
+ * In the kernel we only allow PFM_READ_PMDS on registers which
+ * we initialized or requested (sampling) so there is no risk there.
+ */
+ pmd_mask = pfm_sysctl.fastctxsw ? ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0];
+
+ /*
+ * ALL accessible PMCs are systematically reloaded, unused registers
+ * get their default (from pfm_reset_pmu_state()) values to avoid picking
+ * up stale configuration.
+ *
+ * PMC0 is never in the mask. It is always restored separately.
+ */
+ pmc_mask = ctx->ctx_all_pmcs[0];
+ }
+ /*
+ * when context is MASKED, we will restore PMC with plm=0
+ * and PMD with stale information, but that's ok, nothing
+ * will be captured.
+ *
+ * XXX: optimize here
+ */
+ if (pmd_mask) pfm_restore_pmds(t->pmds, pmd_mask);
+ if (pmc_mask) pfm_restore_pmcs(t->pmcs, pmc_mask);
+
+ /*
+ * check for pending overflow at the time the state
+ * was saved.
+ */
+ if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) {
+ /*
+ * reload pmc0 with the overflow information
+ * On McKinley PMU, this will trigger a PMU interrupt
+ */
+ ia64_set_pmc(0, t->pmcs[0]);
+ ia64_srlz_d();
+ t->pmcs[0] = 0UL;
+
+ /*
+ * will replay the PMU interrupt
+ */
+ if (need_irq_resend) hw_resend_irq(NULL, IA64_PERFMON_VECTOR);
+
+ pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++;
+ }
+
+ /*
+ * we just did a reload, so we reset the partial reload fields
+ */
+ ctx->ctx_reload_pmcs[0] = 0UL;
+ ctx->ctx_reload_pmds[0] = 0UL;
+
+ SET_LAST_CPU(ctx, smp_processor_id());
+
+ /*
+ * bump activation value for this PMU
+ */
+ INC_ACTIVATION();
+ /*
+ * record current activation for this context
+ */
+ SET_ACTIVATION(ctx);
+
+ /*
+ * establish new ownership.
+ */
+ SET_PMU_OWNER(task, ctx);
+
+ /*
+ * restore the psr.up bit. measurement
+ * is active again.
+ * no PMU interrupt can happen at this point
+ * because we still have interrupts disabled.
+ */
+ if (likely(psr_up)) pfm_set_psr_up();
+
+ /*
+ * allow concurrent access to context
+ */
+ pfm_unprotect_ctx_ctxsw(ctx, flags);
+}
+#else /* !CONFIG_SMP */
+/*
+ * reload PMU state for UP kernels
+ * in 2.5 we come here with interrupts disabled
+ */
+/*
+ * UP version: make task the owner of the PMU.
+ *
+ * If task still owns the PMU only psr.up is restored. Otherwise the
+ * current owner (if any) is pushed out with pfm_lazy_save_regs(), the
+ * PMDs and PMCs of task are reloaded, a pending overflow recorded in the
+ * saved pmc0 is replayed, and ownership plus psr.up are established.
+ *
+ * NOTE(review): ctx is dereferenced without a NULL check here, unlike the
+ * SMP version; presumably callers only invoke this for tasks that have a
+ * context - confirm against the call sites.
+ */
+void
+pfm_load_regs (struct task_struct *task)
+{
+ struct thread_struct *t;
+ pfm_context_t *ctx;
+ struct task_struct *owner;
+ unsigned long pmd_mask, pmc_mask;
+ u64 psr, psr_up;
+ int need_irq_resend;
+
+ owner = GET_PMU_OWNER();
+ ctx = PFM_GET_CTX(task);
+ t = &task->thread;
+ psr = pfm_get_psr();
+
+ BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));
+ BUG_ON(psr & IA64_PSR_I);
+
+ /*
+ * we restore ALL the debug registers to avoid picking up
+ * stale state.
+ *
+ * This must be done even when the task is still the owner
+ * as the registers may have been modified via ptrace()
+ * (not perfmon) by the previous task.
+ */
+ if (ctx->ctx_fl_using_dbreg) {
+ pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
+ pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
+ }
+
+ /*
+ * retrieve saved psr.up
+ */
+ psr_up = ctx->ctx_saved_psr_up;
+ need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND;
+
+ /*
+ * short path, our state is still there, just
+ * need to restore psr and we go
+ *
+ * we do not touch either PMC nor PMD. the psr is not touched
+ * by the overflow_handler. So we are safe w.r.t. to interrupt
+ * concurrency even without interrupt masking.
+ */
+ if (likely(owner == task)) {
+ if (likely(psr_up)) pfm_set_psr_up();
+ return;
+ }
+
+ /*
+ * someone else is still using the PMU, first push it out and
+ * then we'll be able to install our stuff !
+ *
+ * Upon return, there will be no owner for the current PMU
+ */
+ if (owner) pfm_lazy_save_regs(owner);
+
+ /*
+ * To avoid leaking information to the user level when psr.sp=0,
+ * we must reload ALL implemented pmds (even the ones we don't use).
+ * In the kernel we only allow PFM_READ_PMDS on registers which
+ * we initialized or requested (sampling) so there is no risk there.
+ */
+ pmd_mask = pfm_sysctl.fastctxsw ? ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0];
+
+ /*
+ * ALL accessible PMCs are systematically reloaded, unused registers
+ * get their default (from pfm_reset_pmu_state()) values to avoid picking
+ * up stale configuration.
+ *
+ * PMC0 is never in the mask. It is always restored separately
+ */
+ pmc_mask = ctx->ctx_all_pmcs[0];
+
+ pfm_restore_pmds(t->pmds, pmd_mask);
+ pfm_restore_pmcs(t->pmcs, pmc_mask);
+
+ /*
+ * check for pending overflow at the time the state
+ * was saved.
+ */
+ if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) {
+ /*
+ * reload pmc0 with the overflow information
+ * On McKinley PMU, this will trigger a PMU interrupt
+ */
+ ia64_set_pmc(0, t->pmcs[0]);
+ ia64_srlz_d();
+
+ t->pmcs[0] = 0UL;
+
+ /*
+ * will replay the PMU interrupt
+ */
+ if (need_irq_resend) hw_resend_irq(NULL, IA64_PERFMON_VECTOR);
+
+ pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++;
+ }
+
+ /*
+ * establish new ownership.
+ */
+ SET_PMU_OWNER(task, ctx);
+
+ /*
+ * restore the psr.up bit. measurement
+ * is active again.
+ * no PMU interrupt can happen at this point
+ * because we still have interrupts disabled.
+ */
+ if (likely(psr_up)) pfm_set_psr_up();
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * this function assumes monitoring is stopped
+ */
+/*
+ * Fold the final PMD values of a context into ctx->ctx_pmds[].val.
+ *
+ * For every used PMD the current value is taken either from the hardware
+ * (when the PMU can be accessed from here) or from the values saved in
+ * task->thread. Counting PMDs are rebuilt as full 64-bit values, including
+ * one extra wrap when the corresponding pmc0 overflow bit is set.
+ *
+ * task: task on whose behalf the flush is done
+ * ctx: context being flushed
+ */
+static void
+pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
+{
+ u64 pmc0;
+ unsigned long mask2, val, pmd_val, ovfl_val;
+ int i, can_access_pmu = 0;
+ int is_self;
+
+ /*
+ * is the caller the task being monitored (or which initiated the
+ * session for system wide measurements)
+ */
+ is_self = ctx->ctx_task == task ? 1 : 0;
+
+ /*
+ * can access PMU if task is the owner of the PMU state on the current CPU
+ * or if we are running on the CPU bound to the context in system-wide mode
+ * (that is not necessarily the task the context is attached to in this mode).
+ * In system-wide we always have can_access_pmu true because a task running on an
+ * invalid processor is flagged earlier in the call stack (see pfm_stop).
+ */
+ can_access_pmu = (GET_PMU_OWNER() == task) || (ctx->ctx_fl_system && ctx->ctx_cpu == smp_processor_id());
+ if (can_access_pmu) {
+ /*
+ * Mark the PMU as not owned
+ * This will cause the interrupt handler to do nothing in case an overflow
+ * interrupt was in-flight
+ * This also guarantees that pmc0 will contain the final state
+ * It virtually gives us full control on overflow processing from that point
+ * on.
+ */
+ SET_PMU_OWNER(NULL, NULL);
+ DPRINT(("releasing ownership\n"));
+
+ /*
+ * read current overflow status:
+ *
+ * we are guaranteed to read the final stable state
+ */
+ ia64_srlz_d();
+ pmc0 = ia64_get_pmc(0); /* slow */
+
+ /*
+ * reset freeze bit, overflow status information destroyed
+ */
+ pfm_unfreeze_pmu();
+ } else {
+ pmc0 = task->thread.pmcs[0];
+ /*
+ * clear whatever overflow status bits there were
+ */
+ task->thread.pmcs[0] = 0;
+ }
+ ovfl_val = pmu_conf->ovfl_val;
+ /*
+ * we save all the used pmds
+ * we take care of overflows for counting PMDs
+ *
+ * XXX: sampling situation is not taken into account here
+ */
+ mask2 = ctx->ctx_used_pmds[0];
+
+ DPRINT(("is_self=%d ovfl_val=0x%lx mask2=0x%lx\n", is_self, ovfl_val, mask2));
+
+ for (i = 0; mask2; i++, mask2>>=1) {
+
+ /* skip non used pmds */
+ if ((mask2 & 0x1) == 0) continue;
+
+ /*
+ * can access PMU always true in system wide mode
+ */
+ val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : task->thread.pmds[i];
+
+ if (PMD_IS_COUNTING(i)) {
+ DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n",
+ task->pid,
+ i,
+ ctx->ctx_pmds[i].val,
+ val & ovfl_val));
+
+ /*
+ * we rebuild the full 64 bit value of the counter
+ * (software-maintained part plus the hardware bits selected by ovfl_val)
+ */
+ val = ctx->ctx_pmds[i].val + (val & ovfl_val);
+
+ /*
+ * now everything is in ctx_pmds[] and we need
+ * to clear the saved context from save_regs() such that
+ * pfm_read_pmds() gets the correct value
+ */
+ pmd_val = 0UL;
+
+ /*
+ * take care of overflow inline
+ * (a pending overflow is worth one full hardware counter period)
+ */
+ if (pmc0 & (1UL << i)) {
+ val += 1 + ovfl_val;
+ DPRINT(("[%d] pmd[%d] overflowed\n", task->pid, i));
+ }
+ }
+
+ DPRINT(("[%d] ctx_pmd[%d]=0x%lx pmd_val=0x%lx\n", task->pid, i, val, pmd_val));
+
+ if (is_self) task->thread.pmds[i] = pmd_val;
+
+ ctx->ctx_pmds[i].val = val;
+ }
+}
+
+/*
+ * irqaction for the PMU overflow interrupt; registered on
+ * IA64_PERFMON_VECTOR by pfm_init_percpu().
+ */
+static struct irqaction perfmon_irqaction = {
+ .handler = pfm_interrupt_handler,
+ .flags = SA_INTERRUPT,
+ .name = "perfmon"
+};
+
+/*
+ * perfmon initialization routine, called from the initcall() table
+ */
+static int init_pfm_fs(void);
+
+/*
+ * Select the PMU description matching the boot CPU.
+ *
+ * Walks the NULL-terminated pmu_confs table. An entry with a probe()
+ * routine is chosen when probe() returns 0 (its family field is then not
+ * consulted); an entry without one is chosen when its pmu_family equals
+ * the CPU family or is the 0xff wildcard. The first match is stored in
+ * pmu_conf.
+ *
+ * Returns 0 on success, -1 when no entry matches.
+ */
+static int __init
+pfm_probe_pmu(void)
+{
+	pmu_config_t **cand;
+	int cpu_family = local_cpu_data->family;
+
+	for (cand = pmu_confs; *cand; cand++) {
+		int matched;
+
+		if ((*cand)->probe)
+			matched = ((*cand)->probe() == 0);
+		else
+			matched = ((*cand)->pmu_family == cpu_family || (*cand)->pmu_family == 0xff);
+
+		if (matched) {
+			pmu_conf = *cand;
+			return 0;
+		}
+	}
+
+	return -1;
+}
+
+/*
+ * file operations installed on the /proc/perfmon entry by pfm_init();
+ * everything except open is the stock seq_file implementation.
+ */
+static struct file_operations pfm_proc_fops = {
+ .open = pfm_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+/*
+ * perfmon initialization routine, called from the initcall() table.
+ *
+ * Probes for a supported PMU, derives the implemented PMC/PMD bitmasks
+ * and counts from the description tables, sanity-checks the
+ * configuration, then creates /proc/perfmon, registers the sysctl table,
+ * initializes the global spinlocks and the perfmon filesystem, and primes
+ * the per-CPU minimum-cycle statistic.
+ *
+ * Returns 0 on success, -ENODEV when no PMU description matches, and -1
+ * (with pmu_conf reset to NULL) when a later sanity check or the /proc
+ * entry creation fails.
+ */
+int __init
+pfm_init(void)
+{
+	unsigned int n, n_counters, i;
+
+	printk("perfmon: version %u.%u IRQ %u\n",
+		PFM_VERSION_MAJ,
+		PFM_VERSION_MIN,
+		IA64_PERFMON_VECTOR);
+
+	if (pfm_probe_pmu()) {
+		printk(KERN_INFO "perfmon: disabled, there is no support for processor family %d\n",
+			local_cpu_data->family);
+		return -ENODEV;
+	}
+
+	/*
+	 * compute the number of implemented PMD/PMC from the
+	 * description tables
+	 */
+	n = 0;
+	for (i=0; PMC_IS_LAST(i) == 0; i++) {
+		if (PMC_IS_IMPL(i) == 0) continue;
+		/* one bit per register, 64 registers per bitmask word */
+		pmu_conf->impl_pmcs[i>>6] |= 1UL << (i&63);
+		n++;
+	}
+	pmu_conf->num_pmcs = n;
+
+	n = 0; n_counters = 0;
+	for (i=0; PMD_IS_LAST(i) == 0; i++) {
+		if (PMD_IS_IMPL(i) == 0) continue;
+		pmu_conf->impl_pmds[i>>6] |= 1UL << (i&63);
+		n++;
+		if (PMD_IS_COUNTING(i)) n_counters++;
+	}
+	pmu_conf->num_pmds = n;
+	pmu_conf->num_counters = n_counters;
+
+	/*
+	 * sanity checks on the number of debug registers
+	 */
+	if (pmu_conf->use_rr_dbregs) {
+		if (pmu_conf->num_ibrs > IA64_NUM_DBG_REGS) {
+			printk(KERN_INFO "perfmon: unsupported number of code debug registers (%u)\n", pmu_conf->num_ibrs);
+			pmu_conf = NULL;
+			return -1;
+		}
+		if (pmu_conf->num_dbrs > IA64_NUM_DBG_REGS) {
+			/* report the data register count that failed the check, not the code register count */
+			printk(KERN_INFO "perfmon: unsupported number of data debug registers (%u)\n", pmu_conf->num_dbrs);
+			pmu_conf = NULL;
+			return -1;
+		}
+	}
+
+	printk("perfmon: %s PMU detected, %u PMCs, %u PMDs, %u counters (%lu bits)\n",
+		pmu_conf->pmu_name,
+		pmu_conf->num_pmcs,
+		pmu_conf->num_pmds,
+		pmu_conf->num_counters,
+		ffz(pmu_conf->ovfl_val));
+
+	/* sanity check */
+	if (pmu_conf->num_pmds >= IA64_NUM_PMD_REGS || pmu_conf->num_pmcs >= IA64_NUM_PMC_REGS) {
+		printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n");
+		pmu_conf = NULL;
+		return -1;
+	}
+
+	/*
+	 * create /proc/perfmon (mostly for debugging purposes)
+	 */
+	perfmon_dir = create_proc_entry("perfmon", S_IRUGO, NULL);
+	if (perfmon_dir == NULL) {
+		printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n");
+		pmu_conf = NULL;
+		return -1;
+	}
+	/*
+	 * install customized file operations for /proc/perfmon entry
+	 */
+	perfmon_dir->proc_fops = &pfm_proc_fops;
+
+	/*
+	 * create /proc/sys/kernel/perfmon (for debugging purposes)
+	 */
+	pfm_sysctl_header = register_sysctl_table(pfm_sysctl_root, 0);
+
+	/*
+	 * initialize all our spinlocks
+	 */
+	spin_lock_init(&pfm_sessions.pfs_lock);
+	spin_lock_init(&pfm_buffer_fmt_lock);
+
+	/* NOTE(review): the return value of init_pfm_fs() is ignored here */
+	init_pfm_fs();
+
+	/* start the minimum at the largest value so the first measurement replaces it */
+	for(i=0; i < NR_CPUS; i++) pfm_stats[i].pfm_ovfl_intr_cycles_min = ~0UL;
+
+	return 0;
+}
+
+__initcall(pfm_init);
+
+/*
+ * this function is called before pfm_init()
+ */
+/*
+ * Per-CPU PMU bring-up (runs before pfm_init()).
+ *
+ * Stops any measurement that may already be active, leaves the PMU
+ * unfrozen, registers the perfmon per-CPU interrupt (done once, from
+ * CPU 0) and programs the PMV register with the perfmon vector.
+ */
+void
+pfm_init_percpu (void)
+{
+	/*
+	 * make sure no measurement is active
+	 * (may inherit programmed PMCs from EFI).
+	 */
+	pfm_clear_psr_pp();
+	pfm_clear_psr_up();
+
+	/*
+	 * we run with the PMU not frozen at all times
+	 */
+	pfm_unfreeze_pmu();
+
+	/* the irqaction only needs to be registered once */
+	if (smp_processor_id() == 0) {
+		register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
+	}
+
+	ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR);
+	ia64_srlz_d();
+}
+
+/*
+ * used for debug purposes only
+ */
+/*
+ * used for debug purposes only
+ *
+ * Print the PMU state of the current CPU with interrupts disabled:
+ * current task, PMU owner/context, psr/dcr monitoring bits, every
+ * implemented PMC/PMD (hardware value next to the copy saved in
+ * current->thread) and, when a context owns the PMU, a summary of it.
+ *
+ * Nothing is printed when no system-wide info bit is set and neither the
+ * user psr.pp nor dcr.pp is set.
+ *
+ * Side effect: clears psr.up and psr.pp in the user register frame of
+ * current.
+ *
+ * from: name of the calling function, used in the first output line
+ */
+void
+dump_pmu_state(const char *from)
+{
+	struct task_struct *task;
+	struct thread_struct *t;
+	struct pt_regs *regs;
+	pfm_context_t *ctx;
+	unsigned long psr, dcr, info, flags;
+	int i, this_cpu;
+
+	local_irq_save(flags);
+
+	this_cpu = smp_processor_id();
+	regs = ia64_task_regs(current);
+	info = PFM_CPUINFO_GET();
+	dcr = ia64_getreg(_IA64_REG_CR_DCR);
+
+	/* no monitoring of any kind on this CPU: stay silent */
+	if (info == 0 && ia64_psr(regs)->pp == 0 && (dcr & IA64_DCR_PP) == 0) {
+		local_irq_restore(flags);
+		return;
+	}
+
+	printk("CPU%d from %s() current [%d] iip=0x%lx %s\n",
+		this_cpu,
+		from,
+		current->pid,
+		regs->cr_iip,
+		current->comm);
+
+	task = GET_PMU_OWNER();
+	ctx = GET_PMU_CTX();
+
+	printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? task->pid : -1, ctx);
+
+	psr = pfm_get_psr();
+
+	printk("->CPU%d pmc0=0x%lx psr.pp=%d psr.up=%d dcr.pp=%d syst_info=0x%lx user_psr.up=%d user_psr.pp=%d\n",
+		this_cpu,
+		ia64_get_pmc(0),
+		psr & IA64_PSR_PP ? 1 : 0,
+		psr & IA64_PSR_UP ? 1 : 0,
+		dcr & IA64_DCR_PP ? 1 : 0,
+		info,
+		ia64_psr(regs)->up,
+		ia64_psr(regs)->pp);
+
+	ia64_psr(regs)->up = 0;
+	ia64_psr(regs)->pp = 0;
+
+	t = &current->thread;
+
+	/* pmc0 was already printed above, so start at register 1 */
+	for (i=1; PMC_IS_LAST(i) == 0; i++) {
+		if (PMC_IS_IMPL(i) == 0) continue;
+		printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, t->pmcs[i]);
+	}
+
+	for (i=1; PMD_IS_LAST(i) == 0; i++) {
+		if (PMD_IS_IMPL(i) == 0) continue;
+		printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, t->pmds[i]);
+	}
+
+	if (ctx) {
+		/*
+		 * the two integers printed after the buffer addresses are the
+		 * message queue head/tail indices, so they are labelled as such
+		 */
+		printk("->CPU%d ctx_state=%d vaddr=%p addr=%p msgq_head=%d msgq_tail=%d saved_psr_up=0x%lx\n",
+			this_cpu,
+			ctx->ctx_state,
+			ctx->ctx_smpl_vaddr,
+			ctx->ctx_smpl_hdr,
+			ctx->ctx_msgq_head,
+			ctx->ctx_msgq_tail,
+			ctx->ctx_saved_psr_up);
+	}
+	local_irq_restore(flags);
+}
+
+/*
+ * called from process.c:copy_thread(). task is new child.
+ */
+/*
+ * Reset the perfmon state of a freshly created child task: the context
+ * pointer copied from the parent is dropped and the pending-work flag is
+ * cleared. regs is not used.
+ */
+void
+pfm_inherit(struct task_struct *task, struct pt_regs *regs)
+{
+	DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", task->pid));
+
+	/*
+	 * cut links inherited from parent (current)
+	 */
+	task->thread.pfm_context = NULL;
+
+	PFM_SET_WORK_PENDING(task, 0);
+
+	/*
+	 * the psr bits are already set properly in copy_threads()
+	 */
+}
+#else /* !CONFIG_PERFMON */
+/*
+ * Stub compiled when CONFIG_PERFMON is not set: the system call exists
+ * but always fails with -ENOSYS.
+ */
+asmlinkage long
+sys_perfmonctl (int fd, int cmd, void *arg, int count)
+{
+ return -ENOSYS;
+}
+#endif /* CONFIG_PERFMON */
diff --git a/arch/ia64/kernel/perfmon_default_smpl.c b/arch/ia64/kernel/perfmon_default_smpl.c
new file mode 100644
index 00000000000..965d2900455
--- /dev/null
+++ b/arch/ia64/kernel/perfmon_default_smpl.c
@@ -0,0 +1,306 @@
+/*
+ * Copyright (C) 2002-2003 Hewlett-Packard Co
+ * Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * This file implements the default sampling buffer format
+ * for the Linux/ia64 perfmon-2 subsystem.
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/init.h>
+#include <asm/delay.h>
+#include <linux/smp.h>
+
+#include <asm/perfmon.h>
+#include <asm/perfmon_default_smpl.h>
+
+MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
+MODULE_DESCRIPTION("perfmon default sampling format");
+MODULE_LICENSE("GPL");
+
+MODULE_PARM(debug, "i");
+MODULE_PARM_DESC(debug, "debug");
+
+MODULE_PARM(debug_ovfl, "i");
+MODULE_PARM_DESC(debug_ovfl, "debug ovfl");
+
+
+#define DEFAULT_DEBUG 1
+/* DEFAULT_DEBUG is defined, so the printing variants are compiled in. DPRINT((fmt, ...)) takes a parenthesised printk argument list and prints it, prefixed with function, line and CPU, only when debug > 0 */
+#ifdef DEFAULT_DEBUG
+#define DPRINT(a) \
+ do { \
+ if (unlikely(debug >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
+ } while (0)
+/* DPRINT_ovfl: same as DPRINT but gated by debug_ovfl; used on the overflow (sampling) path */
+#define DPRINT_ovfl(a) \
+ do { \
+ if (unlikely(debug_ovfl >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
+ } while (0)
+
+#else
+#define DPRINT(a)
+#define DPRINT_ovfl(a)
+#endif
+
+static int debug, debug_ovfl; /* switches tested by DPRINT and DPRINT_ovfl; both are named in the MODULE_PARM declarations earlier in this file */
+
+static int
+default_validate(struct task_struct *task, unsigned int flags, int cpu, void *data)
+{
+ pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t*)data;
+ int ret = 0;
+ /* fmt_validate hook of default_fmt: data is interpreted as a pfm_default_smpl_arg_t; returns 0 if usable, -EINVAL if it is NULL or buf_size is too small */
+ if (data == NULL) {
+ DPRINT(("[%d] no argument passed\n", task->pid));
+ return -EINVAL;
+ }
+
+ DPRINT(("[%d] validate flags=0x%x CPU%d\n", task->pid, flags, cpu));
+
+ /*
+ * must hold at least the buffer header + one minimally sized entry
+ */
+ if (arg->buf_size < PFM_DEFAULT_SMPL_MIN_BUF_SIZE) return -EINVAL;
+
+ DPRINT(("buf_size=%lu\n", arg->buf_size));
+
+ return ret; /* ret is never modified after initialisation, so this is 0 */
+}
+
+static int
+default_get_size(struct task_struct *task, unsigned int flags, int cpu, void *data, unsigned long *size)
+{
+ pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data;
+
+ /*
+ * fmt_getsize hook of default_fmt: reports arg->buf_size through *size; size has been validated in default_validate
+ */
+ *size = arg->buf_size;
+
+ return 0; /* cannot fail; task, flags and cpu are unused */
+}
+
+static int
+default_init(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *data)
+{
+ pfm_default_smpl_hdr_t *hdr;
+ pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data;
+ /* fmt_init hook of default_fmt: the header lives at the very start of buf; neither buf nor data is NULL-checked here */
+ hdr = (pfm_default_smpl_hdr_t *)buf;
+ /* empty buffer: the write offset sits just past the header and both counters start at zero */
+ hdr->hdr_version = PFM_DEFAULT_SMPL_VERSION;
+ hdr->hdr_buf_size = arg->buf_size;
+ hdr->hdr_cur_offs = sizeof(*hdr);
+ hdr->hdr_overflows = 0UL;
+ hdr->hdr_count = 0UL;
+
+ DPRINT(("[%d] buffer=%p buf_size=%lu hdr_size=%lu hdr_version=%u cur_offs=%lu\n",
+ task->pid,
+ buf,
+ hdr->hdr_buf_size,
+ sizeof(*hdr),
+ hdr->hdr_version,
+ hdr->hdr_cur_offs));
+
+ return 0; /* always succeeds */
+}
+
+static int
+default_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp)
+{
+ pfm_default_smpl_hdr_t *hdr;
+ pfm_default_smpl_entry_t *ent;
+ void *cur, *last;
+ unsigned long *e, entry_size;
+ unsigned int npmds, i;
+ unsigned char ovfl_pmd;
+ unsigned char ovfl_notify;
+ /* fmt_handler hook of default_fmt: appends one sample to buf. Returns 0 when stored with room left, -1 when the buffer is full (before or right after storing), -EINVAL on a NULL argument */
+ if (unlikely(buf == NULL || arg == NULL|| regs == NULL || task == NULL)) {
+ DPRINT(("[%d] invalid arguments buf=%p arg=%p\n", task ? task->pid : -1, buf, arg)); /* task itself may be the NULL argument */
+ return -EINVAL;
+ }
+
+ hdr = (pfm_default_smpl_hdr_t *)buf;
+ cur = buf+hdr->hdr_cur_offs;
+ last = buf+hdr->hdr_buf_size;
+ ovfl_pmd = arg->ovfl_pmd;
+ ovfl_notify = arg->ovfl_notify;
+
+ /*
+ * precheck for sanity: never start an entry unless a maximally sized one fits
+ */
+ if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full;
+
+ npmds = hweight64(arg->smpl_pmds[0]); /* number of bits set in smpl_pmds[0] = number of PMD values to record */
+
+ ent = (pfm_default_smpl_entry_t *)cur;
+
+ prefetch(arg->smpl_pmds_values);
+
+ entry_size = sizeof(*ent) + (npmds << 3); /* fixed entry header followed by npmds 8-byte values */
+
+ /* position for first pmd */
+ e = (unsigned long *)(ent+1);
+
+ hdr->hdr_count++;
+
+ DPRINT_ovfl(("[%d] count=%lu cur=%p last=%p free_bytes=%lu ovfl_pmd=%d ovfl_notify=%d npmds=%u\n",
+ task->pid,
+ hdr->hdr_count,
+ cur, last,
+ last-cur,
+ ovfl_pmd,
+ ovfl_notify, npmds));
+
+ /*
+ * current = task running at the time of the overflow.
+ *
+ * per-task mode:
+ * - this is usually the task being monitored.
+ * Under certain conditions, it might be a different task
+ *
+ * system-wide:
+ * - this is not necessarily the task controlling the session
+ */
+ ent->pid = current->pid;
+ ent->ovfl_pmd = ovfl_pmd;
+ ent->last_reset_val = arg->pmd_last_reset; //pmd[0].reg_last_reset_val;
+
+ /*
+ * where did the fault happen (includes slot number)
+ */
+ ent->ip = regs->cr_iip | ((regs->cr_ipsr >> 41) & 0x3);
+
+ ent->tstamp = stamp;
+ ent->cpu = smp_processor_id();
+ ent->set = arg->active_set;
+ ent->tgid = current->tgid;
+
+ /*
+ * selectively store PMDs in increasing index number
+ */
+ if (npmds) {
+ unsigned long *val = arg->smpl_pmds_values;
+ for(i=0; i < npmds; i++) {
+ *e++ = *val++;
+ }
+ }
+
+ /*
+ * update position for next entry
+ */
+ hdr->hdr_cur_offs += entry_size;
+ cur += entry_size;
+
+ /*
+ * post check to avoid losing the last sample
+ */
+ if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full;
+
+ /*
+ * keep same ovfl_pmds, ovfl_notify
+ */
+ arg->ovfl_ctrl.bits.notify_user = 0;
+ arg->ovfl_ctrl.bits.block_task = 0;
+ arg->ovfl_ctrl.bits.mask_monitoring = 0;
+ arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1; /* reset before returning from interrupt handler */
+
+ return 0;
+full:
+ DPRINT_ovfl(("sampling buffer full free=%lu, count=%lu, ovfl_notify=%d\n", last-cur, hdr->hdr_count, ovfl_notify));
+
+ /*
+ * increment number of buffer overflow.
+ * important to detect duplicate set of samples.
+ */
+ hdr->hdr_overflows++;
+
+ /*
+ * if no notification requested, then we saturate the buffer
+ */
+ if (ovfl_notify == 0) {
+ arg->ovfl_ctrl.bits.notify_user = 0;
+ arg->ovfl_ctrl.bits.block_task = 0;
+ arg->ovfl_ctrl.bits.mask_monitoring = 1;
+ arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0;
+ } else {
+ arg->ovfl_ctrl.bits.notify_user = 1;
+ arg->ovfl_ctrl.bits.block_task = 1; /* ignored for non-blocking context */
+ arg->ovfl_ctrl.bits.mask_monitoring = 1;
+ arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0; /* no reset now */
+ }
+ return -1; /* we are full, sorry */
+}
+
+static int
+default_restart(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
+{
+ pfm_default_smpl_hdr_t *hdr;
+ /* installed as both fmt_restart and fmt_restart_active in default_fmt: empties the buffer and fills in ctrl to resume monitoring */
+ hdr = (pfm_default_smpl_hdr_t *)buf;
+ /* rewind to the state default_init() sets up; hdr_overflows is not reset here */
+ hdr->hdr_count = 0UL;
+ hdr->hdr_cur_offs = sizeof(*hdr);
+
+ ctrl->bits.mask_monitoring = 0;
+ ctrl->bits.reset_ovfl_pmds = 1; /* uses long-reset values */
+
+ return 0; /* always succeeds; task and regs are unused */
+}
+
+static int
+default_exit(struct task_struct *task, void *buf, struct pt_regs *regs)
+{
+ DPRINT(("[%d] exit(%p)\n", task->pid, buf)); /* fmt_exit hook of default_fmt: only traces the call, buf is not touched here */
+ return 0;
+}
+
+static pfm_buffer_fmt_t default_fmt={ /* descriptor of the default sampling format: name, UUID, argument size and the callbacks defined in this file */
+ .fmt_name = "default_format",
+ .fmt_uuid = PFM_DEFAULT_SMPL_UUID,
+ .fmt_arg_size = sizeof(pfm_default_smpl_arg_t),
+ .fmt_validate = default_validate,
+ .fmt_getsize = default_get_size,
+ .fmt_init = default_init,
+ .fmt_handler = default_handler,
+ .fmt_restart = default_restart,
+ .fmt_restart_active = default_restart, /* same routine as fmt_restart */
+ .fmt_exit = default_exit,
+};
+
+static int __init
+pfm_default_smpl_init_module(void)
+{
+ int ret;
+ /* module entry point: register default_fmt with perfmon, log the outcome, and return the registration status unchanged */
+ ret = pfm_register_buffer_fmt(&default_fmt);
+ if (ret == 0) {
+ printk("perfmon_default_smpl: %s v%u.%u registered\n",
+ default_fmt.fmt_name,
+ PFM_DEFAULT_SMPL_VERSION_MAJ,
+ PFM_DEFAULT_SMPL_VERSION_MIN);
+ } else {
+ printk("perfmon_default_smpl: %s cannot register ret=%d\n",
+ default_fmt.fmt_name,
+ ret);
+ }
+
+ return ret;
+}
+
+static void __exit
+pfm_default_smpl_cleanup_module(void)
+{
+ int ret;
+ ret = pfm_unregister_buffer_fmt(default_fmt.fmt_uuid); /* the format is unregistered by its UUID */
+ /* a void exit routine cannot report failure, so the status is only logged */
+ printk("perfmon_default_smpl: unregister %s=%d\n", default_fmt.fmt_name, ret);
+}
+
+module_init(pfm_default_smpl_init_module);
+module_exit(pfm_default_smpl_cleanup_module);
+
diff --git a/arch/ia64/kernel/perfmon_generic.h b/arch/ia64/kernel/perfmon_generic.h
new file mode 100644
index 00000000000..67489478041
--- /dev/null
+++ b/arch/ia64/kernel/perfmon_generic.h
@@ -0,0 +1,45 @@
+/*
+ * This file contains the generic PMU register description tables
+ * and pmc checker used by perfmon.c.
+ *
+ * Copyright (C) 2002-2003 Hewlett Packard Co
+ * Stephane Eranian <eranian@hpl.hp.com>
+ */
+
+static pfm_reg_desc_t pfm_gen_pmc_desc[PMU_MAX_PMCS]={ /* generic PMU: one row per PMC, in register-number order; the PFM_REG_END row terminates the list */
+/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+ { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
+};
+
+static pfm_reg_desc_t pfm_gen_pmd_desc[PMU_MAX_PMDS]={ /* generic PMU: one row per PMD; pmd0-pmd3 are marked not implemented, pmd4-pmd7 are counters; PFM_REG_END terminates */
+/* pmd0 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
+/* pmd1 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
+/* pmd2 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
+/* pmd3 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
+/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
+/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
+/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
+/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
+ { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
+};
+
+/*
+ * Generic PMU description built on the pfm_gen_* tables. impl_pmcs, impl_pmds are computed at runtime to minimize errors!
+ */
+static pmu_config_t pmu_conf_gen={
+ .pmu_name = "Generic",
+ .pmu_family = 0xff, /* any */
+ .ovfl_val = (1UL << 32) - 1, /* 0xffffffff */
+ .num_ibrs = 0, /* does not use */
+ .num_dbrs = 0, /* does not use */
+ .pmd_desc = pfm_gen_pmd_desc,
+ .pmc_desc = pfm_gen_pmc_desc
+};
+
diff --git a/arch/ia64/kernel/perfmon_itanium.h b/arch/ia64/kernel/perfmon_itanium.h
new file mode 100644
index 00000000000..d1d508a0fbd
--- /dev/null
+++ b/arch/ia64/kernel/perfmon_itanium.h
@@ -0,0 +1,115 @@
+/*
+ * This file contains the Itanium PMU register description tables
+ * and pmc checker used by perfmon.c.
+ *
+ * Copyright (C) 2002-2003 Hewlett Packard Co
+ * Stephane Eranian <eranian@hpl.hp.com>
+ */
+static int pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
+
+static pfm_reg_desc_t pfm_ita_pmc_desc[PMU_MAX_PMCS]={ /* Itanium: one row per PMC, in register-number order; pmc11 and pmc13 name pfm_ita_pmc_check as their checker; PFM_REG_END terminates */
+/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc4 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc8 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc9 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc10 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0000000010000000UL, -1UL, NULL, pfm_ita_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc13 */ { PFM_REG_CONFIG , 0, 0x0003ffff00000001UL, -1UL, NULL, pfm_ita_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+ { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
+};
+
+static pfm_reg_desc_t pfm_ita_pmd_desc[PMU_MAX_PMDS]={ /* Itanium: one row per PMD, in register-number order; pmd4-pmd7 are the counters, the rest are buffer registers; PFM_REG_END terminates */
+/* pmd0 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
+/* pmd1 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
+/* pmd2 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+/* pmd3 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+/* pmd4 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
+/* pmd5 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
+/* pmd6 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
+/* pmd7 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
+/* pmd8 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd9 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd10 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd11 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd12 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd13 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd14 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd15 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd16 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd17 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+ { PFM_REG_END , 0, 0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
+};
+
+static int
+pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
+{
+ int ret;
+ int is_loaded;
+ /* checker named by the pmc11 and pmc13 rows of pfm_ita_pmc_desc; returns 0, -EINVAL, or the error from pfm_write_ibr_dbr(). *val is only read. */
+ /* sanity check */
+ if (ctx == NULL) return -EINVAL;
+
+ is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED;
+
+ /*
+ * we must clear the (instruction) debug registers if pmc13.ta bit is cleared
+ * before they are written (fl_using_dbreg==0) to avoid picking up stale information.
+ */
+ if (cnum == 13 && is_loaded && ((*val & 0x1) == 0UL) && ctx->ctx_fl_using_dbreg == 0) {
+
+ DPRINT(("pmc[%d]=0x%lx has active pmc13.ta cleared, clearing ibr\n", cnum, *val));
+
+ /* don't mix debug with perfmon */
+ if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
+
+ /*
+ * a count of 0 will mark the debug registers as in use and also
+ * ensure that they are properly cleared.
+ */
+ ret = pfm_write_ibr_dbr(1, ctx, NULL, 0, regs); /* NOTE(review): literal mode 1; perfmon_mckinley.h passes PFM_CODE_RR/PFM_DATA_RR - confirm the literal matches the intended constant */
+ if (ret) return ret;
+ }
+
+ /*
+ * we must clear the (data) debug registers if pmc11.pt bit is cleared
+ * before they are written (fl_using_dbreg==0) to avoid picking up stale information.
+ */
+ if (cnum == 11 && is_loaded && ((*val >> 28)& 0x1) == 0 && ctx->ctx_fl_using_dbreg == 0) {
+
+ DPRINT(("pmc[%d]=0x%lx has active pmc11.pt cleared, clearing dbr\n", cnum, *val));
+
+ /* don't mix debug with perfmon */
+ if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
+
+ /*
+ * a count of 0 will mark the debug registers as in use and also
+ * ensure that they are properly cleared.
+ */
+ ret = pfm_write_ibr_dbr(0, ctx, NULL, 0, regs); /* NOTE(review): literal mode 0; see the note on the pmc13 call */
+ if (ret) return ret;
+ }
+ return 0;
+}
+
+/*
+ * Itanium PMU description built on the pfm_ita_* tables. impl_pmcs, impl_pmds are computed at runtime to minimize errors!
+ */
+static pmu_config_t pmu_conf_ita={
+ .pmu_name = "Itanium",
+ .pmu_family = 0x7,
+ .ovfl_val = (1UL << 32) - 1, /* 0xffffffff */
+ .pmd_desc = pfm_ita_pmd_desc,
+ .pmc_desc = pfm_ita_pmc_desc,
+ .num_ibrs = 8,
+ .num_dbrs = 8,
+ .use_rr_dbregs = 1, /* debug registers are used for range restrictions */
+};
+
+
diff --git a/arch/ia64/kernel/perfmon_mckinley.h b/arch/ia64/kernel/perfmon_mckinley.h
new file mode 100644
index 00000000000..9becccda289
--- /dev/null
+++ b/arch/ia64/kernel/perfmon_mckinley.h
@@ -0,0 +1,187 @@
+/*
+ * This file contains the McKinley PMU register description tables
+ * and pmc checker used by perfmon.c.
+ *
+ * Copyright (C) 2002-2003 Hewlett Packard Co
+ * Stephane Eranian <eranian@hpl.hp.com>
+ */
+static int pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
+
+static pfm_reg_desc_t pfm_mck_pmc_desc[PMU_MAX_PMCS]={ /* McKinley: one row per PMC, in register-number order; pmc4-pmc15 name pfm_mck_pmc_check as their checker; PFM_REG_END terminates */
+/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc4 */ { PFM_REG_COUNTING, 6, 0x0000000000800000UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc8 */ { PFM_REG_CONFIG , 0, 0xffffffff3fffffffUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc9 */ { PFM_REG_CONFIG , 0, 0xffffffff3ffffffcUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc10 */ { PFM_REG_MONITOR , 4, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0UL, 0x30f01cf, NULL, pfm_mck_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc13 */ { PFM_REG_CONFIG , 0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc14 */ { PFM_REG_CONFIG , 0, 0x0db60db60db60db6UL, 0x2492UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc15 */ { PFM_REG_CONFIG , 0, 0x00000000fffffff0UL, 0xfUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+ { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
+};
+
+static pfm_reg_desc_t pfm_mck_pmd_desc[PMU_MAX_PMDS]={ /* McKinley: one row per PMD, in register-number order; pmd4-pmd7 are the counters, the rest are buffer registers; PFM_REG_END terminates */
+/* pmd0 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
+/* pmd1 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
+/* pmd2 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+/* pmd3 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
+/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
+/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
+/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
+/* pmd8 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd9 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd10 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd11 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd12 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd13 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd14 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd15 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd16 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd17 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+ { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
+};
+
+/*
+ * PMC reserved fields must have their power-up values preserved: *val is rewritten in place; always returns 0, regs is unused
+ */
+static int
+pfm_mck_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs)
+{
+ unsigned long tmp1, tmp2, ival = *val;
+
+ /* remove reserved areas from user value: keep only the bits selected by PMC_RSVD_MASK(cnum) */
+ tmp1 = ival & PMC_RSVD_MASK(cnum);
+
+ /* get reserved fields values: default-value bits outside that mask */
+ tmp2 = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum);
+
+ *val = tmp1 | tmp2;
+
+ DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n",
+ cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val));
+ return 0;
+}
+
+/*
+ * Checker named by the pmc4-pmc15 rows of pfm_mck_pmc_desc. task can be NULL if the context is unloaded. Returns 0, -EINVAL, or the error from pfm_write_ibr_dbr(); *val may be rewritten.
+ */
+static int
+pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
+{
+ int ret = 0, check_case1 = 0;
+ unsigned long val8 = 0, val14 = 0, val13 = 0;
+ int is_loaded;
+
+ /* first preserve the reserved fields (note: *val is rewritten even if the ctx check below fails) */
+ pfm_mck_reserved(cnum, val, regs);
+
+ /* sanity check */
+ if (ctx == NULL) return -EINVAL;
+
+ is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED;
+
+ /*
+ * we must clear the debug registers if pmc13 has a value which enable
+ * memory pipeline event constraints. In this case we need to clear the
+ * debug registers if they have not yet been accessed. This is required
+ * to avoid picking stale state.
+ * PMC13 is "active" if:
+ * one of the pmc13.cfg_dbrpXX field is different from 0x3
+ * AND
+ * at the corresponding pmc13.ena_dbrpXX is set.
+ */
+ DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, *val, ctx->ctx_fl_using_dbreg, is_loaded));
+
+ if (cnum == 13 && is_loaded
+ && (*val & 0x1e00000000000UL) && (*val & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) {
+
+ DPRINT(("pmc[%d]=0x%lx has active pmc13 settings, clearing dbr\n", cnum, *val));
+
+ /* don't mix debug with perfmon */
+ if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
+
+ /*
+ * a count of 0 will mark the debug registers as in use and also
+ * ensure that they are properly cleared.
+ */
+ ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs);
+ if (ret) return ret;
+ }
+ /*
+ * we must clear the (instruction) debug registers if any pmc14.ibrpX bit is enabled
+ * before they are written (fl_using_dbreg==0) to avoid picking up stale information.
+ */
+ if (cnum == 14 && is_loaded && ((*val & 0x2222UL) != 0x2222UL) && ctx->ctx_fl_using_dbreg == 0) {
+
+ DPRINT(("pmc[%d]=0x%lx has active pmc14 settings, clearing ibr\n", cnum, *val));
+
+ /* don't mix debug with perfmon */
+ if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
+
+ /*
+ * a count of 0 will mark the debug registers as in use and also
+ * ensure that they are properly cleared.
+ */
+ ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs);
+ if (ret) return ret;
+
+ }
+ /* for pmc8/pmc13/pmc14, combine the value being written with the other two values saved in ctx->ctx_pmcs[] */
+ switch(cnum) {
+ case 4: *val |= 1UL << 23; /* force power enable bit */
+ break;
+ case 8: val8 = *val;
+ val13 = ctx->ctx_pmcs[13];
+ val14 = ctx->ctx_pmcs[14];
+ check_case1 = 1;
+ break;
+ case 13: val8 = ctx->ctx_pmcs[8];
+ val13 = *val;
+ val14 = ctx->ctx_pmcs[14];
+ check_case1 = 1;
+ break;
+ case 14: val8 = ctx->ctx_pmcs[8];
+ val13 = ctx->ctx_pmcs[13];
+ val14 = *val;
+ check_case1 = 1;
+ break;
+ }
+ /* check illegal configuration which can produce inconsistencies in tagging
+ * i-side events in L1D and L2 caches
+ */
+ if (check_case1) {
+ ret = ((val13 >> 45) & 0xf) == 0
+ && ((val8 & 0x1) == 0)
+ && ((((val14>>1) & 0x3) == 0x2 || ((val14>>1) & 0x3) == 0x0)
+ ||(((val14>>4) & 0x3) == 0x2 || ((val14>>4) & 0x3) == 0x0));
+
+ if (ret) DPRINT((KERN_DEBUG "perfmon: failure check_case1\n"));
+ }
+
+ return ret ? -EINVAL : 0; /* ret is a boolean "illegal combination" flag at this point */
+}
+
+/*
+ * Itanium 2 PMU description built on the pfm_mck_* tables. impl_pmcs, impl_pmds are computed at runtime to minimize errors!
+ */
+static pmu_config_t pmu_conf_mck={
+ .pmu_name = "Itanium 2",
+ .pmu_family = 0x1f,
+ .flags = PFM_PMU_IRQ_RESEND,
+ .ovfl_val = (1UL << 47) - 1, /* low 47 bits set */
+ .pmd_desc = pfm_mck_pmd_desc,
+ .pmc_desc = pfm_mck_pmc_desc,
+ .num_ibrs = 8,
+ .num_dbrs = 8,
+ .use_rr_dbregs = 1 /* debug registers are used for range restrictions */
+};
+
+
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
new file mode 100644
index 00000000000..91293388dd2
--- /dev/null
+++ b/arch/ia64/kernel/process.c
@@ -0,0 +1,800 @@
+/*
+ * Architecture-specific setup.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#define __KERNEL_SYSCALLS__ /* see <asm/unistd.h> */
+#include <linux/config.h>
+
+#include <linux/cpu.h>
+#include <linux/pm.h>
+#include <linux/elf.h>
+#include <linux/errno.h>
+#include <linux/kallsyms.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/personality.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/thread_info.h>
+#include <linux/unistd.h>
+#include <linux/efi.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+
+#include <asm/cpu.h>
+#include <asm/delay.h>
+#include <asm/elf.h>
+#include <asm/ia32.h>
+#include <asm/irq.h>
+#include <asm/pgalloc.h>
+#include <asm/processor.h>
+#include <asm/sal.h>
+#include <asm/tlbflush.h>
+#include <asm/uaccess.h>
+#include <asm/unwind.h>
+#include <asm/user.h>
+
+#include "entry.h"
+
+#ifdef CONFIG_PERFMON
+# include <asm/perfmon.h>
+#endif
+
+#include "sigframe.h"
+
+void (*ia64_mark_idle)(int);
+static cpumask_t cpu_idle_map;
+
+unsigned long boot_option_idle_override = 0;
+EXPORT_SYMBOL(boot_option_idle_override);
+
+void
+ia64_do_show_stack (struct unw_frame_info *info, void *arg)
+{
+ unsigned long ip, sp, bsp;
+ char buf[128]; /* don't make it so big that it overflows the stack! */
+
+ printk("\nCall Trace:\n");
+ do {
+ unw_get_ip(info, &ip);
+ if (ip == 0)
+ break;
+
+ unw_get_sp(info, &sp);
+ unw_get_bsp(info, &bsp);
+ snprintf(buf, sizeof(buf),
+ " [<%016lx>] %%s\n"
+ " sp=%016lx bsp=%016lx\n",
+ ip, sp, bsp);
+ print_symbol(buf, ip);
+ } while (unw_unwind(info) >= 0);
+}
+
+void
+show_stack (struct task_struct *task, unsigned long *sp)
+{
+ if (!task)
+ unw_init_running(ia64_do_show_stack, NULL);
+ else {
+ struct unw_frame_info info;
+
+ unw_init_from_blocked_task(&info, task);
+ ia64_do_show_stack(&info, NULL);
+ }
+}
+
+void
+dump_stack (void)
+{
+ show_stack(NULL, NULL);
+}
+
+EXPORT_SYMBOL(dump_stack);
+
+void
+show_regs (struct pt_regs *regs)
+{
+ unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
+
+ print_modules();
+ printk("\nPid: %d, CPU %d, comm: %20s\n", current->pid, smp_processor_id(), current->comm);
+ printk("psr : %016lx ifs : %016lx ip : [<%016lx>] %s\n",
+ regs->cr_ipsr, regs->cr_ifs, ip, print_tainted());
+ print_symbol("ip is at %s\n", ip);
+ printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
+ regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
+ printk("rnat: %016lx bsps: %016lx pr : %016lx\n",
+ regs->ar_rnat, regs->ar_bspstore, regs->pr);
+ printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n",
+ regs->loadrs, regs->ar_ccv, regs->ar_fpsr);
+ printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd);
+ printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0, regs->b6, regs->b7);
+ printk("f6 : %05lx%016lx f7 : %05lx%016lx\n",
+ regs->f6.u.bits[1], regs->f6.u.bits[0],
+ regs->f7.u.bits[1], regs->f7.u.bits[0]);
+ printk("f8 : %05lx%016lx f9 : %05lx%016lx\n",
+ regs->f8.u.bits[1], regs->f8.u.bits[0],
+ regs->f9.u.bits[1], regs->f9.u.bits[0]);
+ printk("f10 : %05lx%016lx f11 : %05lx%016lx\n",
+ regs->f10.u.bits[1], regs->f10.u.bits[0],
+ regs->f11.u.bits[1], regs->f11.u.bits[0]);
+
+ printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1, regs->r2, regs->r3);
+ printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8, regs->r9, regs->r10);
+ printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11, regs->r12, regs->r13);
+ printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14, regs->r15, regs->r16);
+ printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17, regs->r18, regs->r19);
+ printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20, regs->r21, regs->r22);
+ printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23, regs->r24, regs->r25);
+ printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26, regs->r27, regs->r28);
+ printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29, regs->r30, regs->r31);
+
+ if (user_mode(regs)) {
+ /* print the stacked registers */
+ unsigned long val, *bsp, ndirty;
+ int i, sof, is_nat = 0;
+
+ sof = regs->cr_ifs & 0x7f; /* size of frame */
+ ndirty = (regs->loadrs >> 19);
+ bsp = ia64_rse_skip_regs((unsigned long *) regs->ar_bspstore, ndirty);
+ for (i = 0; i < sof; ++i) {
+ get_user(val, (unsigned long __user *) ia64_rse_skip_regs(bsp, i));
+ printk("r%-3u:%c%016lx%s", 32 + i, is_nat ? '*' : ' ', val,
+ ((i == sof - 1) || (i % 3) == 2) ? "\n" : " ");
+ }
+ } else
+ show_stack(NULL, NULL);
+}
+
+void
+do_notify_resume_user (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
+{
+ if (fsys_mode(current, &scr->pt)) {
+ /* defer signal-handling etc. until we return to privilege-level 0. */
+ if (!ia64_psr(&scr->pt)->lp)
+ ia64_psr(&scr->pt)->lp = 1;
+ return;
+ }
+
+#ifdef CONFIG_PERFMON
+ if (current->thread.pfm_needs_checking)
+ pfm_handle_work();
+#endif
+
+ /* deal with pending signal delivery */
+ if (test_thread_flag(TIF_SIGPENDING))
+ ia64_do_signal(oldset, scr, in_syscall);
+}
+
+static int pal_halt = 1;
+static int __init nohalt_setup(char * str)
+{
+ pal_halt = 0;
+ return 1;
+}
+__setup("nohalt", nohalt_setup);
+
+/*
+ * We use this if we don't have any better idle routine.
+ */
+void
+default_idle (void)
+{
+ unsigned long pmu_active = ia64_getreg(_IA64_REG_PSR) & (IA64_PSR_PP | IA64_PSR_UP);
+
+ while (!need_resched())
+ if (pal_halt && !pmu_active)
+ safe_halt();
+ else
+ cpu_relax();
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+/* We don't actually take CPU down, just spin without interrupts. */
+static inline void play_dead(void)
+{
+ extern void ia64_cpu_local_tick (void);
+ /* Ack it */
+ __get_cpu_var(cpu_state) = CPU_DEAD;
+
+ /* We shouldn't have to disable interrupts while dead, but
+ * some interrupts just don't seem to go away, and this makes
+ * it "work" for testing purposes. */
+ max_xtp();
+ local_irq_disable();
+ /* Death loop */
+ while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
+ cpu_relax();
+
+ /*
+ * Enable timer interrupts from now on
+ * Not required if we put processor in SAL_BOOT_RENDEZ mode.
+ */
+ local_flush_tlb_all();
+ cpu_set(smp_processor_id(), cpu_online_map);
+ wmb();
+ ia64_cpu_local_tick ();
+ local_irq_enable();
+}
+#else
+static inline void play_dead(void)
+{
+ BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+
+void cpu_idle_wait(void)
+{
+ int cpu;
+ cpumask_t map;
+
+ for_each_online_cpu(cpu)
+ cpu_set(cpu, cpu_idle_map);
+
+ wmb();
+ do {
+ ssleep(1);
+ cpus_and(map, cpu_idle_map, cpu_online_map);
+ } while (!cpus_empty(map));
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
+void __attribute__((noreturn))
+cpu_idle (void)
+{
+ void (*mark_idle)(int) = ia64_mark_idle;
+ int cpu = smp_processor_id();
+
+ /* endless idle loop with no priority at all */
+ while (1) {
+#ifdef CONFIG_SMP
+ if (!need_resched())
+ min_xtp();
+#endif
+ while (!need_resched()) {
+ void (*idle)(void);
+
+ if (mark_idle)
+ (*mark_idle)(1);
+
+ if (cpu_isset(cpu, cpu_idle_map))
+ cpu_clear(cpu, cpu_idle_map);
+ rmb();
+ idle = pm_idle;
+ if (!idle)
+ idle = default_idle;
+ (*idle)();
+ }
+
+ if (mark_idle)
+ (*mark_idle)(0);
+
+#ifdef CONFIG_SMP
+ normal_xtp();
+#endif
+ schedule();
+ check_pgt_cache();
+ if (cpu_is_offline(smp_processor_id()))
+ play_dead();
+ }
+}
+
+void
+ia64_save_extra (struct task_struct *task)
+{
+#ifdef CONFIG_PERFMON
+ unsigned long info;
+#endif
+
+ if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
+ ia64_save_debug_regs(&task->thread.dbr[0]);
+
+#ifdef CONFIG_PERFMON
+ if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
+ pfm_save_regs(task);
+
+ info = __get_cpu_var(pfm_syst_info);
+ if (info & PFM_CPUINFO_SYST_WIDE)
+ pfm_syst_wide_update_task(task, info, 0);
+#endif
+
+#ifdef CONFIG_IA32_SUPPORT
+ if (IS_IA32_PROCESS(ia64_task_regs(task)))
+ ia32_save_state(task);
+#endif
+}
+
+void
+ia64_load_extra (struct task_struct *task)
+{
+#ifdef CONFIG_PERFMON
+ unsigned long info;
+#endif
+
+ if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
+ ia64_load_debug_regs(&task->thread.dbr[0]);
+
+#ifdef CONFIG_PERFMON
+ if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
+ pfm_load_regs(task);
+
+ info = __get_cpu_var(pfm_syst_info);
+ if (info & PFM_CPUINFO_SYST_WIDE)
+ pfm_syst_wide_update_task(task, info, 1);
+#endif
+
+#ifdef CONFIG_IA32_SUPPORT
+ if (IS_IA32_PROCESS(ia64_task_regs(task)))
+ ia32_load_state(task);
+#endif
+}
+
+/*
+ * Copy the state of an ia-64 thread.
+ *
+ * We get here through the following call chain:
+ *
+ * from user-level: from kernel:
+ *
+ * <clone syscall> <some kernel call frames>
+ * sys_clone :
+ * do_fork do_fork
+ * copy_thread copy_thread
+ *
+ * This means that the stack layout is as follows:
+ *
+ * +---------------------+ (highest addr)
+ * | struct pt_regs |
+ * +---------------------+
+ * | struct switch_stack |
+ * +---------------------+
+ * | |
+ * | memory stack |
+ * | | <-- sp (lowest addr)
+ * +---------------------+
+ *
+ * Observe that we copy the unat values that are in pt_regs and switch_stack. Spilling an
+ * integer to address X causes bit N in ar.unat to be set to the NaT bit of the register,
+ * with N=(X & 0x1ff)/8. Thus, copying the unat value preserves the NaT bits ONLY if the
+ * pt_regs structure in the parent is congruent to that of the child, modulo 512. Since
+ * the stack is page aligned and the page size is at least 4KB, this is always the case,
+ * so there is nothing to worry about.
+ */
+int
+copy_thread (int nr, unsigned long clone_flags,
+ unsigned long user_stack_base, unsigned long user_stack_size,
+ struct task_struct *p, struct pt_regs *regs)
+{
+ extern char ia64_ret_from_clone, ia32_ret_from_clone;
+ struct switch_stack *child_stack, *stack;
+ unsigned long rbs, child_rbs, rbs_size;
+ struct pt_regs *child_ptregs;
+ int retval = 0;
+
+#ifdef CONFIG_SMP
+ /*
+ * For SMP idle threads, fork_by_hand() calls do_fork with
+ * NULL regs.
+ */
+ if (!regs)
+ return 0;
+#endif
+
+ stack = ((struct switch_stack *) regs) - 1;
+
+ child_ptregs = (struct pt_regs *) ((unsigned long) p + IA64_STK_OFFSET) - 1;
+ child_stack = (struct switch_stack *) child_ptregs - 1;
+
+ /* copy parent's switch_stack & pt_regs to child: */
+ memcpy(child_stack, stack, sizeof(*child_ptregs) + sizeof(*child_stack));
+
+ rbs = (unsigned long) current + IA64_RBS_OFFSET;
+ child_rbs = (unsigned long) p + IA64_RBS_OFFSET;
+ rbs_size = stack->ar_bspstore - rbs;
+
+ /* copy the parent's register backing store to the child: */
+ memcpy((void *) child_rbs, (void *) rbs, rbs_size);
+
+ if (likely(user_mode(child_ptregs))) {
+ if ((clone_flags & CLONE_SETTLS) && !IS_IA32_PROCESS(regs))
+ child_ptregs->r13 = regs->r16; /* see sys_clone2() in entry.S */
+ if (user_stack_base) {
+ child_ptregs->r12 = user_stack_base + user_stack_size - 16;
+ child_ptregs->ar_bspstore = user_stack_base;
+ child_ptregs->ar_rnat = 0;
+ child_ptregs->loadrs = 0;
+ }
+ } else {
+ /*
+ * Note: we simply preserve the relative position of
+ * the stack pointer here. There is no need to
+ * allocate a scratch area here, since that will have
+ * been taken care of by the caller of sys_clone()
+ * already.
+ */
+ child_ptregs->r12 = (unsigned long) child_ptregs - 16; /* kernel sp */
+ child_ptregs->r13 = (unsigned long) p; /* set `current' pointer */
+ }
+ child_stack->ar_bspstore = child_rbs + rbs_size;
+ if (IS_IA32_PROCESS(regs))
+ child_stack->b0 = (unsigned long) &ia32_ret_from_clone;
+ else
+ child_stack->b0 = (unsigned long) &ia64_ret_from_clone;
+
+ /* copy parts of thread_struct: */
+ p->thread.ksp = (unsigned long) child_stack - 16;
+
+ /* stop some PSR bits from being inherited.
+ * the psr.up/psr.pp bits must be cleared on fork but inherited on execve()
+ * therefore we must specify them explicitly here and not include them in
+ * IA64_PSR_BITS_TO_CLEAR.
+ */
+ child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET)
+ & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP));
+
+ /*
+ * NOTE: The calling convention considers all floating point
+ * registers in the high partition (fph) to be scratch. Since
+ * the only way to get to this point is through a system call,
+ * we know that the values in fph are all dead. Hence, there
+ * is no need to inherit the fph state from the parent to the
+ * child and all we have to do is to make sure that
+ * IA64_THREAD_FPH_VALID is cleared in the child.
+ *
+ * XXX We could push this optimization a bit further by
+ * clearing IA64_THREAD_FPH_VALID on ANY system call.
+ * However, it's not clear this is worth doing. Also, it
+ * would be a slight deviation from the normal Linux system
+ * call behavior where scratch registers are preserved across
+ * system calls (unless used by the system call itself).
+ */
+# define THREAD_FLAGS_TO_CLEAR (IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID \
+ | IA64_THREAD_PM_VALID)
+# define THREAD_FLAGS_TO_SET 0
+ p->thread.flags = ((current->thread.flags & ~THREAD_FLAGS_TO_CLEAR)
+ | THREAD_FLAGS_TO_SET);
+ ia64_drop_fpu(p); /* don't pick up stale state from a CPU's fph */
+#ifdef CONFIG_IA32_SUPPORT
+ /*
+ * If we're cloning an IA32 task then save the IA32 extra
+ * state from the current task to the new task
+ */
+ if (IS_IA32_PROCESS(ia64_task_regs(current))) {
+ ia32_save_state(p);
+ if (clone_flags & CLONE_SETTLS)
+ retval = ia32_clone_tls(p, child_ptregs);
+
+ /* Copy partially mapped page list */
+ if (!retval)
+ retval = ia32_copy_partial_page_list(p, clone_flags);
+ }
+#endif
+
+#ifdef CONFIG_PERFMON
+ if (current->thread.pfm_context)
+ pfm_inherit(p, child_ptregs);
+#endif
+ return retval;
+}
+
+static void
+do_copy_task_regs (struct task_struct *task, struct unw_frame_info *info, void *arg)
+{
+ unsigned long mask, sp, nat_bits = 0, ip, ar_rnat, urbs_end, cfm;
+ elf_greg_t *dst = arg;
+ struct pt_regs *pt;
+ char nat;
+ int i;
+
+ memset(dst, 0, sizeof(elf_gregset_t)); /* don't leak any kernel bits to user-level */
+
+ if (unw_unwind_to_user(info) < 0)
+ return;
+
+ unw_get_sp(info, &sp);
+ pt = (struct pt_regs *) (sp + 16);
+
+ urbs_end = ia64_get_user_rbs_end(task, pt, &cfm);
+
+ if (ia64_sync_user_rbs(task, info->sw, pt->ar_bspstore, urbs_end) < 0)
+ return;
+
+ ia64_peek(task, info->sw, urbs_end, (long) ia64_rse_rnat_addr((long *) urbs_end),
+ &ar_rnat);
+
+ /*
+ * coredump format:
+ * r0-r31
+ * NaT bits (for r0-r31; bit N == 1 iff rN is a NaT)
+ * predicate registers (p0-p63)
+ * b0-b7
+ * ip cfm user-mask
+ * ar.rsc ar.bsp ar.bspstore ar.rnat
+ * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec
+ */
+
+ /* r0 is zero */
+ for (i = 1, mask = (1UL << i); i < 32; ++i) {
+ unw_get_gr(info, i, &dst[i], &nat);
+ if (nat)
+ nat_bits |= mask;
+ mask <<= 1;
+ }
+ dst[32] = nat_bits;
+ unw_get_pr(info, &dst[33]);
+
+ for (i = 0; i < 8; ++i)
+ unw_get_br(info, i, &dst[34 + i]);
+
+ unw_get_rp(info, &ip);
+ dst[42] = ip + ia64_psr(pt)->ri;
+ dst[43] = cfm;
+ dst[44] = pt->cr_ipsr & IA64_PSR_UM;
+
+ unw_get_ar(info, UNW_AR_RSC, &dst[45]);
+ /*
+ * For bsp and bspstore, unw_get_ar() would return the kernel
+ * addresses, but we need the user-level addresses instead:
+ */
+ dst[46] = urbs_end; /* note: by convention PT_AR_BSP points to the end of the urbs! */
+ dst[47] = pt->ar_bspstore;
+ dst[48] = ar_rnat;
+ unw_get_ar(info, UNW_AR_CCV, &dst[49]);
+ unw_get_ar(info, UNW_AR_UNAT, &dst[50]);
+ unw_get_ar(info, UNW_AR_FPSR, &dst[51]);
+ dst[52] = pt->ar_pfs; /* UNW_AR_PFS is == to pt->cr_ifs for interrupt frames */
+ unw_get_ar(info, UNW_AR_LC, &dst[53]);
+ unw_get_ar(info, UNW_AR_EC, &dst[54]);
+ unw_get_ar(info, UNW_AR_CSD, &dst[55]);
+ unw_get_ar(info, UNW_AR_SSD, &dst[56]);
+}
+
+void
+do_dump_task_fpu (struct task_struct *task, struct unw_frame_info *info, void *arg)
+{
+ elf_fpreg_t *dst = arg;
+ int i;
+
+ memset(dst, 0, sizeof(elf_fpregset_t)); /* don't leak any "random" bits */
+
+ if (unw_unwind_to_user(info) < 0)
+ return;
+
+ /* f0 is 0.0, f1 is 1.0 */
+
+ for (i = 2; i < 32; ++i)
+ unw_get_fr(info, i, dst + i);
+
+ ia64_flush_fph(task);
+ if ((task->thread.flags & IA64_THREAD_FPH_VALID) != 0)
+ memcpy(dst + 32, task->thread.fph, 96*16);
+}
+
+void
+do_copy_regs (struct unw_frame_info *info, void *arg)
+{
+ do_copy_task_regs(current, info, arg);
+}
+
+void
+do_dump_fpu (struct unw_frame_info *info, void *arg)
+{
+ do_dump_task_fpu(current, info, arg);
+}
+
+int
+dump_task_regs(struct task_struct *task, elf_gregset_t *regs)
+{
+ struct unw_frame_info tcore_info;
+
+ if (current == task) {
+ unw_init_running(do_copy_regs, regs);
+ } else {
+ memset(&tcore_info, 0, sizeof(tcore_info));
+ unw_init_from_blocked_task(&tcore_info, task);
+ do_copy_task_regs(task, &tcore_info, regs);
+ }
+ return 1;
+}
+
+void
+ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst)
+{
+ unw_init_running(do_copy_regs, dst);
+}
+
+int
+dump_task_fpu (struct task_struct *task, elf_fpregset_t *dst)
+{
+ struct unw_frame_info tcore_info;
+
+ if (current == task) {
+ unw_init_running(do_dump_fpu, dst);
+ } else {
+ memset(&tcore_info, 0, sizeof(tcore_info));
+ unw_init_from_blocked_task(&tcore_info, task);
+ do_dump_task_fpu(task, &tcore_info, dst);
+ }
+ return 1;
+}
+
+int
+dump_fpu (struct pt_regs *pt, elf_fpregset_t dst)
+{
+ unw_init_running(do_dump_fpu, dst);
+ return 1; /* f0-f31 are always valid so we always return 1 */
+}
+
+long
+sys_execve (char __user *filename, char __user * __user *argv, char __user * __user *envp,
+ struct pt_regs *regs)
+{
+ char *fname;
+ int error;
+
+ fname = getname(filename);
+ error = PTR_ERR(fname);
+ if (IS_ERR(fname))
+ goto out;
+ error = do_execve(fname, argv, envp, regs);
+ putname(fname);
+out:
+ return error;
+}
+
+pid_t
+kernel_thread (int (*fn)(void *), void *arg, unsigned long flags)
+{
+ extern void start_kernel_thread (void);
+ unsigned long *helper_fptr = (unsigned long *) &start_kernel_thread;
+ struct {
+ struct switch_stack sw;
+ struct pt_regs pt;
+ } regs;
+
+ memset(&regs, 0, sizeof(regs));
+ regs.pt.cr_iip = helper_fptr[0]; /* set entry point (IP) */
+ regs.pt.r1 = helper_fptr[1]; /* set GP */
+ regs.pt.r9 = (unsigned long) fn; /* 1st argument */
+ regs.pt.r11 = (unsigned long) arg; /* 2nd argument */
+ /* Preserve PSR bits, except for bits 32-34 and 37-45, which we can't read. */
+ regs.pt.cr_ipsr = ia64_getreg(_IA64_REG_PSR) | IA64_PSR_BN;
+ regs.pt.cr_ifs = 1UL << 63; /* mark as valid, empty frame */
+ regs.sw.ar_fpsr = regs.pt.ar_fpsr = ia64_getreg(_IA64_REG_AR_FPSR);
+ regs.sw.ar_bspstore = (unsigned long) current + IA64_RBS_OFFSET;
+ regs.sw.pr = (1 << PRED_KERNEL_STACK);
+ return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs.pt, 0, NULL, NULL);
+}
+EXPORT_SYMBOL(kernel_thread);
+
+/* This gets called from kernel_thread() via ia64_invoke_thread_helper(). */
+int
+kernel_thread_helper (int (*fn)(void *), void *arg)
+{
+#ifdef CONFIG_IA32_SUPPORT
+ if (IS_IA32_PROCESS(ia64_task_regs(current))) {
+ /* A kernel thread is always a 64-bit process. */
+ current->thread.map_base = DEFAULT_MAP_BASE;
+ current->thread.task_size = DEFAULT_TASK_SIZE;
+ ia64_set_kr(IA64_KR_IO_BASE, current->thread.old_iob);
+ ia64_set_kr(IA64_KR_TSSD, current->thread.old_k1);
+ }
+#endif
+ return (*fn)(arg);
+}
+
+/*
+ * Flush thread state. This is called when a thread does an execve().
+ */
+void
+flush_thread (void)
+{
+ /* drop floating-point and debug-register state if it exists: */
+ current->thread.flags &= ~(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID);
+ ia64_drop_fpu(current);
+ if (IS_IA32_PROCESS(ia64_task_regs(current)))
+ ia32_drop_partial_page_list(current);
+}
+
+/*
+ * Clean up state associated with current thread. This is called when
+ * the thread calls exit().
+ */
+void
+exit_thread (void)
+{
+ ia64_drop_fpu(current);
+#ifdef CONFIG_PERFMON
+ /* if needed, stop monitoring and flush state to perfmon context */
+ if (current->thread.pfm_context)
+ pfm_exit_thread(current);
+
+ /* free debug register resources */
+ if (current->thread.flags & IA64_THREAD_DBG_VALID)
+ pfm_release_debug_registers(current);
+#endif
+ if (IS_IA32_PROCESS(ia64_task_regs(current)))
+ ia32_drop_partial_page_list(current);
+}
+
+unsigned long
+get_wchan (struct task_struct *p)
+{
+ struct unw_frame_info info;
+ unsigned long ip;
+ int count = 0;
+
+ /*
+ * Note: p may not be a blocked task (it could be current or
+ * another process running on some other CPU). Rather than
+ * trying to determine if p is really blocked, we just assume
+ * it's blocked and rely on the unwind routines to fail
+ * gracefully if the process wasn't really blocked after all.
+ * --davidm 99/12/15
+ */
+ unw_init_from_blocked_task(&info, p);
+ do {
+ if (unw_unwind(&info) < 0)
+ return 0;
+ unw_get_ip(&info, &ip);
+ if (!in_sched_functions(ip))
+ return ip;
+ } while (count++ < 16);
+ return 0;
+}
+
+void
+cpu_halt (void)
+{
+ pal_power_mgmt_info_u_t power_info[8];
+ unsigned long min_power;
+ int i, min_power_state;
+
+ if (ia64_pal_halt_info(power_info) != 0)
+ return;
+
+ min_power_state = 0;
+ min_power = power_info[0].pal_power_mgmt_info_s.power_consumption;
+ for (i = 1; i < 8; ++i)
+ if (power_info[i].pal_power_mgmt_info_s.im
+ && power_info[i].pal_power_mgmt_info_s.power_consumption < min_power) {
+ min_power = power_info[i].pal_power_mgmt_info_s.power_consumption;
+ min_power_state = i;
+ }
+
+ while (1)
+ ia64_pal_halt(min_power_state);
+}
+
+void
+machine_restart (char *restart_cmd)
+{
+ (*efi.reset_system)(EFI_RESET_WARM, 0, 0, NULL);
+}
+
+EXPORT_SYMBOL(machine_restart);
+
+void
+machine_halt (void)
+{
+ cpu_halt();
+}
+
+EXPORT_SYMBOL(machine_halt);
+
+void
+machine_power_off (void)
+{
+ if (pm_power_off)
+ pm_power_off();
+ machine_halt();
+}
+
+EXPORT_SYMBOL(machine_power_off);
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
new file mode 100644
index 00000000000..55789fcd721
--- /dev/null
+++ b/arch/ia64/kernel/ptrace.c
@@ -0,0 +1,1627 @@
+/*
+ * Kernel support for the ptrace() and syscall tracing interfaces.
+ *
+ * Copyright (C) 1999-2005 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Derived from the x86 and Alpha versions.
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/smp_lock.h>
+#include <linux/user.h>
+#include <linux/security.h>
+#include <linux/audit.h>
+
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/ptrace_offsets.h>
+#include <asm/rse.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/unwind.h>
+#ifdef CONFIG_PERFMON
+#include <asm/perfmon.h>
+#endif
+
+#include "entry.h"
+
+/*
+ * Bits in the PSR that we allow ptrace() to change:
+ * be, up, ac, mfl, mfh (the user mask; five bits total)
+ * db (debug breakpoint fault; one bit)
+ * id (instruction debug fault disable; one bit)
+ * dd (data debug fault disable; one bit)
+ * ri (restart instruction; two bits)
+ * is (instruction set; one bit)
+ */
+#define IPSR_MASK (IA64_PSR_UM | IA64_PSR_DB | IA64_PSR_IS \
+ | IA64_PSR_ID | IA64_PSR_DD | IA64_PSR_RI)
+
+#define MASK(nbits) ((1UL << (nbits)) - 1) /* mask with NBITS bits set */
+#define PFM_MASK MASK(38)
+
+#define PTRACE_DEBUG 0
+
+#if PTRACE_DEBUG
+# define dprintk(format...) printk(format)
+# define inline
+#else
+# define dprintk(format...)
+#endif
+
+/* Return TRUE if PT was created due to kernel-entry via a system-call. */
+
+static inline int
+in_syscall (struct pt_regs *pt)
+{
+ return (long) pt->cr_ifs >= 0;
+}
+
+/*
+ * Collect the NaT bits for r1-r31 from scratch_unat and return a NaT
+ * bitset where bit i is set iff the NaT bit of register i is set.
+ */
+unsigned long
+ia64_get_scratch_nat_bits (struct pt_regs *pt, unsigned long scratch_unat)
+{
+# define GET_BITS(first, last, unat) \
+ ({ \
+ unsigned long bit = ia64_unat_pos(&pt->r##first); \
+ unsigned long nbits = (last - first + 1); \
+ unsigned long mask = MASK(nbits) << first; \
+ unsigned long dist; \
+ if (bit < first) \
+ dist = 64 + bit - first; \
+ else \
+ dist = bit - first; \
+ ia64_rotr(unat, dist) & mask; \
+ })
+ unsigned long val;
+
+ /*
+ * Registers that are stored consecutively in struct pt_regs
+ * can be handled in parallel. If the register order in
+ * struct pt_regs changes, this code MUST be updated.
+ */
+ val = GET_BITS( 1, 1, scratch_unat);
+ val |= GET_BITS( 2, 3, scratch_unat);
+ val |= GET_BITS(12, 13, scratch_unat);
+ val |= GET_BITS(14, 14, scratch_unat);
+ val |= GET_BITS(15, 15, scratch_unat);
+ val |= GET_BITS( 8, 11, scratch_unat);
+ val |= GET_BITS(16, 31, scratch_unat);
+ return val;
+
+# undef GET_BITS
+}
+
+/*
+ * Set the NaT bits for the scratch registers according to NAT and
+ * return the resulting unat (assuming the scratch registers are
+ * stored in PT).
+ */
+unsigned long
+ia64_put_scratch_nat_bits (struct pt_regs *pt, unsigned long nat)
+{
+# define PUT_BITS(first, last, nat) \
+ ({ \
+ unsigned long bit = ia64_unat_pos(&pt->r##first); \
+ unsigned long nbits = (last - first + 1); \
+ unsigned long mask = MASK(nbits) << first; \
+ long dist; \
+ if (bit < first) \
+ dist = 64 + bit - first; \
+ else \
+ dist = bit - first; \
+ ia64_rotl(nat & mask, dist); \
+ })
+ unsigned long scratch_unat;
+
+ /*
+ * Registers that are stored consecutively in struct pt_regs
+ * can be handled in parallel. If the register order in
+ * struct pt_regs changes, this code MUST be updated.
+ */
+ scratch_unat = PUT_BITS( 1, 1, nat);
+ scratch_unat |= PUT_BITS( 2, 3, nat);
+ scratch_unat |= PUT_BITS(12, 13, nat);
+ scratch_unat |= PUT_BITS(14, 14, nat);
+ scratch_unat |= PUT_BITS(15, 15, nat);
+ scratch_unat |= PUT_BITS( 8, 11, nat);
+ scratch_unat |= PUT_BITS(16, 31, nat);
+
+ return scratch_unat;
+
+# undef PUT_BITS
+}
+
+#define IA64_MLX_TEMPLATE 0x2
+#define IA64_MOVL_OPCODE 6
+
+void
+ia64_increment_ip (struct pt_regs *regs)
+{
+ unsigned long w0, ri = ia64_psr(regs)->ri + 1;
+
+ if (ri > 2) {
+ ri = 0;
+ regs->cr_iip += 16;
+ } else if (ri == 2) {
+ get_user(w0, (char __user *) regs->cr_iip + 0);
+ if (((w0 >> 1) & 0xf) == IA64_MLX_TEMPLATE) {
+ /*
+ * rfi'ing to slot 2 of an MLX bundle causes
+ * an illegal operation fault. We don't want
+ * that to happen...
+ */
+ ri = 0;
+ regs->cr_iip += 16;
+ }
+ }
+ ia64_psr(regs)->ri = ri;
+}
+
+void
+ia64_decrement_ip (struct pt_regs *regs)
+{
+ unsigned long w0, ri = ia64_psr(regs)->ri - 1;
+
+ if (ia64_psr(regs)->ri == 0) {
+ regs->cr_iip -= 16;
+ ri = 2;
+ get_user(w0, (char __user *) regs->cr_iip + 0);
+ if (((w0 >> 1) & 0xf) == IA64_MLX_TEMPLATE) {
+ /*
+ * rfi'ing to slot 2 of an MLX bundle causes
+ * an illegal operation fault. We don't want
+ * that to happen...
+ */
+ ri = 1;
+ }
+ }
+ ia64_psr(regs)->ri = ri;
+}
+
+/*
+ * This routine is used to read the rnat bits that are stored on the
+ * kernel backing store. Since, in general, the alignment of the user
+ * and kernel are different, this is not completely trivial. In
+ * essence, we need to construct the user RNAT based on up to two
+ * kernel RNAT values and/or the RNAT value saved in the child's
+ * pt_regs.
+ *
+ * user rbs
+ *
+ * +--------+ <-- lowest address
+ * | slot62 |
+ * +--------+
+ * | rnat | 0x....1f8
+ * +--------+
+ * | slot00 | \
+ * +--------+ |
+ * | slot01 | > child_regs->ar_rnat
+ * +--------+ |
+ * | slot02 | / kernel rbs
+ * +--------+ +--------+
+ * <- child_regs->ar_bspstore | slot61 | <-- krbs
+ * +- - - - + +--------+
+ * | slot62 |
+ * +- - - - + +--------+
+ * | rnat |
+ * +- - - - + +--------+
+ * vrnat | slot00 |
+ * +- - - - + +--------+
+ * = =
+ * +--------+
+ * | slot00 | \
+ * +--------+ |
+ * | slot01 | > child_stack->ar_rnat
+ * +--------+ |
+ * | slot02 | /
+ * +--------+
+ * <--- child_stack->ar_bspstore
+ *
+ * The way to think of this code is as follows: bit 0 in the user rnat
+ * corresponds to some bit N (0 <= N <= 62) in one of the kernel rnat
+ * values. The kernel rnat value holding this bit is stored in
+ * variable rnat0. rnat1 is loaded with the kernel rnat value that
+ * forms the upper bits of the user rnat value.
+ *
+ * Boundary cases:
+ *
+ * o when reading the rnat "below" the first rnat slot on the kernel
+ * backing store, rnat0/rnat1 are set to 0 and the low order bits are
+ * merged in from pt->ar_rnat.
+ *
+ * o when reading the rnat "above" the last rnat slot on the kernel
+ * backing store, rnat0/rnat1 gets its value from sw->ar_rnat.
+ */
+static unsigned long
+get_rnat (struct task_struct *task, struct switch_stack *sw,
+ unsigned long *krbs, unsigned long *urnat_addr,
+ unsigned long *urbs_end)
+{
+ unsigned long rnat0 = 0, rnat1 = 0, urnat = 0, *slot0_kaddr;
+ unsigned long umask = 0, mask, m;
+ unsigned long *kbsp, *ubspstore, *rnat0_kaddr, *rnat1_kaddr, shift;
+ long num_regs, nbits;
+ struct pt_regs *pt;
+
+ pt = ia64_task_regs(task);
+ kbsp = (unsigned long *) sw->ar_bspstore;
+ ubspstore = (unsigned long *) pt->ar_bspstore;
+
+ if (urbs_end < urnat_addr)
+ nbits = ia64_rse_num_regs(urnat_addr - 63, urbs_end);
+ else
+ nbits = 63;
+ mask = MASK(nbits);
+ /*
+ * First, figure out which bit number slot 0 in user-land maps
+ * to in the kernel rnat. Do this by figuring out how many
+ * register slots we're beyond the user's backingstore and
+ * then computing the equivalent address in kernel space.
+ */
+ num_regs = ia64_rse_num_regs(ubspstore, urnat_addr + 1);
+ slot0_kaddr = ia64_rse_skip_regs(krbs, num_regs);
+ shift = ia64_rse_slot_num(slot0_kaddr);
+ rnat1_kaddr = ia64_rse_rnat_addr(slot0_kaddr);
+ rnat0_kaddr = rnat1_kaddr - 64;
+
+ if (ubspstore + 63 > urnat_addr) {
+ /* some bits need to be merged in from pt->ar_rnat */
+ umask = MASK(ia64_rse_slot_num(ubspstore)) & mask;
+ urnat = (pt->ar_rnat & umask);
+ mask &= ~umask;
+ if (!mask)
+ return urnat;
+ }
+
+ m = mask << shift;
+ if (rnat0_kaddr >= kbsp)
+ rnat0 = sw->ar_rnat;
+ else if (rnat0_kaddr > krbs)
+ rnat0 = *rnat0_kaddr;
+ urnat |= (rnat0 & m) >> shift;
+
+ m = mask >> (63 - shift);
+ if (rnat1_kaddr >= kbsp)
+ rnat1 = sw->ar_rnat;
+ else if (rnat1_kaddr > krbs)
+ rnat1 = *rnat1_kaddr;
+ urnat |= (rnat1 & m) << (63 - shift);
+ return urnat;
+}
+
+/*
+ * The reverse of get_rnat.
+ */
+static void
+put_rnat (struct task_struct *task, struct switch_stack *sw,
+ unsigned long *krbs, unsigned long *urnat_addr, unsigned long urnat,
+ unsigned long *urbs_end)
+{
+ unsigned long rnat0 = 0, rnat1 = 0, *slot0_kaddr, umask = 0, mask, m;
+ unsigned long *kbsp, *ubspstore, *rnat0_kaddr, *rnat1_kaddr, shift;
+ long num_regs, nbits;
+ struct pt_regs *pt;
+ unsigned long cfm, *urbs_kargs;
+
+ pt = ia64_task_regs(task);
+ kbsp = (unsigned long *) sw->ar_bspstore;
+ ubspstore = (unsigned long *) pt->ar_bspstore;
+
+ urbs_kargs = urbs_end;
+ if (in_syscall(pt)) {
+ /*
+ * If entered via syscall, don't allow user to set rnat bits
+ * for syscall args.
+ */
+ cfm = pt->cr_ifs;
+ urbs_kargs = ia64_rse_skip_regs(urbs_end, -(cfm & 0x7f));
+ }
+
+ if (urbs_kargs >= urnat_addr)
+ nbits = 63;
+ else {
+ if ((urnat_addr - 63) >= urbs_kargs)
+ return;
+ nbits = ia64_rse_num_regs(urnat_addr - 63, urbs_kargs);
+ }
+ mask = MASK(nbits);
+
+ /*
+ * First, figure out which bit number slot 0 in user-land maps
+ * to in the kernel rnat. Do this by figuring out how many
+ * register slots we're beyond the user's backingstore and
+ * then computing the equivalent address in kernel space.
+ */
+ num_regs = ia64_rse_num_regs(ubspstore, urnat_addr + 1);
+ slot0_kaddr = ia64_rse_skip_regs(krbs, num_regs);
+ shift = ia64_rse_slot_num(slot0_kaddr);
+ rnat1_kaddr = ia64_rse_rnat_addr(slot0_kaddr);
+ rnat0_kaddr = rnat1_kaddr - 64;
+
+ if (ubspstore + 63 > urnat_addr) {
+ /* some bits need to be placed in pt->ar_rnat: */
+ umask = MASK(ia64_rse_slot_num(ubspstore)) & mask;
+ pt->ar_rnat = (pt->ar_rnat & ~umask) | (urnat & umask);
+ mask &= ~umask;
+ if (!mask)
+ return;
+ }
+ /*
+ * Note: Section 11.1 of the EAS guarantees that bit 63 of an
+ * rnat slot is ignored, so we don't have to clear it here.
+ */
+ rnat0 = (urnat << shift);
+ m = mask << shift;
+ if (rnat0_kaddr >= kbsp)
+ sw->ar_rnat = (sw->ar_rnat & ~m) | (rnat0 & m);
+ else if (rnat0_kaddr > krbs)
+ *rnat0_kaddr = ((*rnat0_kaddr & ~m) | (rnat0 & m));
+
+ rnat1 = (urnat >> (63 - shift));
+ m = mask >> (63 - shift);
+ if (rnat1_kaddr >= kbsp)
+ sw->ar_rnat = (sw->ar_rnat & ~m) | (rnat1 & m);
+ else if (rnat1_kaddr > krbs)
+ *rnat1_kaddr = ((*rnat1_kaddr & ~m) | (rnat1 & m));
+}
+
+/*
+ * Return non-zero iff ADDR lies between BSPSTORE and the RNaT collection
+ * slot that follows URBS_END (both bounds inclusive).
+ */
+static inline int
+on_kernel_rbs (unsigned long addr, unsigned long bspstore,
+	       unsigned long urbs_end)
+{
+	unsigned long last;
+
+	last = (unsigned long) ia64_rse_rnat_addr((unsigned long *) urbs_end);
+	if (addr < bspstore)
+		return 0;
+	return (addr <= last);
+}
+
+/*
+ * Fetch one word from the user-level register backing store of CHILD.
+ * ADDR is the user-level address to read, VAL receives the word, and
+ * USER_RBS_END is the end of the user-level backing store (the value
+ * ar.bsp would hold had the user executed a "cover" instruction).
+ *
+ * Registers that were spilled to the kernel register backing store are
+ * read from there, and RNaT collection words are assembled as needed.
+ * Everything else is read from CHILD's address space.  Returns 0 on
+ * success and -EIO if the read from CHILD's memory comes up short.
+ */
+long
+ia64_peek (struct task_struct *child, struct switch_stack *child_stack,
+	   unsigned long user_rbs_end, unsigned long addr, long *val)
+{
+	unsigned long *uend = (unsigned long *) user_rbs_end;
+	unsigned long *slot = (unsigned long *) addr;
+	unsigned long *ubspstore, *kbase, *collection;
+	size_t nread;
+	long word;
+
+	ubspstore = (unsigned long *) ia64_task_regs(child)->ar_bspstore;
+	kbase = (unsigned long *) child + IA64_RBS_OFFSET/8;
+
+	if (on_kernel_rbs(addr, (unsigned long) ubspstore,
+			  (unsigned long) uend))
+	{
+		/*
+		 * The requested word falls into the region that is
+		 * backed by the kernel RBS, so consult the kernel copy.
+		 */
+		collection = ia64_rse_rnat_addr(slot);
+		word = get_rnat(child, child_stack, kbase, collection, uend);
+
+		if (slot == collection) {
+			/* the caller asked for the NaT collection word */
+			*val = word;
+			return 0;
+		}
+
+		if ((word & (1UL << ia64_rse_slot_num(slot))) != 0) {
+			/*
+			 * Whether the data portion of a NaT value is
+			 * saved by st8.spill or an RSE spill is
+			 * implementation dependent (e.g., see EAS 2.6,
+			 * 4.4.4.6 Register Spill and Fill).  Return zero
+			 * so that all IA-64 implementations behave the
+			 * same.
+			 */
+			*val = 0;
+			return 0;
+		}
+
+		if (slot < uend) {
+			/* an ordinary (non-NaT) word on the kernel RBS */
+			*val = *ia64_rse_skip_regs(kbase,
+					ia64_rse_num_regs(ubspstore, slot));
+			return 0;
+		}
+	}
+
+	nread = access_process_vm(child, addr, &word, sizeof(word), 0);
+	if (nread != sizeof(word))
+		return -EIO;
+	*val = word;
+	return 0;
+}
+
+/*
+ * Store VAL at user-level address ADDR of CHILD.  If ADDR falls into the
+ * part of the register backing store that is held on the kernel RBS, the
+ * kernel copy is updated: RNaT slots go through put_rnat(), other slots
+ * below USER_RBS_END are written directly, and the remaining slots in
+ * that region are left alone.  Otherwise the word is written to CHILD's
+ * address space.  Returns 0, or -EIO if that write comes up short.
+ */
+long
+ia64_poke (struct task_struct *child, struct switch_stack *child_stack,
+	   unsigned long user_rbs_end, unsigned long addr, long val)
+{
+	unsigned long *uend = (unsigned long *) user_rbs_end;
+	unsigned long *slot = (unsigned long *) addr;
+	unsigned long *ubspstore, *kbase;
+
+	ubspstore = (unsigned long *) ia64_task_regs(child)->ar_bspstore;
+	kbase = (unsigned long *) child + IA64_RBS_OFFSET/8;
+
+	if (!on_kernel_rbs(addr, (unsigned long) ubspstore,
+			   (unsigned long) uend))
+	{
+		/* plain user memory */
+		if (access_process_vm(child, addr, &val, sizeof(val), 1)
+		    != sizeof(val))
+			return -EIO;
+		return 0;
+	}
+
+	/* the target lives on the kernel RBS => update it there */
+	if (ia64_rse_is_rnat_slot(slot)) {
+		put_rnat(child, child_stack, kbase, slot, val, uend);
+		return 0;
+	}
+	if (slot < uend) {
+		unsigned long nregs = ia64_rse_num_regs(ubspstore, slot);
+
+		*ia64_rse_skip_regs(kbase, nregs) = val;
+	}
+	return 0;
+}
+
+/*
+ * Compute where the user-level register backing store ends, i.e. the
+ * address ar.bsp would have contained had the user executed a "cover"
+ * instruction immediately before entering the kernel.  When CFMP is
+ * non-NULL, the "current frame mask" in effect at kernel entry is
+ * stored through it.
+ */
+unsigned long
+ia64_get_user_rbs_end (struct task_struct *child, struct pt_regs *pt,
+		       unsigned long *cfmp)
+{
+	unsigned long *kbase = (unsigned long *) child + IA64_RBS_OFFSET/8;
+	unsigned long *ubspstore = (unsigned long *) pt->ar_bspstore;
+	unsigned long frame = pt->cr_ifs;
+	long dirty;
+
+	dirty = ia64_rse_num_regs(kbase, kbase + (pt->loadrs >> 19));
+
+	if (in_syscall(pt)) {
+		/* the low 7 bits of the frame marker are added to the count */
+		dirty += (frame & 0x7f);
+	} else {
+		/* clear valid bit */
+		frame &= ~(1UL << 63);
+	}
+
+	if (cfmp)
+		*cfmp = frame;
+	return (unsigned long) ia64_rse_skip_regs(ubspstore, dirty);
+}
+
+/*
+ * Synchronize (i.e, write) the RSE backing store living in kernel
+ * space to the VM of the CHILD task. SW is the pointer to CHILD's
+ * switch_stack structure. USER_RBS_START and USER_RBS_END are the
+ * user-level addresses at which the region to be written starts and
+ * ends (the end is exclusive). Returns 0 on success, the negative
+ * value from ia64_peek() if a word cannot be read, or -EIO if a word
+ * cannot be written to CHILD's memory.
+ */
+long
+ia64_sync_user_rbs (struct task_struct *child, struct switch_stack *sw,
+ unsigned long user_rbs_start, unsigned long user_rbs_end)
+{
+ unsigned long addr, val;
+ long ret;
+
+ /* now copy word for word from kernel rbs to user rbs: */
+ for (addr = user_rbs_start; addr < user_rbs_end; addr += 8) {
+ /* ia64_peek() yields the up-to-date value for this address */
+ ret = ia64_peek(child, sw, user_rbs_end, addr, &val);
+ if (ret < 0)
+ return ret;
+ if (access_process_vm(child, addr, &val, sizeof(val), 1)
+ != sizeof(val))
+ return -EIO;
+ }
+ return 0;
+}
+
+/*
+ * Return 1 if THREAD is attachable and ADDR falls into the part of its
+ * register backing store that on_kernel_rbs() accepts, 0 otherwise.
+ */
+static inline int
+thread_matches (struct task_struct *thread, unsigned long addr)
+{
+	struct pt_regs *regs;
+	unsigned long rbs_end;
+
+	/*
+	 * A thread that is not in an attachable state is simply skipped.
+	 * As a consequence, if ADDR overlaps the portion of that thread's
+	 * register backing store which currently resides on its kernel
+	 * stack, ptrace() may end up accessing a stale value.  But a
+	 * thread that isn't stopped is a problem anyhow, so this is as
+	 * good as we can do...
+	 */
+	if (ptrace_check_attach(thread, 0) < 0)
+		return 0;
+
+	regs = ia64_task_regs(thread);
+	rbs_end = ia64_get_user_rbs_end(thread, regs, NULL);
+
+	/* 1 means: looks like we've got a winner */
+	return on_kernel_rbs(addr, regs->ar_bspstore, rbs_end) ? 1 : 0;
+}
+
+/*
+ * GDB apparently wants to be able to read the register-backing store
+ * of any thread when attached to a given process.  If we are peeking
+ * or poking an address that happens to reside in the kernel-backing
+ * store of another thread, we need to attach to that thread, because
+ * otherwise we end up accessing stale data.
+ *
+ * Returns the thread whose kernel-resident backing store covers ADDR,
+ * or CHILD itself if CHILD has no mm, is not multi-threaded, or no
+ * such thread is found.
+ *
+ * tasklist_lock must be read-locked before calling this routine!
+ */
+static struct task_struct *
+find_thread_for_addr (struct task_struct *child, unsigned long addr)
+{
+	struct task_struct *g, *p;
+	struct mm_struct *mm;
+	int mm_users;
+
+	if (!(mm = get_task_mm(child)))
+		return child;
+
+	/* -1 because of our get_task_mm(): */
+	mm_users = atomic_read(&mm->mm_users) - 1;
+	if (mm_users <= 1)
+		goto out;		/* not multi-threaded */
+
+	/*
+	 * First, traverse the child's thread-list.  Good for scalability with
+	 * NPTL-threads.
+	 */
+	p = child;
+	do {
+		if (thread_matches(p, addr)) {
+			child = p;
+			goto out;
+		}
+		/* stop once every user of the mm has been looked at */
+		if (mm_users-- <= 1)
+			goto out;
+	} while ((p = next_thread(p)) != child);
+
+	/*
+	 * Not all users of the mm are on the thread-list, so fall back
+	 * to scanning every task in the system.
+	 */
+	do_each_thread(g, p) {
+		/*
+		 * Only tasks sharing CHILD's address space are candidates.
+		 * The test must look at the iterated task P: CHILD's own mm
+		 * is the one we obtained above, so testing CHILD here would
+		 * never filter anything out.
+		 */
+		if (p->mm != mm)
+			continue;
+
+		if (thread_matches(p, addr)) {
+			child = p;
+			goto out;
+		}
+	} while_each_thread(g, p);
+  out:
+	mmput(mm);
+	return child;
+}
+
+/*
+ * If TASK owns the local FPU and has modified f32-f127, save those
+ * registers to task->thread.fph and mark the saved copy valid.
+ */
+inline void
+ia64_flush_fph (struct task_struct *task)
+{
+	struct ia64_psr *psr = ia64_psr(ia64_task_regs(task));
+
+	/* nothing to write back unless we own the FPU and fph is dirty */
+	if (!ia64_is_local_fpu_owner(task) || !psr->mfh)
+		return;
+
+	psr->mfh = 0;
+	task->thread.flags |= IA64_THREAD_FPH_VALID;
+	ia64_save_fpu(&task->thread.fph[0]);
+}
+
+/*
+ * Bring thread.fph up to date so that the fph state of TASK can be
+ * manipulated through it.  Modified f32-f127 are written back first;
+ * if the fph state has never been used, thread.fph is zero-filled
+ * instead.  Finally, access to f32-f127 is disabled so that the task
+ * reloads the state from thread.fph when it runs again.
+ */
+void
+ia64_sync_fph (struct task_struct *task)
+{
+	struct ia64_psr *task_psr = ia64_psr(ia64_task_regs(task));
+	int have_fph;
+
+	ia64_flush_fph(task);
+
+	have_fph = (task->thread.flags & IA64_THREAD_FPH_VALID) != 0;
+	if (!have_fph) {
+		task->thread.flags |= IA64_THREAD_FPH_VALID;
+		memset(&task->thread.fph, 0, sizeof(task->thread.fph));
+	}
+
+	ia64_drop_fpu(task);
+	task_psr->dfh = 1;
+}
+
+/*
+ * Read or write one 64-bit half (HI selects u.bits[HI]) of floating-point
+ * register REGNUM through the unwinder.  On a read the half is stored in
+ * *DATA; on a write it is taken from *DATA.  Returns the unwinder's
+ * status; a negative value means the access failed.
+ */
+static int
+access_fr (struct unw_frame_info *info, int regnum, int hi,
+	   unsigned long *data, int write_access)
+{
+	struct ia64_fpreg reg;
+	int status;
+
+	status = unw_get_fr(info, regnum, &reg);
+	if (status < 0)
+		return status;
+
+	if (!write_access) {
+		*data = reg.u.bits[hi];
+		return status;
+	}
+
+	/* read-modify-write: only the selected half changes */
+	reg.u.bits[hi] = *data;
+	return unw_set_fr(info, regnum, reg);
+}
+
+/*
+ * Alter the machine-state of CHILD so that it leaves the kernel through
+ * the normal kernel exit-path instead of the syscall-exit path.  CFM is
+ * the frame marker to install in PT->cr_ifs.  If unwinding fails, the
+ * state is left untouched.
+ */
+static void
+convert_to_non_syscall (struct task_struct *child, struct pt_regs *pt,
+			unsigned long cfm)
+{
+	struct unw_frame_info frame, last_kernel_frame;
+	unsigned long rp, preds;
+
+	unw_init_from_blocked_task(&frame, child);
+
+	/* walk up until the return pointer is below FIXADDR_USER_END */
+	do {
+		last_kernel_frame = frame;
+		if (unw_unwind(&frame) < 0)
+			return;
+		if (unw_get_rp(&frame, &rp) < 0)
+			return;
+	} while (rp >= FIXADDR_USER_END);
+
+	/* flip the syscall/non-syscall predicates in the previous frame */
+	unw_get_pr(&last_kernel_frame, &preds);
+	preds = (preds & ~(1UL << PRED_SYSCALL)) | (1UL << PRED_NON_SYSCALL);
+	unw_set_pr(&last_kernel_frame, preds);
+
+	/* bit 63 is the valid bit of cr.ifs */
+	pt->cr_ifs = (1UL << 63) | cfm;
+}
+
+/*
+ * Read or write the NaT bits exposed as PT_NAT_BITS.  The scratch
+ * registers' bits are converted to/from ar.unat with the
+ * ia64_{get,put}_scratch_nat_bits() helpers; the bits of r4-r7 are
+ * accessed through the unwinder.  Returns 0 on success and -1 if ar.unat
+ * cannot be accessed.
+ */
+static int
+access_nat_bits (struct task_struct *child, struct pt_regs *pt,
+		 struct unw_frame_info *info,
+		 unsigned long *data, int write_access)
+{
+	unsigned long regnum, nat_bits, scratch_unat, dummy = 0;
+	char nat = 0;
+
+	if (!write_access) {
+		if (unw_get_ar(info, UNW_AR_UNAT, &scratch_unat) < 0) {
+			dprintk("ptrace: failed to read ar.unat\n");
+			return -1;
+		}
+		nat_bits = ia64_get_scratch_nat_bits(pt, scratch_unat);
+		/* merge in the NaT bits of the preserved registers r4-r7 */
+		for (regnum = 4; regnum <= 7; ++regnum) {
+			unw_get_gr(info, regnum, &dummy, &nat);
+			nat_bits |= (nat != 0) << regnum;
+		}
+		*data = nat_bits;
+		return 0;
+	}
+
+	nat_bits = *data;
+	scratch_unat = ia64_put_scratch_nat_bits(pt, nat_bits);
+	if (unw_set_ar(info, UNW_AR_UNAT, scratch_unat) < 0) {
+		dprintk("ptrace: failed to set ar.unat\n");
+		return -1;
+	}
+	/* keep each register's value, replace only its NaT bit */
+	for (regnum = 4; regnum <= 7; ++regnum) {
+		unw_get_gr(info, regnum, &dummy, &nat);
+		unw_set_gr(info, regnum, dummy,
+			   (nat_bits >> regnum) & 1);
+	}
+	return 0;
+}
+
+/*
+ * Read or write one 8-byte word of CHILD's "user area", addressed by the
+ * PT_* offset ADDR (which must be 8-byte aligned). With WRITE_ACCESS set,
+ * the word at *DATA is stored; otherwise the word read is placed in *DATA.
+ * Returns 0 on success and a negative value if the offset is rejected or
+ * the underlying access fails (callers test for < 0).
+ */
+static int
+access_uarea (struct task_struct *child, unsigned long addr,
+ unsigned long *data, int write_access)
+{
+ unsigned long *ptr, regnum, urbs_end, rnat_addr, cfm;
+ struct switch_stack *sw;
+ struct pt_regs *pt;
+ /* address of member REG inside the pt_regs structure PT: */
+# define pt_reg_addr(pt, reg) ((void *) \
+ ((unsigned long) (pt) \
+ + offsetof(struct pt_regs, reg)))
+
+
+ pt = ia64_task_regs(child);
+ sw = (struct switch_stack *) (child->thread.ksp + 16);
+
+ if ((addr & 0x7) != 0) {
+ dprintk("ptrace: unaligned register address 0x%lx\n", addr);
+ return -1;
+ }
+
+ /*
+ * Each branch below either completes the access itself and returns,
+ * or leaves PTR pointing at the word so that the common code at the
+ * end of the function performs the read or write.
+ */
+ if (addr < PT_F127 + 16) {
+ /* accessing fph */
+ if (write_access)
+ ia64_sync_fph(child);
+ else
+ ia64_flush_fph(child);
+ ptr = (unsigned long *)
+ ((unsigned long) &child->thread.fph + addr);
+ } else if ((addr >= PT_F10) && (addr < PT_F11 + 16)) {
+ /* scratch registers untouched by kernel (saved in pt_regs) */
+ ptr = pt_reg_addr(pt, f10) + (addr - PT_F10);
+ } else if (addr >= PT_F12 && addr < PT_F15 + 16) {
+ /*
+ * Scratch registers untouched by kernel (saved in
+ * switch_stack).
+ */
+ ptr = (unsigned long *) ((long) sw
+ + (addr - PT_NAT_BITS - 32));
+ } else if (addr < PT_AR_LC + 8) {
+ /* preserved state: */
+ struct unw_frame_info info;
+ char nat = 0;
+ int ret;
+
+ unw_init_from_blocked_task(&info, child);
+ if (unw_unwind_to_user(&info) < 0)
+ return -1;
+
+ switch (addr) {
+ case PT_NAT_BITS:
+ return access_nat_bits(child, pt, &info,
+ data, write_access);
+
+ case PT_R4: case PT_R5: case PT_R6: case PT_R7:
+ if (write_access) {
+ /* read NaT bit first: */
+ unsigned long dummy;
+
+ ret = unw_get_gr(&info, (addr - PT_R4)/8 + 4,
+ &dummy, &nat);
+ if (ret < 0)
+ return ret;
+ }
+ return unw_access_gr(&info, (addr - PT_R4)/8 + 4, data,
+ &nat, write_access);
+
+ case PT_B1: case PT_B2: case PT_B3:
+ case PT_B4: case PT_B5:
+ return unw_access_br(&info, (addr - PT_B1)/8 + 1, data,
+ write_access);
+
+ case PT_AR_EC:
+ return unw_access_ar(&info, UNW_AR_EC, data,
+ write_access);
+
+ case PT_AR_LC:
+ return unw_access_ar(&info, UNW_AR_LC, data,
+ write_access);
+
+ default:
+ /* 16 bytes per fp register; bit 3 of ADDR selects the half */
+ if (addr >= PT_F2 && addr < PT_F5 + 16)
+ return access_fr(&info, (addr - PT_F2)/16 + 2,
+ (addr & 8) != 0, data,
+ write_access);
+ else if (addr >= PT_F16 && addr < PT_F31 + 16)
+ return access_fr(&info,
+ (addr - PT_F16)/16 + 16,
+ (addr & 8) != 0,
+ data, write_access);
+ else {
+ dprintk("ptrace: rejecting access to register "
+ "address 0x%lx\n", addr);
+ return -1;
+ }
+ }
+ } else if (addr < PT_F9+16) {
+ /* scratch state */
+ switch (addr) {
+ case PT_AR_BSP:
+ /*
+ * By convention, we use PT_AR_BSP to refer to
+ * the end of the user-level backing store.
+ * Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof)
+ * to get the real value of ar.bsp at the time
+ * the kernel was entered.
+ *
+ * Furthermore, when changing the contents of
+ * PT_AR_BSP (or PT_CFM) we MUST copy any
+ * user-level stacked registers that are
+ * stored on the kernel stack back to
+ * user-space because otherwise, we might end
+ * up clobbering kernel stacked registers.
+ * Also, if this happens while the task is
+ * blocked in a system call, we convert the
+ * state such that the non-system-call exit
+ * path is used. This ensures that the proper
+ * state will be picked up when resuming
+ * execution. However, it *also* means that
+ * once we write PT_AR_BSP/PT_CFM, it won't be
+ * possible to modify the syscall arguments of
+ * the pending system call any longer. This
+ * shouldn't be an issue because modifying
+ * PT_AR_BSP/PT_CFM generally implies that
+ * we're either abandoning the pending system
+ * call or that we defer its re-execution
+ * (e.g., due to GDB doing an inferior
+ * function call).
+ */
+ urbs_end = ia64_get_user_rbs_end(child, pt, &cfm);
+ if (write_access) {
+ if (*data != urbs_end) {
+ if (ia64_sync_user_rbs(child, sw,
+ pt->ar_bspstore,
+ urbs_end) < 0)
+ return -1;
+ if (in_syscall(pt))
+ convert_to_non_syscall(child,
+ pt,
+ cfm);
+ /*
+ * Simulate user-level write
+ * of ar.bsp:
+ */
+ pt->loadrs = 0;
+ pt->ar_bspstore = *data;
+ }
+ } else
+ *data = urbs_end;
+ return 0;
+
+ case PT_CFM:
+ urbs_end = ia64_get_user_rbs_end(child, pt, &cfm);
+ if (write_access) {
+ /* only act if a bit covered by PFM_MASK changes */
+ if (((cfm ^ *data) & PFM_MASK) != 0) {
+ if (ia64_sync_user_rbs(child, sw,
+ pt->ar_bspstore,
+ urbs_end) < 0)
+ return -1;
+ if (in_syscall(pt))
+ convert_to_non_syscall(child,
+ pt,
+ cfm);
+ pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK)
+ | (*data & PFM_MASK));
+ }
+ } else
+ *data = cfm;
+ return 0;
+
+ case PT_CR_IPSR:
+ /* only the bits in IPSR_MASK are visible and writable */
+ if (write_access)
+ pt->cr_ipsr = ((*data & IPSR_MASK)
+ | (pt->cr_ipsr & ~IPSR_MASK));
+ else
+ *data = (pt->cr_ipsr & IPSR_MASK);
+ return 0;
+
+ case PT_AR_RNAT:
+ /* ar.rnat is the collection word following urbs_end */
+ urbs_end = ia64_get_user_rbs_end(child, pt, NULL);
+ rnat_addr = (long) ia64_rse_rnat_addr((long *)
+ urbs_end);
+ if (write_access)
+ return ia64_poke(child, sw, urbs_end,
+ rnat_addr, *data);
+ else
+ return ia64_peek(child, sw, urbs_end,
+ rnat_addr, data);
+
+ case PT_R1:
+ ptr = pt_reg_addr(pt, r1);
+ break;
+ case PT_R2: case PT_R3:
+ ptr = pt_reg_addr(pt, r2) + (addr - PT_R2);
+ break;
+ case PT_R8: case PT_R9: case PT_R10: case PT_R11:
+ ptr = pt_reg_addr(pt, r8) + (addr - PT_R8);
+ break;
+ case PT_R12: case PT_R13:
+ ptr = pt_reg_addr(pt, r12) + (addr - PT_R12);
+ break;
+ case PT_R14:
+ ptr = pt_reg_addr(pt, r14);
+ break;
+ case PT_R15:
+ ptr = pt_reg_addr(pt, r15);
+ break;
+ case PT_R16: case PT_R17: case PT_R18: case PT_R19:
+ case PT_R20: case PT_R21: case PT_R22: case PT_R23:
+ case PT_R24: case PT_R25: case PT_R26: case PT_R27:
+ case PT_R28: case PT_R29: case PT_R30: case PT_R31:
+ ptr = pt_reg_addr(pt, r16) + (addr - PT_R16);
+ break;
+ case PT_B0:
+ ptr = pt_reg_addr(pt, b0);
+ break;
+ case PT_B6:
+ ptr = pt_reg_addr(pt, b6);
+ break;
+ case PT_B7:
+ ptr = pt_reg_addr(pt, b7);
+ break;
+ case PT_F6: case PT_F6+8: case PT_F7: case PT_F7+8:
+ case PT_F8: case PT_F8+8: case PT_F9: case PT_F9+8:
+ ptr = pt_reg_addr(pt, f6) + (addr - PT_F6);
+ break;
+ case PT_AR_BSPSTORE:
+ ptr = pt_reg_addr(pt, ar_bspstore);
+ break;
+ case PT_AR_RSC:
+ ptr = pt_reg_addr(pt, ar_rsc);
+ break;
+ case PT_AR_UNAT:
+ ptr = pt_reg_addr(pt, ar_unat);
+ break;
+ case PT_AR_PFS:
+ ptr = pt_reg_addr(pt, ar_pfs);
+ break;
+ case PT_AR_CCV:
+ ptr = pt_reg_addr(pt, ar_ccv);
+ break;
+ case PT_AR_FPSR:
+ ptr = pt_reg_addr(pt, ar_fpsr);
+ break;
+ case PT_CR_IIP:
+ ptr = pt_reg_addr(pt, cr_iip);
+ break;
+ case PT_PR:
+ ptr = pt_reg_addr(pt, pr);
+ break;
+ /* scratch register */
+
+ default:
+ /* disallow accessing anything else... */
+ dprintk("ptrace: rejecting access to register "
+ "address 0x%lx\n", addr);
+ return -1;
+ }
+ } else if (addr <= PT_AR_SSD) {
+ ptr = pt_reg_addr(pt, ar_csd) + (addr - PT_AR_CSD);
+ } else {
+ /* access debug registers */
+
+ if (addr >= PT_IBR) {
+ regnum = (addr - PT_IBR) >> 3;
+ ptr = &child->thread.ibr[0];
+ } else {
+ regnum = (addr - PT_DBR) >> 3;
+ ptr = &child->thread.dbr[0];
+ }
+
+ /* REGNUM is unsigned, so offsets below the base are rejected too */
+ if (regnum >= 8) {
+ dprintk("ptrace: rejecting access to register "
+ "address 0x%lx\n", addr);
+ return -1;
+ }
+#ifdef CONFIG_PERFMON
+ /*
+ * Check if debug registers are used by perfmon. This
+ * test must be done once we know that we can do the
+ * operation, i.e. the arguments are all valid, but
+ * before we start modifying the state.
+ *
+ * Perfmon needs to keep a count of how many processes
+ * are trying to modify the debug registers for system
+ * wide monitoring sessions.
+ *
+ * We also include read access here, because they may
+ * cause the PMU-installed debug register state
+ * (dbr[], ibr[]) to be reset. The two arrays are also
+ * used by perfmon, but we do not use
+ * IA64_THREAD_DBG_VALID. The registers are restored
+ * by the PMU context switch code.
+ */
+ if (pfm_use_debug_registers(child)) return -1;
+#endif
+
+ /* first use: mark the debug state valid and start from zeroes */
+ if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
+ child->thread.flags |= IA64_THREAD_DBG_VALID;
+ memset(child->thread.dbr, 0,
+ sizeof(child->thread.dbr));
+ memset(child->thread.ibr, 0,
+ sizeof(child->thread.ibr));
+ }
+
+ ptr += regnum;
+
+ if ((regnum & 1) && write_access) {
+ /* don't let the user set kernel-level breakpoints: */
+ *ptr = *data & ~(7UL << 56);
+ return 0;
+ }
+ }
+ /* common tail: plain load or store through PTR */
+ if (write_access)
+ *ptr = *data;
+ else
+ *data = *ptr;
+ return 0;
+}
+
+/*
+ * PTRACE_GETREGS: copy the complete user-level register state of CHILD
+ * into the user buffer PPR.
+ *
+ * Returns 0 on success and -EIO if PPR is not writable or not 8-byte
+ * aligned, if the unwinder cannot reach the user-level frame, or if any
+ * register read or copy to user space fails.
+ */
+static long
+ptrace_getregs (struct task_struct *child, struct pt_all_user_regs __user *ppr)
+{
+	unsigned long psr, ec, lc, rnat, bsp, cfm, nat_bits, val;
+	struct unw_frame_info info;
+	struct ia64_fpreg fpval;
+	struct switch_stack *sw;
+	struct pt_regs *pt;
+	long retval = 0;
+	char nat = 0;
+	int i;
+
+	if (!access_ok(VERIFY_WRITE, ppr, sizeof(struct pt_all_user_regs)))
+		return -EIO;
+
+	pt = ia64_task_regs(child);
+	sw = (struct switch_stack *) (child->thread.ksp + 16);
+	unw_init_from_blocked_task(&info, child);
+	if (unw_unwind_to_user(&info) < 0) {
+		return -EIO;
+	}
+
+	if (((unsigned long) ppr & 0x7) != 0) {
+		dprintk("ptrace:unaligned register address %p\n", ppr);
+		return -EIO;
+	}
+
+	/* registers that need the special handling in access_uarea(): */
+	if (access_uarea(child, PT_CR_IPSR, &psr, 0) < 0
+	    || access_uarea(child, PT_AR_EC, &ec, 0) < 0
+	    || access_uarea(child, PT_AR_LC, &lc, 0) < 0
+	    || access_uarea(child, PT_AR_RNAT, &rnat, 0) < 0
+	    || access_uarea(child, PT_AR_BSP, &bsp, 0) < 0
+	    || access_uarea(child, PT_CFM, &cfm, 0) < 0
+	    || access_uarea(child, PT_NAT_BITS, &nat_bits, 0) < 0)
+		return -EIO;
+
+	/* control regs */
+
+	retval |= __put_user(pt->cr_iip, &ppr->cr_iip);
+	retval |= __put_user(psr, &ppr->cr_ipsr);
+
+	/* app regs */
+
+	retval |= __put_user(pt->ar_pfs, &ppr->ar[PT_AUR_PFS]);
+	retval |= __put_user(pt->ar_rsc, &ppr->ar[PT_AUR_RSC]);
+	retval |= __put_user(pt->ar_bspstore, &ppr->ar[PT_AUR_BSPSTORE]);
+	retval |= __put_user(pt->ar_unat, &ppr->ar[PT_AUR_UNAT]);
+	retval |= __put_user(pt->ar_ccv, &ppr->ar[PT_AUR_CCV]);
+	retval |= __put_user(pt->ar_fpsr, &ppr->ar[PT_AUR_FPSR]);
+
+	retval |= __put_user(ec, &ppr->ar[PT_AUR_EC]);
+	retval |= __put_user(lc, &ppr->ar[PT_AUR_LC]);
+	retval |= __put_user(rnat, &ppr->ar[PT_AUR_RNAT]);
+	retval |= __put_user(bsp, &ppr->ar[PT_AUR_BSP]);
+	retval |= __put_user(cfm, &ppr->cfm);
+
+	/* gr1-gr3 */
+
+	retval |= __copy_to_user(&ppr->gr[1], &pt->r1, sizeof(long));
+	retval |= __copy_to_user(&ppr->gr[2], &pt->r2, sizeof(long) *2);
+
+	/* gr4-gr7 */
+
+	for (i = 4; i < 8; i++) {
+		if (unw_access_gr(&info, i, &val, &nat, 0) < 0)
+			return -EIO;
+		retval |= __put_user(val, &ppr->gr[i]);
+	}
+
+	/* gr8-gr11 */
+
+	retval |= __copy_to_user(&ppr->gr[8], &pt->r8, sizeof(long) * 4);
+
+	/* gr12-gr15 */
+
+	retval |= __copy_to_user(&ppr->gr[12], &pt->r12, sizeof(long) * 2);
+	retval |= __copy_to_user(&ppr->gr[14], &pt->r14, sizeof(long));
+	retval |= __copy_to_user(&ppr->gr[15], &pt->r15, sizeof(long));
+
+	/* gr16-gr31 */
+
+	retval |= __copy_to_user(&ppr->gr[16], &pt->r16, sizeof(long) * 16);
+
+	/* b0 */
+
+	retval |= __put_user(pt->b0, &ppr->br[0]);
+
+	/* b1-b5 */
+
+	for (i = 1; i < 6; i++) {
+		if (unw_access_br(&info, i, &val, 0) < 0)
+			return -EIO;
+		/* accumulate the result so that a fault here is reported */
+		retval |= __put_user(val, &ppr->br[i]);
+	}
+
+	/* b6-b7 */
+
+	retval |= __put_user(pt->b6, &ppr->br[6]);
+	retval |= __put_user(pt->b7, &ppr->br[7]);
+
+	/* fr2-fr5 */
+
+	for (i = 2; i < 6; i++) {
+		if (unw_get_fr(&info, i, &fpval) < 0)
+			return -EIO;
+		retval |= __copy_to_user(&ppr->fr[i], &fpval, sizeof (fpval));
+	}
+
+	/* fr6-fr11 */
+
+	retval |= __copy_to_user(&ppr->fr[6], &pt->f6,
+				 sizeof(struct ia64_fpreg) * 6);
+
+	/* fp scratch regs(12-15) */
+
+	retval |= __copy_to_user(&ppr->fr[12], &sw->f12,
+				 sizeof(struct ia64_fpreg) * 4);
+
+	/* fr16-fr31 */
+
+	for (i = 16; i < 32; i++) {
+		if (unw_get_fr(&info, i, &fpval) < 0)
+			return -EIO;
+		retval |= __copy_to_user(&ppr->fr[i], &fpval, sizeof (fpval));
+	}
+
+	/* fph */
+
+	ia64_flush_fph(child);
+	retval |= __copy_to_user(&ppr->fr[32], &child->thread.fph,
+				 sizeof(ppr->fr[32]) * 96);
+
+	/* preds */
+
+	retval |= __put_user(pt->pr, &ppr->pr);
+
+	/* nat bits */
+
+	retval |= __put_user(nat_bits, &ppr->nat);
+
+	/* any failed user-space store turns into -EIO */
+	return retval ? -EIO : 0;
+}
+
+/*
+ * PTRACE_SETREGS: load the complete user-level register state of CHILD
+ * from the user buffer PPR.
+ *
+ * Returns 0 on success and -EIO if PPR is not readable or not 8-byte
+ * aligned, if the unwinder cannot reach the user-level frame, or if any
+ * copy from user space or register write fails.  Registers written
+ * before a failure is detected keep their new values.
+ */
+static long
+ptrace_setregs (struct task_struct *child, struct pt_all_user_regs __user *ppr)
+{
+	unsigned long psr, ec, lc, rnat, bsp, cfm, nat_bits, val = 0;
+	struct unw_frame_info info;
+	struct switch_stack *sw;
+	struct ia64_fpreg fpval;
+	struct pt_regs *pt;
+	long retval = 0;
+	int i;
+
+	memset(&fpval, 0, sizeof(fpval));
+
+	if (!access_ok(VERIFY_READ, ppr, sizeof(struct pt_all_user_regs)))
+		return -EIO;
+
+	pt = ia64_task_regs(child);
+	sw = (struct switch_stack *) (child->thread.ksp + 16);
+	unw_init_from_blocked_task(&info, child);
+	if (unw_unwind_to_user(&info) < 0) {
+		return -EIO;
+	}
+
+	if (((unsigned long) ppr & 0x7) != 0) {
+		dprintk("ptrace:unaligned register address %p\n", ppr);
+		return -EIO;
+	}
+
+	/* control regs */
+
+	retval |= __get_user(pt->cr_iip, &ppr->cr_iip);
+	retval |= __get_user(psr, &ppr->cr_ipsr);
+
+	/* app regs */
+
+	retval |= __get_user(pt->ar_pfs, &ppr->ar[PT_AUR_PFS]);
+	retval |= __get_user(pt->ar_rsc, &ppr->ar[PT_AUR_RSC]);
+	retval |= __get_user(pt->ar_bspstore, &ppr->ar[PT_AUR_BSPSTORE]);
+	retval |= __get_user(pt->ar_unat, &ppr->ar[PT_AUR_UNAT]);
+	retval |= __get_user(pt->ar_ccv, &ppr->ar[PT_AUR_CCV]);
+	retval |= __get_user(pt->ar_fpsr, &ppr->ar[PT_AUR_FPSR]);
+
+	retval |= __get_user(ec, &ppr->ar[PT_AUR_EC]);
+	retval |= __get_user(lc, &ppr->ar[PT_AUR_LC]);
+	retval |= __get_user(rnat, &ppr->ar[PT_AUR_RNAT]);
+	retval |= __get_user(bsp, &ppr->ar[PT_AUR_BSP]);
+	retval |= __get_user(cfm, &ppr->cfm);
+
+	/* gr1-gr3 */
+
+	retval |= __copy_from_user(&pt->r1, &ppr->gr[1], sizeof(long));
+	retval |= __copy_from_user(&pt->r2, &ppr->gr[2], sizeof(long) * 2);
+
+	/* gr4-gr7 */
+
+	for (i = 4; i < 8; i++) {
+		retval |= __get_user(val, &ppr->gr[i]);
+		/* NaT bit will be set via PT_NAT_BITS: */
+		if (unw_set_gr(&info, i, val, 0) < 0)
+			return -EIO;
+	}
+
+	/* gr8-gr11 */
+
+	retval |= __copy_from_user(&pt->r8, &ppr->gr[8], sizeof(long) * 4);
+
+	/* gr12-gr15 */
+
+	retval |= __copy_from_user(&pt->r12, &ppr->gr[12], sizeof(long) * 2);
+	retval |= __copy_from_user(&pt->r14, &ppr->gr[14], sizeof(long));
+	retval |= __copy_from_user(&pt->r15, &ppr->gr[15], sizeof(long));
+
+	/* gr16-gr31 */
+
+	retval |= __copy_from_user(&pt->r16, &ppr->gr[16], sizeof(long) * 16);
+
+	/* b0 */
+
+	retval |= __get_user(pt->b0, &ppr->br[0]);
+
+	/* b1-b5 */
+
+	for (i = 1; i < 6; i++) {
+		retval |= __get_user(val, &ppr->br[i]);
+		/* treat a failed write like the gr/fr cases do */
+		if (unw_set_br(&info, i, val) < 0)
+			return -EIO;
+	}
+
+	/* b6-b7 */
+
+	retval |= __get_user(pt->b6, &ppr->br[6]);
+	retval |= __get_user(pt->b7, &ppr->br[7]);
+
+	/* fr2-fr5 */
+
+	for (i = 2; i < 6; i++) {
+		retval |= __copy_from_user(&fpval, &ppr->fr[i], sizeof(fpval));
+		if (unw_set_fr(&info, i, fpval) < 0)
+			return -EIO;
+	}
+
+	/* fr6-fr11 */
+
+	retval |= __copy_from_user(&pt->f6, &ppr->fr[6],
+				   sizeof(ppr->fr[6]) * 6);
+
+	/* fp scratch regs(12-15) */
+
+	retval |= __copy_from_user(&sw->f12, &ppr->fr[12],
+				   sizeof(ppr->fr[12]) * 4);
+
+	/* fr16-fr31 */
+
+	for (i = 16; i < 32; i++) {
+		retval |= __copy_from_user(&fpval, &ppr->fr[i],
+					   sizeof(fpval));
+		if (unw_set_fr(&info, i, fpval) < 0)
+			return -EIO;
+	}
+
+	/* fph */
+
+	ia64_sync_fph(child);
+	retval |= __copy_from_user(&child->thread.fph, &ppr->fr[32],
+				   sizeof(ppr->fr[32]) * 96);
+
+	/* preds */
+
+	retval |= __get_user(pt->pr, &ppr->pr);
+
+	/* nat bits */
+
+	retval |= __get_user(nat_bits, &ppr->nat);
+
+	/* registers that need the special handling in access_uarea(): */
+	retval |= access_uarea(child, PT_CR_IPSR, &psr, 1);
+	retval |= access_uarea(child, PT_AR_EC, &ec, 1);
+	retval |= access_uarea(child, PT_AR_LC, &lc, 1);
+	retval |= access_uarea(child, PT_AR_RNAT, &rnat, 1);
+	retval |= access_uarea(child, PT_AR_BSP, &bsp, 1);
+	retval |= access_uarea(child, PT_CFM, &cfm, 1);
+	retval |= access_uarea(child, PT_NAT_BITS, &nat_bits, 1);
+
+	/* any failed copy or register write turns into -EIO */
+	return retval ? -EIO : 0;
+}
+
+/*
+ * Hook invoked by kernel/ptrace.c on detach: clear the single-step and
+ * taken-branch trap bits in CHILD's saved psr.
+ */
+void
+ptrace_disable (struct task_struct *child)
+{
+	struct pt_regs *regs = ia64_task_regs(child);
+
+	/* neither trap may remain armed once the tracer is gone: */
+	ia64_psr(regs)->ss = 0;
+	ia64_psr(regs)->tb = 0;
+}
+
+/*
+ * ia64 implementation of the ptrace() system call. REQUEST selects the
+ * operation and PID the target task; the meaning of ADDR and DATA depends
+ * on REQUEST. Requests not handled in the switch below are passed on to
+ * ptrace_request(). The whole call runs under the big kernel lock, and a
+ * reference on the target task is held from lookup until return.
+ * Returns the request-specific result or a negative error code.
+ */
+asmlinkage long
+sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data)
+{
+ struct pt_regs *pt;
+ unsigned long urbs_end, peek_or_poke;
+ struct task_struct *child;
+ struct switch_stack *sw;
+ long ret;
+
+ lock_kernel();
+ ret = -EPERM;
+ if (request == PTRACE_TRACEME) {
+ /* are we already being traced? */
+ if (current->ptrace & PT_PTRACED)
+ goto out;
+ ret = security_ptrace(current->parent, current);
+ if (ret)
+ goto out;
+ current->ptrace |= PT_PTRACED;
+ ret = 0;
+ goto out;
+ }
+
+ peek_or_poke = (request == PTRACE_PEEKTEXT
+ || request == PTRACE_PEEKDATA
+ || request == PTRACE_POKETEXT
+ || request == PTRACE_POKEDATA);
+ ret = -ESRCH;
+ read_lock(&tasklist_lock);
+ {
+ child = find_task_by_pid(pid);
+ if (child) {
+ /*
+ * For memory accesses, ADDR may lie in the kernel-resident
+ * backing store of another thread; use that thread instead.
+ */
+ if (peek_or_poke)
+ child = find_thread_for_addr(child, addr);
+ get_task_struct(child);
+ }
+ }
+ read_unlock(&tasklist_lock);
+ if (!child)
+ goto out;
+ ret = -EPERM;
+ if (pid == 1) /* no messing around with init! */
+ goto out_tsk;
+
+ if (request == PTRACE_ATTACH) {
+ ret = ptrace_attach(child);
+ goto out_tsk;
+ }
+
+ ret = ptrace_check_attach(child, request == PTRACE_KILL);
+ if (ret < 0)
+ goto out_tsk;
+
+ pt = ia64_task_regs(child);
+ sw = (struct switch_stack *) (child->thread.ksp + 16);
+
+ switch (request) {
+ case PTRACE_PEEKTEXT:
+ case PTRACE_PEEKDATA:
+ /* read word at location addr */
+ urbs_end = ia64_get_user_rbs_end(child, pt, NULL);
+ ret = ia64_peek(child, sw, urbs_end, addr, &data);
+ if (ret == 0) {
+ ret = data;
+ /* ensure "ret" is not mistaken as an error code: */
+ force_successful_syscall_return();
+ }
+ goto out_tsk;
+
+ case PTRACE_POKETEXT:
+ case PTRACE_POKEDATA:
+ /* write the word at location addr */
+ urbs_end = ia64_get_user_rbs_end(child, pt, NULL);
+ ret = ia64_poke(child, sw, urbs_end, addr, data);
+ goto out_tsk;
+
+ case PTRACE_PEEKUSR:
+ /* read the word at addr in the USER area */
+ if (access_uarea(child, addr, &data, 0) < 0) {
+ ret = -EIO;
+ goto out_tsk;
+ }
+ ret = data;
+ /* ensure "ret" is not mistaken as an error code */
+ force_successful_syscall_return();
+ goto out_tsk;
+
+ case PTRACE_POKEUSR:
+ /* write the word at addr in the USER area */
+ if (access_uarea(child, addr, &data, 1) < 0) {
+ ret = -EIO;
+ goto out_tsk;
+ }
+ ret = 0;
+ goto out_tsk;
+
+ case PTRACE_OLD_GETSIGINFO:
+ /* for backwards-compatibility */
+ ret = ptrace_request(child, PTRACE_GETSIGINFO, addr, data);
+ goto out_tsk;
+
+ case PTRACE_OLD_SETSIGINFO:
+ /* for backwards-compatibility */
+ ret = ptrace_request(child, PTRACE_SETSIGINFO, addr, data);
+ goto out_tsk;
+
+ case PTRACE_SYSCALL:
+ /* continue and stop at next (return from) syscall */
+ case PTRACE_CONT:
+ /* restart after signal. */
+ ret = -EIO;
+ /* DATA is stored in the child's exit_code; reject values above _NSIG */
+ if (data > _NSIG)
+ goto out_tsk;
+ if (request == PTRACE_SYSCALL)
+ set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+ else
+ clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+ child->exit_code = data;
+
+ /*
+ * Make sure the single step/taken-branch trap bits
+ * are not set:
+ */
+ ia64_psr(pt)->ss = 0;
+ ia64_psr(pt)->tb = 0;
+
+ wake_up_process(child);
+ ret = 0;
+ goto out_tsk;
+
+ case PTRACE_KILL:
+ /*
+ * Make the child exit. Best I can do is send it a
+ * sigkill. Perhaps it should be put in the status
+ * that it wants to exit.
+ */
+ if (child->exit_state == EXIT_ZOMBIE)
+ /* already dead */
+ goto out_tsk;
+ child->exit_code = SIGKILL;
+
+ ptrace_disable(child);
+ wake_up_process(child);
+ ret = 0;
+ goto out_tsk;
+
+ case PTRACE_SINGLESTEP:
+ /* let child execute for one instruction */
+ case PTRACE_SINGLEBLOCK:
+ ret = -EIO;
+ if (data > _NSIG)
+ goto out_tsk;
+
+ clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+ /* ss is set for PTRACE_SINGLESTEP, tb for PTRACE_SINGLEBLOCK */
+ if (request == PTRACE_SINGLESTEP) {
+ ia64_psr(pt)->ss = 1;
+ } else {
+ ia64_psr(pt)->tb = 1;
+ }
+ child->exit_code = data;
+
+ /* give it a chance to run. */
+ wake_up_process(child);
+ ret = 0;
+ goto out_tsk;
+
+ case PTRACE_DETACH:
+ /* detach a process that was attached. */
+ ret = ptrace_detach(child, data);
+ goto out_tsk;
+
+ case PTRACE_GETREGS:
+ ret = ptrace_getregs(child,
+ (struct pt_all_user_regs __user *) data);
+ goto out_tsk;
+
+ case PTRACE_SETREGS:
+ ret = ptrace_setregs(child,
+ (struct pt_all_user_regs __user *) data);
+ goto out_tsk;
+
+ default:
+ ret = ptrace_request(child, request, addr, data);
+ goto out_tsk;
+ }
+ /* drop the task reference taken after the lookup above */
+ out_tsk:
+ put_task_struct(child);
+ out:
+ unlock_kernel();
+ return ret;
+}
+
+
+/*
+ * Report a syscall stop to the tracer of the current task, if syscall
+ * tracing is enabled and the task is being ptraced.  After the tracer
+ * resumes us, a non-zero exit_code is delivered to ourselves as a signal.
+ */
+void
+syscall_trace (void)
+{
+	int code = SIGTRAP;
+
+	if (!test_thread_flag(TIF_SYSCALL_TRACE)
+	    || !(current->ptrace & PT_PTRACED))
+		return;
+
+	/*
+	 * Setting 0x80 lets the tracing parent tell a syscall stop
+	 * apart from the delivery of a SIGTRAP.
+	 */
+	if (current->ptrace & PT_TRACESYSGOOD)
+		code |= 0x80;
+	ptrace_notify(code);
+
+	/*
+	 * Not quite the same as continuing with a signal, but good
+	 * enough for normal use: strace only continues with a signal
+	 * if the stopping signal is not SIGTRAP.  -brl
+	 */
+	if (!current->exit_code)
+		return;
+	send_sig(current->exit_code, current, 1);
+	current->exit_code = 0;
+}
+
+/*
+ * Syscall-entry hook: feeds the audit subsystem (if the task has an
+ * audit context) and then reports the stop to a tracer, if any.
+ * "asmlinkage" so the input arguments are preserved...
+ */
+
+asmlinkage void
+syscall_trace_enter (long arg0, long arg1, long arg2, long arg3,
+		     long arg4, long arg5, long arg6, long arg7,
+		     struct pt_regs regs)
+{
+	if (unlikely(current->audit_context)) {
+		/* the syscall number is in r1 for IA-32 tasks, else in r15 */
+		long nr = IS_IA32_PROCESS(&regs) ? regs.r1 : regs.r15;
+
+		audit_syscall_entry(current, nr, arg0, arg1, arg2, arg3);
+	}
+
+	if (!test_thread_flag(TIF_SYSCALL_TRACE))
+		return;
+	if (current->ptrace & PT_PTRACED)
+		syscall_trace();
+}
+
+/*
+ * Syscall-exit hook: passes r8 to the audit subsystem (if the task has
+ * an audit context) and then reports the stop to a tracer, if any.
+ * "asmlinkage" so the input arguments are preserved...
+ */
+
+asmlinkage void
+syscall_trace_leave (long arg0, long arg1, long arg2, long arg3,
+		     long arg4, long arg5, long arg6, long arg7,
+		     struct pt_regs regs)
+{
+	if (unlikely(current->audit_context))
+		audit_syscall_exit(current, regs.r8);
+
+	if (!test_thread_flag(TIF_SYSCALL_TRACE))
+		return;
+	if (current->ptrace & PT_PTRACED)
+		syscall_trace();
+}
diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c
new file mode 100644
index 00000000000..acc0f132f86
--- /dev/null
+++ b/arch/ia64/kernel/sal.c
@@ -0,0 +1,302 @@
+/*
+ * System Abstraction Layer (SAL) interface routines.
+ *
+ * Copyright (C) 1998, 1999, 2001, 2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ */
+#include <linux/config.h>
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+
+#include <asm/page.h>
+#include <asm/sal.h>
+#include <asm/pal.h>
+
+ __cacheline_aligned DEFINE_SPINLOCK(sal_lock);
+unsigned long sal_platform_features;
+
+unsigned short sal_revision;
+unsigned short sal_version;
+
+#define SAL_MAJOR(x) ((x) >> 8)
+#define SAL_MINOR(x) ((x) & 0xff)
+
+static struct {
+ void *addr; /* function entry point */
+ void *gpval; /* gp value to use */
+} pdesc;
+
+/*
+ * Placeholder SAL entry point: ia64_sal points here until
+ * ia64_sal_handler_init() installs the real procedure descriptor.
+ * Always returns -1, which ia64_sal_strerror() reports as
+ * "Not implemented".
+ */
+static long
+default_handler (void)
+{
+ return -1;
+}
+
+ia64_sal_handler ia64_sal = (ia64_sal_handler) default_handler;
+ia64_sal_desc_ptc_t *ia64_ptc_domain_info;
+
+/*
+ * Translate a SAL status code into a human-readable message.
+ *
+ * 0 and 1 are the two non-failure codes, -1 through -20 are the known
+ * failure codes; anything else yields "Unknown SAL status code".  The
+ * returned string is a constant and must not be modified or freed.
+ */
+const char *
+ia64_sal_strerror (long status)
+{
+	/* Failure messages, indexed by (-status - 1). */
+	static const char * const failure_msg[] = {
+		"Not implemented",					/*  -1 */
+		"Invalid argument",					/*  -2 */
+		"Call completed with error",				/*  -3 */
+		"Virtual address not registered",			/*  -4 */
+		"No information available",				/*  -5 */
+		"Insufficient space to add the entry",			/*  -6 */
+		"Invalid entry_addr value",				/*  -7 */
+		"Invalid interrupt vector",				/*  -8 */
+		"Requested memory not available",			/*  -9 */
+		"Unable to write to the NVM device",			/* -10 */
+		"Invalid partition type specified",			/* -11 */
+		"Invalid NVM_Object id specified",			/* -12 */
+		"NVM_Object already has the maximum number "
+			"of partitions",				/* -13 */
+		"Insufficient space in partition for the "
+			"requested write sub-function",			/* -14 */
+		"Insufficient data buffer space for the "
+			"requested read record sub-function",		/* -15 */
+		"Scratch buffer required for the write/delete "
+			"sub-function",					/* -16 */
+		"Insufficient space in the NVM_Object for the "
+			"requested create sub-function",		/* -17 */
+		"Invalid value specified in the partition_rec "
+			"argument",					/* -18 */
+		"Record oriented I/O not supported for this "
+			"partition",					/* -19 */
+		"Bad format of record to be written or "
+			"required keyword variable not "
+			"specified",					/* -20 */
+	};
+	const long nr_failures = sizeof(failure_msg) / sizeof(failure_msg[0]);
+
+	if (status == 0)
+		return "Call completed without error";
+	if (status == 1)
+		return "Effect a warm boot of the system to complete "
+			"the update";
+	/* the range check runs first, so -status is never computed for huge negatives */
+	if (status < 0 && status >= -nr_failures)
+		return failure_msg[-status - 1];
+	return "Unknown SAL status code";
+}
+
+/*
+ * Record the SAL entry point and its gp value in the static procedure
+ * descriptor, then make ia64_sal refer to that descriptor.
+ */
+void __init
+ia64_sal_handler_init (void *entry_point, void *gpval)
+{
+	pdesc.gpval = gpval;
+	pdesc.addr = entry_point;
+
+	/* from here on ia64_sal no longer refers to default_handler() */
+	ia64_sal = (ia64_sal_handler) &pdesc;
+}
+
+/*
+ * Fill in sal_revision and sal_version from the system table, each
+ * packed as (major << 8) | minor, and paper over one known-bad
+ * firmware version number.
+ */
+static void __init
+check_versions (struct ia64_sal_systab *systab)
+{
+	sal_revision = (systab->sal_rev_major << 8) | systab->sal_rev_minor;
+	sal_version = (systab->sal_b_rev_major << 8) | systab->sal_b_rev_minor;
+
+	/* Anything other than the broken 49.29/49.29 pair is taken as is. */
+	if (sal_revision != SAL_VERSION_CODE(49, 29)
+	    || sal_version != SAL_VERSION_CODE(49, 29))
+		return;
+
+	/*
+	 * Old firmware for zx2000 prototypes have this weird version number,
+	 * reset it to something sane.
+	 */
+	sal_revision = SAL_VERSION_CODE(2, 8);
+	sal_version = SAL_VERSION_CODE(0, 0);
+}
+
+/*
+ * Handle an entry-point descriptor: install the PAL handler first, then
+ * the SAL handler together with its gp value.  The descriptor fields
+ * are passed through __va() before being handed on.
+ */
+static void __init
+sal_desc_entry_point (void *p)
+{
+	struct ia64_sal_desc_entry_point *entry = p;
+
+	ia64_pal_handler_init(__va(entry->pal_proc));
+	ia64_sal_handler_init(__va(entry->sal_proc), __va(entry->gp));
+}
+
+#ifdef CONFIG_SMP
+/*
+ * Enable or disable one redirection feature bit in smp_int_redirect.
+ * Without CPU hotplug the bit follows no_int_routing: cleared when
+ * routing is disabled, set otherwise.  With CPU hotplug the bit is
+ * always cleared and no_int_routing is forced on.  On non-SMP builds
+ * this is a no-op macro.
+ */
+static void __init
+set_smp_redirect (int flag)
+{
+#ifndef CONFIG_HOTPLUG_CPU
+ if (no_int_routing)
+ smp_int_redirect &= ~flag;
+ else
+ smp_int_redirect |= flag;
+#else
+ /*
+ * For CPU Hotplug we don't want to do any chipset supported
+ * interrupt redirection. The reason is this would require that
+ * all interrupts be stopped and the irq be hard bound to a cpu.
+ * Later when the interrupt is fired we need to set the redir hint
+ * on again in the vector. This is cumbersome for something that the
+ * user mode irq balancer will solve anyway.
+ */
+ no_int_routing=1;
+ smp_int_redirect &= ~flag;
+#endif
+}
+#else
+#define set_smp_redirect(flag) do { } while (0)
+#endif
+
+/*
+ * Handle a platform-feature descriptor: remember the feature mask in
+ * sal_platform_features, log the features by name on one line, and
+ * update the SMP redirection setting for the two redirection hints.
+ */
+static void __init
+sal_desc_platform_feature (void *p)
+{
+	struct ia64_sal_desc_platform_feature *desc = p;
+	unsigned long features;
+
+	sal_platform_features = desc->feature_mask;
+	features = sal_platform_features;
+
+	printk(KERN_INFO "SAL Platform features:");
+	if (features == 0) {
+		printk(" None\n");
+		return;
+	}
+
+	if (features & IA64_SAL_PLATFORM_FEATURE_BUS_LOCK)
+		printk(" BusLock");
+
+	if (features & IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT) {
+		printk(" IRQ_Redirection");
+		set_smp_redirect(SMP_IRQ_REDIRECTION);
+	}
+
+	if (features & IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT) {
+		printk(" IPI_Redirection");
+		set_smp_redirect(SMP_IPI_REDIRECTION);
+	}
+
+	if (features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)
+		printk(" ITC_Drift");
+
+	/* terminate the line started by the KERN_INFO printk above */
+	printk("\n");
+}
+
+#ifdef CONFIG_SMP
+/*
+ * Handle an AP-wakeup descriptor.  Only the external-interrupt
+ * mechanism is supported: its vector is stored in ap_wakeup_vector.
+ * Any other mechanism is reported as an error and otherwise ignored.
+ */
+static void __init
+sal_desc_ap_wakeup (void *p)
+{
+	struct ia64_sal_desc_ap_wakeup *wakeup = p;
+
+	if (wakeup->mechanism != IA64_SAL_AP_EXTERNAL_INT) {
+		printk(KERN_ERR "SAL: AP wakeup mechanism unsupported!\n");
+		return;
+	}
+
+	ap_wakeup_vector = wakeup->vector;
+	printk(KERN_INFO "SAL: AP wakeup using external interrupt "
+	       "vector 0x%lx\n", ap_wakeup_vector);
+}
+
+/*
+ * Scan the saved kernel command line for a word starting with
+ * "nointroute" and, if one is found, set no_int_routing.
+ *
+ * The comparison uses strncmp() rather than memcmp(): a fixed 10-byte
+ * memcmp() may read beyond the terminating NUL when fewer than ten
+ * characters remain, whereas strncmp() stops at the NUL.  Which words
+ * match is unchanged.
+ */
+static void __init
+chk_nointroute_opt(void)
+{
+	char *cp;
+	extern char saved_command_line[];
+
+	for (cp = saved_command_line; *cp; ) {
+		if (strncmp(cp, "nointroute", 10) == 0) {
+			no_int_routing = 1;
+			printk ("no_int_routing on\n");
+			break;
+		} else {
+			/* skip the rest of this word, then the blanks after it */
+			while (*cp != ' ' && *cp)
+				++cp;
+			while (*cp == ' ')
+				++cp;
+		}
+	}
+}
+
+#else
+static void __init sal_desc_ap_wakeup(void *p) { }
+#endif
+
+/*
+ * Parse the SAL System Table handed over by firmware.
+ *
+ * @systab: the SAL system table; a NULL pointer is reported and ignored.
+ *
+ * Records the SAL revision/version, logs an identification line, and
+ * then walks the descriptor entries that follow the table header,
+ * dispatching on the type byte at the start of each entry.  A bad
+ * signature is reported but does not stop parsing.
+ */
+void __init
+ia64_sal_init (struct ia64_sal_systab *systab)
+{
+	char *p;
+	int i;
+
+	if (!systab) {
+		printk(KERN_WARNING "Hmm, no SAL System Table.\n");
+		return;
+	}
+
+	/* the message needs its own newline, or the next printk runs into it */
+	if (strncmp(systab->signature, "SST_", 4) != 0)
+		printk(KERN_ERR "bad signature in system table!\n");
+
+	check_versions(systab);
+#ifdef CONFIG_SMP
+	chk_nointroute_opt();
+#endif
+
+	/* revisions are coded in BCD, so %x does the job for us */
+	printk(KERN_INFO "SAL %x.%x: %.32s %.32s%sversion %x.%x\n",
+	       SAL_MAJOR(sal_revision), SAL_MINOR(sal_revision),
+	       systab->oem_id, systab->product_id,
+	       systab->product_id[0] ? " " : "",
+	       SAL_MAJOR(sal_version), SAL_MINOR(sal_version));
+
+	/* the descriptor entries start right after the table header */
+	p = (char *) (systab + 1);
+	for (i = 0; i < systab->entry_count; i++) {
+		/*
+		 * The first byte of each entry type contains the type
+		 * descriptor.
+		 */
+		switch (*p) {
+		      case SAL_DESC_ENTRY_POINT:
+			sal_desc_entry_point(p);
+			break;
+		      case SAL_DESC_PLATFORM_FEATURE:
+			sal_desc_platform_feature(p);
+			break;
+		      case SAL_DESC_PTC:
+			ia64_ptc_domain_info = (ia64_sal_desc_ptc_t *)p;
+			break;
+		      case SAL_DESC_AP_WAKEUP:
+			sal_desc_ap_wakeup(p);
+			break;
+		}
+		/* entries of other types are stepped over without action */
+		p += SAL_DESC_SIZE(*p);
+	}
+}
+
+/*
+ * Issue an OEM-defined SAL call through SAL_CALL.
+ * Returns -1 without calling SAL when oemfunc lies outside
+ * [IA64_SAL_OEMFUNC_MIN, IA64_SAL_OEMFUNC_MAX], otherwise 0; the SAL
+ * result itself is delivered in *isrvp.
+ */
+int
+ia64_sal_oemcall(struct ia64_sal_retval *isrvp, u64 oemfunc, u64 arg1,
+		 u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7)
+{
+	if (oemfunc >= IA64_SAL_OEMFUNC_MIN && oemfunc <= IA64_SAL_OEMFUNC_MAX) {
+		SAL_CALL(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
+		return 0;
+	}
+	return -1;
+}
+EXPORT_SYMBOL(ia64_sal_oemcall);
+
+/*
+ * Same contract as ia64_sal_oemcall(), but the call is made through
+ * SAL_CALL_NOLOCK.  Returns -1 for an out-of-range oemfunc, else 0.
+ */
+int
+ia64_sal_oemcall_nolock(struct ia64_sal_retval *isrvp, u64 oemfunc, u64 arg1,
+			u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6,
+			u64 arg7)
+{
+	if (oemfunc >= IA64_SAL_OEMFUNC_MIN && oemfunc <= IA64_SAL_OEMFUNC_MAX) {
+		SAL_CALL_NOLOCK(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6,
+				arg7);
+		return 0;
+	}
+	return -1;
+}
+EXPORT_SYMBOL(ia64_sal_oemcall_nolock);
+
+/*
+ * Same contract as ia64_sal_oemcall(), but the call is made through
+ * SAL_CALL_REENTRANT.  Returns -1 for an out-of-range oemfunc, else 0.
+ */
+int
+ia64_sal_oemcall_reentrant(struct ia64_sal_retval *isrvp, u64 oemfunc,
+			   u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5,
+			   u64 arg6, u64 arg7)
+{
+	if (oemfunc >= IA64_SAL_OEMFUNC_MIN && oemfunc <= IA64_SAL_OEMFUNC_MAX) {
+		SAL_CALL_REENTRANT(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6,
+				   arg7);
+		return 0;
+	}
+	return -1;
+}
+EXPORT_SYMBOL(ia64_sal_oemcall_reentrant);
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
new file mode 100644
index 00000000000..d227fabecd0
--- /dev/null
+++ b/arch/ia64/kernel/salinfo.c
@@ -0,0 +1,629 @@
+/*
+ * salinfo.c
+ *
+ * Creates entries in /proc/sal for various system features.
+ *
+ * Copyright (c) 2003 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (c) 2003 Hewlett-Packard Co
+ * Bjorn Helgaas <bjorn.helgaas@hp.com>
+ *
+ * 10/30/2001 jbarnes@sgi.com copied much of Stephane's palinfo
+ * code to create this file
+ * Oct 23 2003 kaos@sgi.com
+ * Replace IPI with set_cpus_allowed() to read a record from the required cpu.
+ * Redesign salinfo log processing to separate interrupt and user space
+ * contexts.
+ * Cache the record across multi-block reads from user space.
+ * Support > 64 cpus.
+ * Delete module_exit and MOD_INC/DEC_COUNT, salinfo cannot be a module.
+ *
+ * Jan 28 2004 kaos@sgi.com
+ * Periodically check for outstanding MCA or INIT records.
+ *
+ * Dec 5 2004 kaos@sgi.com
+ * Standardize which records are cleared automatically.
+ */
+
+#include <linux/types.h>
+#include <linux/proc_fs.h>
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/timer.h>
+#include <linux/vmalloc.h>
+
+#include <asm/semaphore.h>
+#include <asm/sal.h>
+#include <asm/uaccess.h>
+
+MODULE_AUTHOR("Jesse Barnes <jbarnes@sgi.com>");
+MODULE_DESCRIPTION("/proc interface to IA-64 SAL features");
+MODULE_LICENSE("GPL");
+
+static int salinfo_read(char *page, char **start, off_t off, int count, int *eof, void *data);
+
+typedef struct {
+ const char *name; /* name of the proc entry */
+ unsigned long feature; /* feature bit */
+ struct proc_dir_entry *entry; /* registered entry (removal) */
+} salinfo_entry_t;
+
+/*
+ * List {name,feature} pairs for every entry in /proc/sal/<feature>
+ * that this module exports
+ */
+static salinfo_entry_t salinfo_entries[]={
+ { "bus_lock", IA64_SAL_PLATFORM_FEATURE_BUS_LOCK, },
+ { "irq_redirection", IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT, },
+ { "ipi_redirection", IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT, },
+ { "itc_drift", IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT, },
+};
+
+#define NR_SALINFO_ENTRIES ARRAY_SIZE(salinfo_entries)
+
+static char *salinfo_log_name[] = {
+ "mca",
+ "init",
+ "cmc",
+ "cpe",
+};
+
+static struct proc_dir_entry *salinfo_proc_entries[
+ ARRAY_SIZE(salinfo_entries) + /* /proc/sal/bus_lock */
+ ARRAY_SIZE(salinfo_log_name) + /* /proc/sal/{mca,...} */
+ (2 * ARRAY_SIZE(salinfo_log_name)) + /* /proc/sal/mca/{event,data} */
+ 1]; /* /proc/sal */
+
+/* Some records we get ourselves, some are accessed as saved data in buffers
+ * that are owned by mca.c.
+ */
+struct salinfo_data_saved {
+ u8* buffer;
+ u64 size;
+ u64 id;
+ int cpu;
+};
+
+/* State transitions. Actions are :-
+ * Write "read <cpunum>" to the data file.
+ * Write "clear <cpunum>" to the data file.
+ * Write "oemdata <cpunum> <offset>" to the data file.
+ * Read from the data file.
+ * Close the data file.
+ *
+ * Start state is NO_DATA.
+ *
+ * NO_DATA
+ * write "read <cpunum>" -> NO_DATA or LOG_RECORD.
+ * write "clear <cpunum>" -> NO_DATA or LOG_RECORD.
+ * write "oemdata <cpunum> <offset>" -> return -EINVAL.
+ * read data -> return EOF.
+ * close -> unchanged. Free record areas.
+ *
+ * LOG_RECORD
+ * write "read <cpunum>" -> NO_DATA or LOG_RECORD.
+ * write "clear <cpunum>" -> NO_DATA or LOG_RECORD.
+ * write "oemdata <cpunum> <offset>" -> format the oem data, goto OEMDATA.
+ * read data -> return the INIT/MCA/CMC/CPE record.
+ * close -> unchanged. Keep record areas.
+ *
+ * OEMDATA
+ * write "read <cpunum>" -> NO_DATA or LOG_RECORD.
+ * write "clear <cpunum>" -> NO_DATA or LOG_RECORD.
+ * write "oemdata <cpunum> <offset>" -> format the oem data, goto OEMDATA.
+ * read data -> return the formatted oemdata.
+ * close -> unchanged. Keep record areas.
+ *
+ * Closing the data file does not change the state. This allows shell scripts
+ * to manipulate salinfo data, each shell redirection opens the file, does one
+ * action then closes it again. The record areas are only freed at close when
+ * the state is NO_DATA.
+ */
+enum salinfo_state {
+ STATE_NO_DATA,
+ STATE_LOG_RECORD,
+ STATE_OEMDATA,
+};
+
+struct salinfo_data {
+ volatile cpumask_t cpu_event; /* which cpus have outstanding events */
+ struct semaphore sem; /* count of cpus with outstanding events (bits set in cpu_event) */
+ u8 *log_buffer;
+ u64 log_size;
+ u8 *oemdata; /* decoded oem data */
+ u64 oemdata_size;
+ int open; /* single-open to prevent races */
+ u8 type;
+ u8 saved_num; /* using a saved record? */
+ enum salinfo_state state :8; /* processing state */
+ u8 padding;
+ int cpu_check; /* next CPU to check */
+ struct salinfo_data_saved data_saved[5];/* save last 5 records from mca.c, must be < 255 */
+};
+
+static struct salinfo_data salinfo_data[ARRAY_SIZE(salinfo_log_name)];
+
+static spinlock_t data_lock, data_saved_lock;
+
+/** salinfo_platform_oemdata - optional callback to decode oemdata from an error
+ * record.
+ * @sect_header: pointer to the start of the section to decode.
+ * @oemdata: returns vmalloc area containing the decoded output.
+ * @oemdata_size: returns length of decoded output (strlen).
+ *
+ * Description: If user space asks for oem data to be decoded by the kernel
+ * and/or prom and the platform has set salinfo_platform_oemdata to the address
+ * of a platform specific routine then call that routine. salinfo_platform_oemdata
+ * vmalloc's and formats its output area, returning the address of the text
+ * and its strlen. Returns 0 for success, -ve for error. The callback is
+ * invoked on the cpu that generated the error record.
+ */
+int (*salinfo_platform_oemdata)(const u8 *sect_header, u8 **oemdata, u64 *oemdata_size);
+
+struct salinfo_platform_oemdata_parms {
+ const u8 *efi_guid;
+ u8 **oemdata;
+ u64 *oemdata_size;
+ int ret;
+};
+
+/*
+ * call_on_cpu() trampoline: unpack the parameter block, invoke the
+ * platform oemdata decoder and store its return value in parms->ret.
+ */
+static void
+salinfo_platform_oemdata_cpu(void *context)
+{
+	struct salinfo_platform_oemdata_parms *p = context;
+	int rc;
+
+	rc = salinfo_platform_oemdata(p->efi_guid, p->oemdata, p->oemdata_size);
+	p->ret = rc;
+}
+
+/*
+ * Delete entry 'shift' from data->data_saved by sliding every later
+ * entry down one slot and zeroing the slot freed at the end.
+ *
+ * Source and destination of the slide overlap, so memmove() is used;
+ * memcpy() is not defined for overlapping regions.
+ */
+static void
+shift1_data_saved (struct salinfo_data *data, int shift)
+{
+	memmove(data->data_saved+shift, data->data_saved+shift+1,
+		(ARRAY_SIZE(data->data_saved) - (shift+1)) * sizeof(data->data_saved[0]));
+	memset(data->data_saved + ARRAY_SIZE(data->data_saved) - 1, 0,
+	       sizeof(data->data_saved[0]));
+}
+
+/* This routine is invoked in interrupt context. Note: mca.c enables
+ * interrupts before calling this code for CMC/CPE. MCA and INIT events are
+ * not irq safe, do not call any routines that use spinlocks, they may deadlock.
+ * MCA and INIT records are recorded, a timer event will look for any
+ * outstanding events and wake up the user space code.
+ *
+ * The buffer passed from mca.c points to the output from ia64_log_get. This is
+ * a persistent buffer but its contents can change between the interrupt and
+ * when user space processes the record. Save the record id to identify
+ * changes.
+ */
+/*
+ * @type: index into salinfo_data[] / salinfo_log_name[].
+ * @buffer: record buffer owned by the caller; only the pointer, size
+ * and record id are saved here, the contents are not copied.
+ * @size: size of the record.
+ * @irqsafe: non-zero when spinlocks and up() may be used.
+ */
+void
+salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe)
+{
+ struct salinfo_data *data = salinfo_data + type;
+ struct salinfo_data_saved *data_saved;
+ unsigned long flags = 0;
+ int i;
+ int saved_size = ARRAY_SIZE(data->data_saved);
+
+ BUG_ON(type >= ARRAY_SIZE(salinfo_log_name));
+
+ if (irqsafe)
+ spin_lock_irqsave(&data_saved_lock, flags);
+ /* look for the first unused slot */
+ for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) {
+ if (!data_saved->buffer)
+ break;
+ }
+ if (i == saved_size) {
+ /*
+ * All slots are in use. If user space is not working on a saved
+ * record (saved_num == 0), drop the oldest one and reuse the last
+ * slot; otherwise the new record is not saved.
+ */
+ if (!data->saved_num) {
+ shift1_data_saved(data, 0);
+ data_saved = data->data_saved + saved_size - 1;
+ } else
+ data_saved = NULL;
+ }
+ if (data_saved) {
+ data_saved->cpu = smp_processor_id();
+ data_saved->id = ((sal_log_record_header_t *)buffer)->id;
+ data_saved->size = size;
+ data_saved->buffer = buffer;
+ }
+ if (irqsafe)
+ spin_unlock_irqrestore(&data_saved_lock, flags);
+
+ /*
+ * Mark this cpu as having an event. The semaphore is only raised when
+ * the bit was previously clear and the caller is irq safe.
+ */
+ if (!test_and_set_bit(smp_processor_id(), &data->cpu_event)) {
+ if (irqsafe)
+ up(&data->sem);
+ }
+}
+
+/* Check for outstanding MCA/INIT records every minute (arbitrary) */
+#define SALINFO_TIMER_DELAY (60*HZ)
+static struct timer_list salinfo_timer;
+
+/*
+ * Raise the semaphore once for every cpu whose event bit is set, but
+ * only while the data file is open.
+ */
+static void
+salinfo_timeout_check(struct salinfo_data *data)
+{
+	int cpu = 0;
+
+	if (!data->open)
+		return;
+
+	while (cpu < NR_CPUS) {
+		/*
+		 * double up() is not a problem, user space will see no
+		 * records for the additional "events".
+		 */
+		if (test_bit(cpu, &data->cpu_event))
+			up(&data->sem);
+		cpu++;
+	}
+}
+
+/*
+ * Timer callback: poll the MCA and INIT record types for outstanding
+ * events, then re-arm the timer SALINFO_TIMER_DELAY jiffies from now.
+ * 'arg' is unused.
+ */
+static void
+salinfo_timeout (unsigned long arg)
+{
+	struct salinfo_data *mca = salinfo_data + SAL_INFO_TYPE_MCA;
+	struct salinfo_data *init = salinfo_data + SAL_INFO_TYPE_INIT;
+
+	salinfo_timeout_check(mca);
+	salinfo_timeout_check(init);
+
+	salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY;
+	add_timer(&salinfo_timer);
+}
+
+/* Opening an event file requires CAP_SYS_ADMIN; nothing else is set up. */
+static int
+salinfo_event_open(struct inode *inode, struct file *file)
+{
+	return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
+}
+
+/*
+ * Read handler for the "event" file. Waits until some cpu has an
+ * outstanding event, then returns the text "read <cpu>\n" (truncated to
+ * 'count' bytes) for user space to write to the data file.
+ *
+ * Returns the number of bytes copied, -EAGAIN if the file is
+ * non-blocking and no event is available, -ERESTARTSYS if the wait was
+ * interrupted, or -EFAULT if the user buffer cannot be written.
+ */
+static ssize_t
+salinfo_event_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct proc_dir_entry *entry = PDE(inode);
+ struct salinfo_data *data = entry->data;
+ char cmd[32];
+ size_t size;
+ int i, n, cpu = -1;
+
+retry:
+ /* take the semaphore without sleeping if possible, else wait for it */
+ if (down_trylock(&data->sem)) {
+ if (file->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+ if (down_interruptible(&data->sem))
+ return -ERESTARTSYS;
+ }
+
+ /* scan all cpus once, starting at cpu_check and wrapping around */
+ n = data->cpu_check;
+ for (i = 0; i < NR_CPUS; i++) {
+ if (test_bit(n, &data->cpu_event)) {
+ cpu = n;
+ break;
+ }
+ if (++n == NR_CPUS)
+ n = 0;
+ }
+
+ /* the semaphore was raised but no event bit is set: wait again */
+ if (cpu == -1)
+ goto retry;
+
+ /* events are sticky until the user says "clear" */
+ up(&data->sem);
+
+ /* for next read, start checking at next CPU */
+ data->cpu_check = cpu;
+ if (++data->cpu_check == NR_CPUS)
+ data->cpu_check = 0;
+
+ snprintf(cmd, sizeof(cmd), "read %d\n", cpu);
+
+ size = strlen(cmd);
+ if (size > count)
+ size = count;
+ if (copy_to_user(buffer, cmd, size))
+ return -EFAULT;
+
+ return size;
+}
+
+static struct file_operations salinfo_event_fops = {
+ .open = salinfo_event_open,
+ .read = salinfo_event_read,
+};
+
+/*
+ * Open handler for the "data" file.  Requires CAP_SYS_ADMIN and allows
+ * only one opener at a time.  When the state is STATE_NO_DATA a fresh
+ * log buffer is allocated, sized by ia64_sal_get_state_info_size().
+ *
+ * Returns 0, -EPERM, -EBUSY (already open) or -ENOMEM.
+ */
+static int
+salinfo_log_open(struct inode *inode, struct file *file)
+{
+	struct proc_dir_entry *entry = PDE(inode);
+	struct salinfo_data *data = entry->data;
+	int busy;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	/* claim the single-open slot under data_lock */
+	spin_lock(&data_lock);
+	busy = data->open;
+	if (!busy)
+		data->open = 1;
+	spin_unlock(&data_lock);
+	if (busy)
+		return -EBUSY;
+
+	/* in any other state the record areas were kept across the last close */
+	if (data->state != STATE_NO_DATA)
+		return 0;
+
+	data->log_buffer = vmalloc(ia64_sal_get_state_info_size(data->type));
+	if (data->log_buffer)
+		return 0;
+
+	/* allocation failed: give the open slot back */
+	data->open = 0;
+	return -ENOMEM;
+}
+
+/*
+ * Release handler for the "data" file.  The record areas are freed only
+ * when the state is STATE_NO_DATA; otherwise they are kept for the next
+ * opener.  Always clears the single-open flag and returns 0.
+ */
+static int
+salinfo_log_release(struct inode *inode, struct file *file)
+{
+	struct salinfo_data *data = PDE(inode)->data;
+
+	if (data->state == STATE_NO_DATA) {
+		vfree(data->log_buffer);
+		data->log_buffer = NULL;
+		vfree(data->oemdata);
+		data->oemdata = NULL;
+	}
+
+	spin_lock(&data_lock);
+	data->open = 0;
+	spin_unlock(&data_lock);
+
+	return 0;
+}
+
+/*
+ * Run fn(arg) with the current task's allowed-cpu mask temporarily
+ * restricted to the single cpu 'cpu', then restore the saved mask.
+ */
+static void
+call_on_cpu(int cpu, void (*fn)(void *), void *arg)
+{
+ cpumask_t save_cpus_allowed, new_cpus_allowed;
+ /* remember the caller's mask so it can be put back afterwards */
+ memcpy(&save_cpus_allowed, &current->cpus_allowed, sizeof(save_cpus_allowed));
+ /* build a mask containing only 'cpu' */
+ memset(&new_cpus_allowed, 0, sizeof(new_cpus_allowed));
+ set_bit(cpu, &new_cpus_allowed);
+ set_cpus_allowed(current, new_cpus_allowed);
+ (*fn)(arg);
+ set_cpus_allowed(current, save_cpus_allowed);
+}
+
+/*
+ * call_on_cpu() trampoline: fetch the SAL record of data->type into
+ * data->log_buffer and store the value returned by
+ * ia64_sal_get_state_info() in data->log_size.
+ */
+static void
+salinfo_log_read_cpu(void *context)
+{
+	struct salinfo_data *data = context;
+	sal_log_record_header_t *header = (sal_log_record_header_t *) data->log_buffer;
+
+	data->log_size = ia64_sal_get_state_info(data->type, (u64 *) data->log_buffer);
+
+	/* Clear corrected errors as they are read from SAL */
+	if (header->severity != sal_log_severity_corrected)
+		return;
+	ia64_sal_clear_state_info(data->type);
+}
+
+/*
+ * Load the next record for 'cpu' into data->log_buffer. A record saved
+ * by salinfo_log_wakeup() is preferred; if none is usable the record is
+ * read from SAL on that cpu. On return data->state is STATE_LOG_RECORD
+ * when a record is available, else STATE_NO_DATA with the cpu's event
+ * bit cleared. data->saved_num is the 1-based index of the saved record
+ * in use, or 0 when none is.
+ */
+static void
+salinfo_log_new_read(int cpu, struct salinfo_data *data)
+{
+ struct salinfo_data_saved *data_saved;
+ unsigned long flags;
+ int i;
+ int saved_size = ARRAY_SIZE(data->data_saved);
+
+ data->saved_num = 0;
+ spin_lock_irqsave(&data_saved_lock, flags);
+retry:
+ for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) {
+ if (data_saved->buffer && data_saved->cpu == cpu) {
+ sal_log_record_header_t *rh = (sal_log_record_header_t *)(data_saved->buffer);
+ data->log_size = data_saved->size;
+ memcpy(data->log_buffer, rh, data->log_size);
+ barrier(); /* id check must not be moved */
+ /* the copy is only trusted if the source still holds the saved id */
+ if (rh->id == data_saved->id) {
+ data->saved_num = i+1;
+ break;
+ }
+ /* saved record changed by mca.c since interrupt, discard it */
+ shift1_data_saved(data, i);
+ goto retry;
+ }
+ }
+ spin_unlock_irqrestore(&data_saved_lock, flags);
+
+ /* no saved record was usable: read the record from SAL on that cpu */
+ if (!data->saved_num)
+ call_on_cpu(cpu, salinfo_log_read_cpu, data);
+ if (!data->log_size) {
+ data->state = STATE_NO_DATA;
+ clear_bit(cpu, &data->cpu_event);
+ } else {
+ data->state = STATE_LOG_RECORD;
+ }
+}
+
+/*
+ * Read handler for the "data" file.  Serves the raw log record in
+ * STATE_LOG_RECORD, the formatted oem data in STATE_OEMDATA, and an
+ * empty buffer in any other state.
+ */
+static ssize_t
+salinfo_log_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
+{
+	struct salinfo_data *data = PDE(file->f_dentry->d_inode)->data;
+	u8 *src = NULL;
+	u64 avail = 0;
+
+	switch (data->state) {
+	      case STATE_LOG_RECORD:
+		src = data->log_buffer;
+		avail = data->log_size;
+		break;
+	      case STATE_OEMDATA:
+		src = data->oemdata;
+		avail = data->oemdata_size;
+		break;
+	      default:
+		break;
+	}
+	return simple_read_from_buffer(buffer, count, ppos, src, avail);
+}
+
+/* call_on_cpu() trampoline: clear the SAL state info of the record's type. */
+static void
+salinfo_log_clear_cpu(void *context)
+{
+	struct salinfo_data *rec = context;
+
+	ia64_sal_clear_state_info(rec->type);
+}
+
+/*
+ * Handle "clear <cpu>": drop the current record for 'cpu' and look for
+ * another one. The state is reset to STATE_NO_DATA first; if the cpu
+ * has no outstanding event nothing else is done. Always returns 0.
+ */
+static int
+salinfo_log_clear(struct salinfo_data *data, int cpu)
+{
+ sal_log_record_header_t *rh;
+ data->state = STATE_NO_DATA;
+ if (!test_bit(cpu, &data->cpu_event))
+ return 0;
+ /* take back one semaphore count for the event bit cleared below */
+ down(&data->sem);
+ clear_bit(cpu, &data->cpu_event);
+ /* if a saved record was in use, remove it from the saved list */
+ if (data->saved_num) {
+ unsigned long flags;
+ spin_lock_irqsave(&data_saved_lock, flags);
+ shift1_data_saved(data, data->saved_num - 1 );
+ data->saved_num = 0;
+ spin_unlock_irqrestore(&data_saved_lock, flags);
+ }
+ rh = (sal_log_record_header_t *)(data->log_buffer);
+ /* Corrected errors have already been cleared from SAL */
+ if (rh->severity != sal_log_severity_corrected)
+ call_on_cpu(cpu, salinfo_log_clear_cpu, data);
+ /* clearing a record may make a new record visible */
+ salinfo_log_new_read(cpu, data);
+ /* a new record was found: flag the event again and raise the semaphore */
+ if (data->state == STATE_LOG_RECORD &&
+ !test_and_set_bit(cpu, &data->cpu_event))
+ up(&data->sem);
+ return 0;
+}
+
+/*
+ * Write handler for the "data" file.  Accepts one of the commands
+ *
+ *	"read <cpunum>"			load the next record for that cpu
+ *	"clear <cpunum>"		clear the current record
+ *	"oemdata <cpunum> <offset>"	format the oem data at 'offset'
+ *
+ * Returns 'count' on success, -EFAULT if the user buffer cannot be
+ * read, -EINVAL for an unknown command, a bad state or a bad offset,
+ * or the error reported by the clear/oemdata step.
+ *
+ * At most sizeof(cmd) - 1 bytes are taken from user space and the copy
+ * is NUL-terminated, because sscanf() needs a terminated string.
+ */
+static ssize_t
+salinfo_log_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct proc_dir_entry *entry = PDE(inode);
+	struct salinfo_data *data = entry->data;
+	char cmd[32];
+	size_t size;
+	u32 offset;
+	int cpu;
+
+	/* leave room for the terminating NUL */
+	size = sizeof(cmd) - 1;
+	if (count < size)
+		size = count;
+	if (copy_from_user(cmd, buffer, size))
+		return -EFAULT;
+	cmd[size] = '\0';
+
+	if (sscanf(cmd, "read %d", &cpu) == 1) {
+		salinfo_log_new_read(cpu, data);
+	} else if (sscanf(cmd, "clear %d", &cpu) == 1) {
+		int ret;
+		if ((ret = salinfo_log_clear(data, cpu)))
+			count = ret;
+	} else if (sscanf(cmd, "oemdata %d %d", &cpu, &offset) == 2) {
+		if (data->state != STATE_LOG_RECORD && data->state != STATE_OEMDATA)
+			return -EINVAL;
+		/*
+		 * A guid must fit between 'offset' and the end of the record.
+		 * The first test keeps the unsigned subtraction from wrapping
+		 * when the record is shorter than a guid.
+		 */
+		if (data->log_size < sizeof(efi_guid_t) ||
+		    offset > data->log_size - sizeof(efi_guid_t))
+			return -EINVAL;
+		data->state = STATE_OEMDATA;
+		if (salinfo_platform_oemdata) {
+			struct salinfo_platform_oemdata_parms parms = {
+				.efi_guid = data->log_buffer + offset,
+				.oemdata = &data->oemdata,
+				.oemdata_size = &data->oemdata_size
+			};
+			call_on_cpu(cpu, salinfo_platform_oemdata_cpu, &parms);
+			if (parms.ret)
+				count = parms.ret;
+		} else
+			data->oemdata_size = 0;	/* no decoder: reads return EOF */
+	} else
+		return -EINVAL;
+
+	return count;
+}
+
+static struct file_operations salinfo_data_fops = {
+ .open = salinfo_log_open,
+ .release = salinfo_log_release,
+ .read = salinfo_log_read,
+ .write = salinfo_log_write,
+};
+
+/*
+ * Create /proc/sal and everything below it: one read-only file per
+ * platform feature, plus a directory per log type holding an "event"
+ * and a "data" file. Every entry created is also recorded in
+ * salinfo_proc_entries. Finally the periodic timer is started.
+ * Always returns 0, even when /proc/sal could not be created.
+ */
+static int __init
+salinfo_init(void)
+{
+ struct proc_dir_entry *salinfo_dir; /* /proc/sal dir entry */
+ struct proc_dir_entry **sdir = salinfo_proc_entries; /* keeps track of every entry */
+ struct proc_dir_entry *dir, *entry;
+ struct salinfo_data *data;
+ int i, j, online;
+
+ salinfo_dir = proc_mkdir("sal", NULL);
+ if (!salinfo_dir)
+ return 0;
+
+ for (i=0; i < NR_SALINFO_ENTRIES; i++) {
+ /* pass the feature bit in question as misc data */
+ *sdir++ = create_proc_read_entry (salinfo_entries[i].name, 0, salinfo_dir,
+ salinfo_read, (void *)salinfo_entries[i].feature);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) {
+ data = salinfo_data + i;
+ data->type = i;
+ /* start with no events; re-initialised below once the files exist */
+ sema_init(&data->sem, 0);
+ dir = proc_mkdir(salinfo_log_name[i], salinfo_dir);
+ if (!dir)
+ continue;
+
+ /* a failure here or below skips the rest of the setup for this log type */
+ entry = create_proc_entry("event", S_IRUSR, dir);
+ if (!entry)
+ continue;
+ entry->data = data;
+ entry->proc_fops = &salinfo_event_fops;
+ *sdir++ = entry;
+
+ entry = create_proc_entry("data", S_IRUSR | S_IWUSR, dir);
+ if (!entry)
+ continue;
+ entry->data = data;
+ entry->proc_fops = &salinfo_data_fops;
+ *sdir++ = entry;
+
+ /* we missed any events before now */
+ online = 0;
+ for (j = 0; j < NR_CPUS; j++)
+ if (cpu_online(j)) {
+ set_bit(j, &data->cpu_event);
+ ++online;
+ }
+ /* one semaphore count per event bit set above */
+ sema_init(&data->sem, online);
+
+ *sdir++ = dir;
+ }
+
+ *sdir++ = salinfo_dir;
+
+ init_timer(&salinfo_timer);
+ salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY;
+ salinfo_timer.function = &salinfo_timeout;
+ add_timer(&salinfo_timer);
+
+ return 0;
+}
+
+/*
+ * 'data' contains an integer that corresponds to the feature we're
+ * testing
+ */
+/*
+ * Read callback for the per-feature files: produces "1\n" when the
+ * feature bit carried in 'data' is set in sal_platform_features and
+ * "0\n" otherwise, then applies the usual off/count windowing.
+ */
+static int
+salinfo_read(char *page, char **start, off_t off, int count, int *eof, void *data)
+{
+	int total;
+	int remaining;
+
+	total = sprintf(page, (sal_platform_features & (unsigned long)data) ? "1\n" : "0\n");
+
+	/* the window reaches the end of the text: signal end of file */
+	if (total <= off+count)
+		*eof = 1;
+
+	*start = page + off;
+	remaining = total - off;
+
+	if (remaining > count)
+		remaining = count;
+	return (remaining < 0) ? 0 : remaining;
+}
+
+module_init(salinfo_init);
diff --git a/arch/ia64/kernel/semaphore.c b/arch/ia64/kernel/semaphore.c
new file mode 100644
index 00000000000..2724ef3fbae
--- /dev/null
+++ b/arch/ia64/kernel/semaphore.c
@@ -0,0 +1,165 @@
+/*
+ * IA-64 semaphore implementation (derived from x86 version).
+ *
+ * Copyright (C) 1999-2000, 2002 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+/*
+ * Semaphores are implemented using a two-way counter: The "count"
+ * variable is decremented for each process that tries to acquire the
+ * semaphore, while the "sleepers" variable is a count of such
+ * acquires.
+ *
+ * Notably, the inline "up()" and "down()" functions can efficiently
+ * test if they need to do any extra work (up needs to do something
+ * only if count was negative before the increment operation).
+ *
+ * "sleeping" and the contention routine ordering is protected
+ * by the spinlock in the semaphore's waitqueue head.
+ *
+ * Note that these functions are only called when there is contention
+ * on the lock, and as such all this is the "non-critical" part of the
+ * whole semaphore business. The critical part is the inline stuff in
+ * <asm/semaphore.h> where we want to avoid any extra jumps and calls.
+ */
+#include <linux/sched.h>
+#include <linux/init.h>
+
+#include <asm/errno.h>
+#include <asm/semaphore.h>
+
+/*
+ * Logic:
+ * - Only on a boundary condition do we need to care. When we go
+ * from a negative count to a non-negative, we wake people up.
+ * - When we go from a non-negative count to a negative do we
+ * (a) synchronize with the "sleepers" count and (b) make sure
+ * that we're on the wakeup list before we synchronize so that
+ * we cannot lose wakeup events.
+ */
+
+/*
+ * Out-of-line part of up(): wake up tasks waiting on the semaphore's
+ * wait queue.
+ */
+void
+__up (struct semaphore *sem)
+{
+ wake_up(&sem->wait);
+}
+
+/*
+ * Out-of-line part of down(): queue the current task as an exclusive
+ * waiter and sleep in TASK_UNINTERRUPTIBLE until the count shows that
+ * the semaphore has been obtained. sem->sleepers and the count fix-ups
+ * are done under the wait queue's spinlock.
+ */
+void __sched __down (struct semaphore *sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ unsigned long flags;
+
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ add_wait_queue_exclusive_locked(&sem->wait, &wait);
+
+ sem->sleepers++;
+ for (;;) {
+ int sleepers = sem->sleepers;
+
+ /*
+ * Add "everybody else" into it. They aren't
+ * playing, because we own the spinlock in
+ * the wait_queue_head.
+ */
+ if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+ sem->sleepers = 0;
+ break;
+ }
+ sem->sleepers = 1; /* us - see -1 above */
+ /* drop the lock while sleeping, retake it before re-checking */
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+ schedule();
+
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ }
+ remove_wait_queue_locked(&sem->wait, &wait);
+ /* pass a wakeup on to the remaining waiters */
+ wake_up_locked(&sem->wait);
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+ tsk->state = TASK_RUNNING;
+}
+
+/*
+ * Out-of-line part of down_interruptible(): like __down() but sleeps in
+ * TASK_INTERRUPTIBLE and gives up when a signal is pending.
+ *
+ * Returns 0 when the semaphore was obtained, -EINTR when the wait was
+ * abandoned because of a pending signal.
+ */
+int __sched __down_interruptible (struct semaphore * sem)
+{
+ int retval = 0;
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ unsigned long flags;
+
+ tsk->state = TASK_INTERRUPTIBLE;
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ add_wait_queue_exclusive_locked(&sem->wait, &wait);
+
+ sem->sleepers ++;
+ for (;;) {
+ int sleepers = sem->sleepers;
+
+ /*
+ * With signals pending, this turns into
+ * the trylock failure case - we won't be
+ * sleeping, and we can't get the lock as
+ * it has contention. Just correct the count
+ * and exit.
+ */
+ if (signal_pending(current)) {
+ retval = -EINTR;
+ sem->sleepers = 0;
+ atomic_add(sleepers, &sem->count);
+ break;
+ }
+
+ /*
+ * Add "everybody else" into it. They aren't
+ * playing, because we own the spinlock in
+ * wait_queue_head. The "-1" is because we're
+ * still hoping to get the semaphore.
+ */
+ if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+ sem->sleepers = 0;
+ break;
+ }
+ sem->sleepers = 1; /* us - see -1 above */
+ /* drop the lock while sleeping, retake it before re-checking */
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+ schedule();
+
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ tsk->state = TASK_INTERRUPTIBLE;
+ }
+ remove_wait_queue_locked(&sem->wait, &wait);
+ /* pass a wakeup on to the remaining waiters */
+ wake_up_locked(&sem->wait);
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+ tsk->state = TASK_RUNNING;
+ return retval;
+}
+
+/*
+ * Trylock failed - make sure we correct for having decremented the
+ * count.
+ */
+/*
+ * Adds sem->sleepers + 1 back to the count under the wait queue's
+ * spinlock, resets sleepers to 0, and wakes waiters if the count is no
+ * longer negative. Always returns 1.
+ */
+int
+__down_trylock (struct semaphore *sem)
+{
+ unsigned long flags;
+ int sleepers;
+
+ spin_lock_irqsave(&sem->wait.lock, flags);
+ sleepers = sem->sleepers + 1;
+ sem->sleepers = 0;
+
+ /*
+ * Add "everybody else" and us into it. They aren't
+ * playing, because we own the spinlock in the
+ * wait_queue_head.
+ */
+ if (!atomic_add_negative(sleepers, &sem->count)) {
+ wake_up_locked(&sem->wait);
+ }
+
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+ return 1;
+}
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
new file mode 100644
index 00000000000..f05650c801d
--- /dev/null
+++ b/arch/ia64/kernel/setup.c
@@ -0,0 +1,723 @@
+/*
+ * Architecture-specific setup.
+ *
+ * Copyright (C) 1998-2001, 2003-2004 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ * Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 2000, Rohit Seth <rohit.seth@intel.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ *
+ * 11/12/01 D.Mosberger Convert get_cpuinfo() to seq_file based show_cpuinfo().
+ * 04/04/00 D.Mosberger renamed cpu_initialized to cpu_online_map
+ * 03/31/00 R.Seth cpu_initialized and current->processor fixes
+ * 02/04/00 D.Mosberger some more get_cpuinfo fixes...
+ * 02/01/00 R.Seth fixed get_cpuinfo for SMP
+ * 01/07/99 S.Eranian added the support for command line argument
+ * 06/24/99 W.Drummond added boot_cpu_data.
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/acpi.h>
+#include <linux/bootmem.h>
+#include <linux/console.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/string.h>
+#include <linux/threads.h>
+#include <linux/tty.h>
+#include <linux/serial.h>
+#include <linux/serial_core.h>
+#include <linux/efi.h>
+#include <linux/initrd.h>
+
+#include <asm/ia32.h>
+#include <asm/machvec.h>
+#include <asm/mca.h>
+#include <asm/meminit.h>
+#include <asm/page.h>
+#include <asm/patch.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/sal.h>
+#include <asm/sections.h>
+#include <asm/serial.h>
+#include <asm/setup.h>
+#include <asm/smp.h>
+#include <asm/system.h>
+#include <asm/unistd.h>
+
+#if defined(CONFIG_SMP) && (IA64_CPU_SIZE > PAGE_SIZE)
+# error "struct cpuinfo_ia64 too big!"
+#endif
+
+#ifdef CONFIG_SMP
+unsigned long __per_cpu_offset[NR_CPUS];
+EXPORT_SYMBOL(__per_cpu_offset);
+#endif
+
+DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info);
+DEFINE_PER_CPU(unsigned long, local_per_cpu_offset);
+DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8);
+unsigned long ia64_cycles_per_usec;
+struct ia64_boot_param *ia64_boot_param;
+struct screen_info screen_info;
+
+unsigned long ia64_max_cacheline_size;
+unsigned long ia64_iobase; /* virtual address for I/O accesses */
+EXPORT_SYMBOL(ia64_iobase);
+struct io_space io_space[MAX_IO_SPACES];
+EXPORT_SYMBOL(io_space);
+unsigned int num_io_spaces;
+
+/*
+ * The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1). This
+ * mask specifies a mask of address bits that must be 0 in order for two buffers to be
+ * mergeable by the I/O MMU (i.e., the end address of the first buffer and the start
+ * address of the second buffer must be aligned to (merge_mask+1) in order to be
+ * mergeable). By default, we assume there is no I/O MMU which can merge physically
+ * discontiguous buffers, so we set the merge_mask to ~0UL, which corresponds to a iommu
+ * page-size of 2^64.
+ */
+unsigned long ia64_max_iommu_merge_mask = ~0UL;
+EXPORT_SYMBOL(ia64_max_iommu_merge_mask);
+
+/*
+ * We use a special marker for the end of memory and it uses the extra (+1) slot
+ */
+struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1];
+int num_rsvd_regions;
+
+
+/*
+ * Walk one incoming memory segment [start, end) against the rsvd_region[]
+ * table and hand every piece that lies outside the reserved regions to
+ * call_pernode_memory(), together with the callback passed in via arg.
+ * The incoming segments need not be sorted; the table is walked in index
+ * order and is terminated by an end-of-memory marker entry. Always returns 0.
+ */
+int
+filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
+{
+	void (*callback)(unsigned long, unsigned long, int) = arg;
+	/* lowest possible address (the walker uses virtual addresses) */
+	unsigned long gap_start = PAGE_OFFSET;
+	int idx;
+
+#if IGNORE_PFN0
+	if (start == PAGE_OFFSET) {
+		printk(KERN_WARNING "warning: skipping physical page 0\n");
+		start += PAGE_SIZE;
+		if (start >= end)
+			return 0;
+	}
+#endif
+
+	for (idx = 0; idx < num_rsvd_regions; idx++) {
+		/* usable piece: from the end of the previous region to the start of this one */
+		unsigned long lo = (start > gap_start) ? start : gap_start;
+		unsigned long hi = (end < rsvd_region[idx].start) ? end : rsvd_region[idx].start;
+
+		if (lo < hi)
+			call_pernode_memory(__pa(lo), hi - lo, callback);
+
+		/* the segment has been consumed up to its end; nothing more to do */
+		if (hi == end)
+			break;
+
+		gap_start = rsvd_region[idx].end;
+	}
+	/* the end-of-memory marker lets the loop body handle the final piece */
+	return 0;
+}
+
+/*
+ * Sort the first max entries of rsvd_region[] by ascending start address.
+ * Plain bubble sort: after each pass the largest remaining start has moved
+ * to the end of the still-unsorted prefix.
+ */
+static void
+sort_regions (struct rsvd_region *rsvd_region, int max)
+{
+	int limit, k;
+
+	for (limit = max; limit-- != 0; ) {
+		for (k = 0; k < limit; ++k) {
+			struct rsvd_region *lo = &rsvd_region[k];
+			struct rsvd_region *hi = lo + 1;
+
+			if (lo->start > hi->start) {
+				struct rsvd_region saved = *lo;
+
+				*lo = *hi;
+				*hi = saved;
+			}
+		}
+	}
+}
+
+/**
+ * reserve_memory - build the table of reserved memory areas
+ *
+ * Fills rsvd_region[] with the areas described by the boot parameters
+ * (the boot parameter block itself, the EFI memory map, the command line,
+ * the kernel image and, if configured and present, the initrd), appends the
+ * end-of-memory marker, records the entry count in num_rsvd_regions and
+ * sorts the table by start address. At most %IA64_MAX_RSVD_REGIONS entries
+ * plus the marker fit; see include/asm-ia64/meminit.h to define more.
+ */
+void
+reserve_memory (void)
+{
+	struct rsvd_region *r = rsvd_region;
+
+	/*
+	 * none of the entries in this table overlap
+	 */
+
+	/* the boot parameter block itself */
+	r->start = (unsigned long) ia64_boot_param;
+	r->end = r->start + sizeof(*ia64_boot_param);
+	r++;
+
+	/* the EFI memory map */
+	r->start = (unsigned long) __va(ia64_boot_param->efi_memmap);
+	r->end = r->start + ia64_boot_param->efi_memmap_size;
+	r++;
+
+	/* the command line, including its terminating NUL */
+	r->start = (unsigned long) __va(ia64_boot_param->command_line);
+	r->end = r->start + strlen(__va(ia64_boot_param->command_line)) + 1;
+	r++;
+
+	/* the kernel image, KERNEL_START up to _end */
+	r->start = (unsigned long) ia64_imva((void *)KERNEL_START);
+	r->end = (unsigned long) ia64_imva(_end);
+	r++;
+
+#ifdef CONFIG_BLK_DEV_INITRD
+	if (ia64_boot_param->initrd_start) {
+		r->start = (unsigned long)__va(ia64_boot_param->initrd_start);
+		r->end = r->start + ia64_boot_param->initrd_size;
+		r++;
+	}
+#endif
+
+	/* end of memory marker */
+	r->start = ~0UL;
+	r->end = ~0UL;
+	r++;
+
+	num_rsvd_regions = r - rsvd_region;
+
+	sort_regions(rsvd_region, num_rsvd_regions);
+}
+
+/**
+ * find_initrd - pick up the initrd location from the boot parameters
+ *
+ * When initrd support is configured and the boot loader supplied a non-zero
+ * initrd_start, sets initrd_start/initrd_end from the boot parameter
+ * structure and logs the location. Otherwise does nothing.
+ */
+void
+find_initrd (void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+	if (!ia64_boot_param->initrd_start)
+		return;
+
+	initrd_start = (unsigned long)__va(ia64_boot_param->initrd_start);
+	initrd_end = initrd_start + ia64_boot_param->initrd_size;
+
+	printk(KERN_INFO "Initial ramdisk at: 0x%lx (%lu bytes)\n",
+	       initrd_start, ia64_boot_param->initrd_size);
+#endif
+}
+
+/*
+ * Establish ia64_iobase, the virtual base address used for I/O port
+ * accesses, and register it as legacy I/O space 0.
+ */
+static void __init
+io_port_init (void)
+{
+	extern unsigned long ia64_iobase;
+	unsigned long io_phys;
+
+	/*
+	 * Set `iobase' to the appropriate address in region 6 (uncached access range).
+	 *
+	 * The EFI memory map is the "preferred" place to get the I/O port space base,
+	 * rather than relying on AR.KR0. This should become clearer in future SAL
+	 * specs. If the memory map has no appropriate entry, fall back to AR.KR0.
+	 */
+	io_phys = efi_get_iobase();
+	if (!io_phys) {
+		io_phys = ia64_get_kr(IA64_KR_IO_BASE);
+		printk(KERN_INFO "No I/O port range found in EFI memory map, falling back "
+		       "to AR.KR0\n");
+		printk(KERN_INFO "I/O port base = 0x%lx\n", io_phys);
+	} else {
+		/* set AR.KR0 since this is all we use it for anyway */
+		ia64_set_kr(IA64_KR_IO_BASE, io_phys);
+	}
+	ia64_iobase = (unsigned long) ioremap(io_phys, 0);
+
+	/* setup legacy IO port space */
+	io_space[0].mmio_base = ia64_iobase;
+	io_space[0].sparse = 1;
+	num_io_spaces = 1;
+}
+
+/**
+ * early_console_setup - bring up a console usable for early boot messages
+ * @cmdline: boot command line, handed to the PCDP and 8250 setup routines
+ *
+ * The consoles tried here need so little setup that they can be used very
+ * early in boot. Each configured-in candidate (SGI L1, EFI PCDP, 8250 serial)
+ * is tried in that order; the first setup routine that returns 0 wins.
+ *
+ * Returns 0 if a console was set up, -1 if none could be.
+ */
+static inline int __init
+early_console_setup (char *cmdline)
+{
+#ifdef CONFIG_SERIAL_SGI_L1_CONSOLE
+	{
+		extern int sn_serial_console_early_setup(void);
+
+		if (sn_serial_console_early_setup() == 0)
+			return 0;
+	}
+#endif
+#ifdef CONFIG_EFI_PCDP
+	if (efi_setup_pcdp_console(cmdline) == 0)
+		return 0;
+#endif
+#ifdef CONFIG_SERIAL_8250_CONSOLE
+	if (early_serial_console_init(cmdline) == 0)
+		return 0;
+#endif
+
+	/* no early console available */
+	return -1;
+}
+
+static inline void
+mark_bsp_online (void)
+{
+#ifdef CONFIG_SMP
+ /*
+ * If we register an early console, allow CPU 0 to printk: set the
+ * bit of the currently running CPU in cpu_online_map. Without
+ * CONFIG_SMP this function does nothing.
+ */
+ cpu_set(smp_processor_id(), cpu_online_map);
+#endif
+}
+
+void __init /* boot-time architecture setup; hands the boot command line back through *cmdline_p */
+setup_arch (char **cmdline_p)
+{
+ unw_init();
+
+ ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist);
+
+ *cmdline_p = __va(ia64_boot_param->command_line);
+ strlcpy(saved_command_line, *cmdline_p, COMMAND_LINE_SIZE);
+
+ efi_init();
+ io_port_init();
+
+#ifdef CONFIG_IA64_GENERIC
+ {
+ const char *mvec_name = strstr (*cmdline_p, "machvec=");
+ char str[64];
+
+ if (mvec_name) {
+ const char *end;
+ size_t len;
+
+ mvec_name += 8; /* skip past "machvec=" */
+ end = strchr (mvec_name, ' ');
+ if (end)
+ len = end - mvec_name;
+ else
+ len = strlen (mvec_name);
+ len = min(len, sizeof (str) - 1); /* truncate so the NUL below always fits in str */
+ strncpy (str, mvec_name, len);
+ str[len] = '\0';
+ mvec_name = str;
+ } else
+ mvec_name = acpi_get_sysname(); /* no machvec= option on the command line */
+ machvec_init(mvec_name);
+ }
+#endif
+
+ if (early_console_setup(*cmdline_p) == 0)
+ mark_bsp_online();
+
+#ifdef CONFIG_ACPI_BOOT
+ /* Initialize the ACPI boot-time table parser */
+ acpi_table_init();
+# ifdef CONFIG_ACPI_NUMA
+ acpi_numa_init();
+# endif
+#else
+# ifdef CONFIG_SMP
+ smp_build_cpu_map(); /* happens, e.g., with the Ski simulator */
+# endif
+#endif /* CONFIG_ACPI_BOOT */
+
+ find_memory();
+
+ /* process SAL system table: */
+ ia64_sal_init(efi.sal_systab);
+
+#ifdef CONFIG_SMP
+ cpu_physical_id(0) = hard_smp_processor_id();
+#endif
+
+ cpu_init(); /* initialize the bootstrap CPU */
+
+#ifdef CONFIG_ACPI_BOOT
+ acpi_boot_init();
+#endif
+
+#ifdef CONFIG_VT
+ if (!conswitchp) {
+# if defined(CONFIG_DUMMY_CONSOLE)
+ conswitchp = &dummy_con;
+# endif
+# if defined(CONFIG_VGA_CONSOLE)
+ /*
+ * Non-legacy systems may route legacy VGA MMIO range to system
+ * memory. vga_con probes the MMIO hole, so memory looks like
+ * a VGA device to it. The EFI memory map can tell us if it's
+ * memory so we can avoid this problem.
+ */
+ if (efi_mem_type(0xA0000) != EFI_CONVENTIONAL_MEMORY)
+ conswitchp = &vga_con; /* overrides dummy_con when both are configured */
+# endif
+ }
+#endif
+
+ /* enable IA-64 Machine Check Abort Handling unless disabled */
+ if (!strstr(saved_command_line, "nomca"))
+ ia64_mca_init();
+
+ platform_setup(cmdline_p);
+ paging_init();
+}
+
+/*
+ * Display cpu info for all cpu's.
+ *
+ * seq_file ->show callback (see cpuinfo_op): each call formats the record
+ * for the one struct cpuinfo_ia64 passed in as v and emits it with
+ * seq_printf(). Always returns 0.
+ */
+static int
+show_cpuinfo (struct seq_file *m, void *v)
+{
+#ifdef CONFIG_SMP
+# define lpj c->loops_per_jiffy
+# define cpunum c->cpu
+#else
+# define lpj loops_per_jiffy
+# define cpunum 0
+#endif
+ static struct {
+ unsigned long mask;
+ const char *feature_name;
+ } feature_bits[] = {
+ { 1UL << 0, "branchlong" },
+ { 1UL << 1, "spontaneous deferral"},
+ { 1UL << 2, "16-byte atomic ops" }
+ };
+ char family[32], features[128], *cp, sep;
+ struct cpuinfo_ia64 *c = v;
+ unsigned long mask;
+ int i;
+
+ mask = c->features;
+
+ switch (c->family) {
+ case 0x07: memcpy(family, "Itanium", 8); break;
+ case 0x1f: memcpy(family, "Itanium 2", 10); break;
+ default: sprintf(family, "%u", c->family); break;
+ }
+
+ /* build the feature string: */
+ memcpy(features, " standard", 10); /* default text; overwritten below if any feature bit is set */
+ cp = features;
+ sep = 0;
+ for (i = 0; i < (int) ARRAY_SIZE(feature_bits); ++i) {
+ if (mask & feature_bits[i].mask) {
+ if (sep)
+ *cp++ = sep;
+ sep = ',';
+ *cp++ = ' ';
+ strcpy(cp, feature_bits[i].feature_name);
+ cp += strlen(feature_bits[i].feature_name);
+ mask &= ~feature_bits[i].mask; /* leaves only the bits without a known name */
+ }
+ }
+ if (mask) {
+ /* print unknown features as a hex value: */
+ if (sep)
+ *cp++ = sep;
+ sprintf(cp, " 0x%lx", mask);
+ }
+
+ seq_printf(m,
+ "processor : %d\n"
+ "vendor : %s\n"
+ "arch : IA-64\n"
+ "family : %s\n"
+ "model : %u\n"
+ "revision : %u\n"
+ "archrev : %u\n"
+ "features :%s\n" /* don't change this---it _is_ right! */
+ "cpu number : %lu\n"
+ "cpu regs : %u\n"
+ "cpu MHz : %lu.%06lu\n"
+ "itc MHz : %lu.%06lu\n"
+ "BogoMIPS : %lu.%02lu\n\n",
+ cpunum, c->vendor, family, c->model, c->revision, c->archrev,
+ features, c->ppn, c->number,
+ c->proc_freq / 1000000, c->proc_freq % 1000000,
+ c->itc_freq / 1000000, c->itc_freq % 1000000,
+ lpj*HZ/500000, (lpj*HZ/5000) % 100);
+ return 0;
+}
+
+/*
+ * seq_file ->start callback for cpuinfo_op. On SMP, advances *pos to the
+ * next CPU that is set in cpu_online_map. Returns the cpu_data() entry for
+ * *pos, or NULL once *pos has reached NR_CPUS.
+ */
+static void *
+c_start (struct seq_file *m, loff_t *pos)
+{
+#ifdef CONFIG_SMP
+	for (; *pos < NR_CPUS; ++*pos) {
+		if (cpu_isset(*pos, cpu_online_map))
+			break;
+	}
+#endif
+	if (*pos >= NR_CPUS)
+		return NULL;
+	return cpu_data(*pos);
+}
+
+static void * /* seq_file ->next callback: step *pos by one, then let c_start() find the entry at or after it */
+c_next (struct seq_file *m, void *v, loff_t *pos)
+{
+ ++*pos;
+ return c_start(m, pos);
+}
+
+static void /* seq_file ->stop callback (see cpuinfo_op); the body is empty */
+c_stop (struct seq_file *m, void *v)
+{
+}
+
+struct seq_operations cpuinfo_op = { /* seq_file iterator over CPUs; presumably backs /proc/cpuinfo - confirm at the user of cpuinfo_op */
+ .start = c_start,
+ .next = c_next,
+ .stop = c_stop,
+ .show = show_cpuinfo
+};
+
+/*
+ * Fill in *c with the identity of the CPU this code is running on.
+ *
+ * Reads CPUID registers 0-4 and copies vendor, serial number, number,
+ * revision, model, family, archrev and the feature bits into *c (plus the
+ * logical CPU number on SMP). It then asks PAL for the implemented virtual
+ * and physical address sizes and derives the unimplemented-address masks
+ * from them; if the PAL call fails, the Itanium defaults (50/44) are used.
+ */
+void
+identify_cpu (struct cpuinfo_ia64 *c)
+{
+	/* overlay that gives names to the raw CPUID register contents */
+	union {
+		unsigned long bits[5];
+		struct {
+			/* id 0 & 1: */
+			char vendor[16];
+
+			/* id 2 */
+			u64 ppn;		/* processor serial number */
+
+			/* id 3: */
+			unsigned number		:  8;
+			unsigned revision	:  8;
+			unsigned model		:  8;
+			unsigned family		:  8;
+			unsigned archrev	:  8;
+			unsigned reserved	: 24;
+
+			/* id 4: */
+			u64 features;
+		} field;
+	} cpuid;
+	pal_vm_info_1_u_t vm1;
+	pal_vm_info_2_u_t vm2;
+	pal_status_t status;
+	unsigned long impl_va_msb = 50, phys_addr_size = 44;	/* Itanium defaults */
+	int i;
+
+	for (i = 0; i < 5; ++i)
+		cpuid.bits[i] = ia64_get_cpuid(i);
+
+	memcpy(c->vendor, cpuid.field.vendor, 16);
+#ifdef CONFIG_SMP
+	c->cpu = smp_processor_id();
+#endif
+	c->ppn = cpuid.field.ppn;
+	c->number = cpuid.field.number;
+	c->revision = cpuid.field.revision;
+	c->model = cpuid.field.model;
+	c->family = cpuid.field.family;
+	c->archrev = cpuid.field.archrev;
+	c->features = cpuid.field.features;
+
+	status = ia64_pal_vm_summary(&vm1, &vm2);
+	if (status == PAL_STATUS_SUCCESS) {
+		impl_va_msb = vm2.pal_vm_info_2_s.impl_va_msb;
+		phys_addr_size = vm1.pal_vm_info_1_s.phys_add_size;
+	}
+	/*
+	 * Unsigned constants: shifting a signed long into its sign bit
+	 * (7L<<61, 1L<<63) is undefined in C. The resulting bit patterns
+	 * are the same as before.
+	 */
+	c->unimpl_va_mask = ~((7UL<<61) | ((1UL << (impl_va_msb + 1)) - 1));
+	c->unimpl_pa_mask = ~((1UL<<63) | ((1UL << phys_addr_size) - 1));
+}
+
+void
+setup_per_cpu_areas (void)
+{
+ /* start_kernel() requires this... (the body is intentionally empty) */
+}
+
+/*
+ * Raise ia64_max_cacheline_size to the largest data/unified cache line size
+ * that PAL reports for this CPU. The global is only ever increased here.
+ * If PAL cannot summarize the caches, SMP_CACHE_BYTES is used instead; if
+ * PAL cannot describe an individual level, that level is skipped and the
+ * running maximum is reset to SMP_CACHE_BYTES.
+ */
+static void
+get_max_cacheline_size (void)
+{
+	unsigned long line_size, max = 1;
+	u64 l, levels, unique_caches;
+	pal_cache_config_info_t cci;
+	s64 status;
+
+	status = ia64_pal_cache_summary(&levels, &unique_caches);
+	if (status != 0) {
+		printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n",
+		       __FUNCTION__, status);
+		max = SMP_CACHE_BYTES;
+		goto out;
+	}
+
+	for (l = 0; l < levels; ++l) {
+		status = ia64_pal_cache_config_info(l, /* cache_type (data_or_unified)= */ 2,
+						    &cci);
+		if (status != 0) {
+			printk(KERN_ERR
+			       "%s: ia64_pal_cache_config_info(l=%lu) failed (status=%ld)\n",
+			       __FUNCTION__, l, status);
+			max = SMP_CACHE_BYTES;
+			/*
+			 * cci was not filled in by the failed call, so do not
+			 * derive a line size from it.
+			 */
+			continue;
+		}
+		line_size = 1 << cci.pcci_line_size;
+		if (line_size > max)
+			max = line_size;
+	}
+  out:
+	if (max > ia64_max_cacheline_size)
+		ia64_max_cacheline_size = max;
+}
+
+/*
+ * cpu_init() initializes state that is per-CPU. This function acts
+ * as a 'CPU state barrier', nothing should get across.
+ *
+ * setup_arch() calls it for the bootstrap CPU. In order, it sets up the
+ * per-CPU data area and related kernel registers, identifies the CPU,
+ * programs the default control register, attaches init_mm as the active mm,
+ * initializes the MMU and MCA state, masks the local interrupt sources,
+ * lowers ia64_ctx.max_ctx to what this CPU supports and records the size
+ * of the physical stacked register partition.
+ */
+void
+cpu_init (void)
+{
+ extern void __devinit ia64_mmu_init (void *);
+ unsigned long num_phys_stacked;
+ pal_vm_info_2_u_t vmi;
+ unsigned int max_ctx;
+ struct cpuinfo_ia64 *cpu_info;
+ void *cpu_data;
+
+ cpu_data = per_cpu_init();
+
+ /*
+ * We set ar.k3 so that assembly code in MCA handler can compute
+ * physical addresses of per cpu variables with a simple:
+ * phys = ar.k3 + &per_cpu_var
+ */
+ ia64_set_kr(IA64_KR_PER_CPU_DATA,
+ ia64_tpa(cpu_data) - (long) __per_cpu_start);
+
+ get_max_cacheline_size();
+
+ /*
+ * We can't pass "local_cpu_data" to identify_cpu() because we haven't called
+ * ia64_mmu_init() yet. And we can't call ia64_mmu_init() first because it
+ * depends on the data returned by identify_cpu(). We break the dependency by
+ * accessing cpu_data() through the canonical per-CPU address.
+ */
+ cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(cpu_info) - __per_cpu_start);
+ identify_cpu(cpu_info);
+
+#ifdef CONFIG_MCKINLEY
+ {
+# define FEATURE_SET 16
+ struct ia64_pal_retval iprv;
+
+ if (cpu_info->family == 0x1f) { /* 0x1f is reported as "Itanium 2" by show_cpuinfo() */
+ PAL_CALL_PHYS(iprv, PAL_PROC_GET_FEATURES, 0, FEATURE_SET, 0);
+ if ((iprv.status == 0) && (iprv.v0 & 0x80) && (iprv.v2 & 0x80))
+ PAL_CALL_PHYS(iprv, PAL_PROC_SET_FEATURES,
+ (iprv.v1 | 0x80), FEATURE_SET, 0);
+ }
+ }
+#endif
+
+ /* Clear the stack memory reserved for pt_regs: */
+ memset(ia64_task_regs(current), 0, sizeof(struct pt_regs));
+
+ ia64_set_kr(IA64_KR_FPU_OWNER, 0);
+
+ /*
+ * Initialize the page-table base register to a global
+ * directory with all zeroes. This ensure that we can handle
+ * TLB-misses to user address-space even before we created the
+ * first user address-space. This may happen, e.g., due to
+ * aggressive use of lfetch.fault.
+ */
+ ia64_set_kr(IA64_KR_PT_BASE, __pa(ia64_imva(empty_zero_page)));
+
+ /*
+ * Initialize default control register to defer all speculative faults. The
+ * kernel MUST NOT depend on a particular setting of these bits (in other words,
+ * the kernel must have recovery code for all speculative accesses). Turn on
+ * dcr.lc as per recommendation by the architecture team. Most IA-32 apps
+ * shouldn't be affected by this (moral: keep your ia32 locks aligned and you'll
+ * be fine).
+ */
+ ia64_setreg(_IA64_REG_CR_DCR, ( IA64_DCR_DP | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_DR
+ | IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC));
+ atomic_inc(&init_mm.mm_count);
+ current->active_mm = &init_mm;
+ if (current->mm)
+ BUG();
+
+ ia64_mmu_init(ia64_imva(cpu_data));
+ ia64_mca_cpu_init(ia64_imva(cpu_data));
+
+#ifdef CONFIG_IA32_SUPPORT
+ ia32_cpu_init();
+#endif
+
+ /* Clear ITC to eliminate sched_clock() overflows in human time. */
+ ia64_set_itc(0);
+
+ /* disable all local interrupt sources: */
+ ia64_set_itv(1 << 16);
+ ia64_set_lrr0(1 << 16);
+ ia64_set_lrr1(1 << 16);
+ ia64_setreg(_IA64_REG_CR_PMV, 1 << 16);
+ ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16);
+
+ /* clear TPR & XTP to enable all interrupt classes: */
+ ia64_setreg(_IA64_REG_CR_TPR, 0);
+#ifdef CONFIG_SMP
+ normal_xtp();
+#endif
+
+ /* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */
+ if (ia64_pal_vm_summary(NULL, &vmi) == 0)
+ max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1;
+ else {
+ printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n");
+ max_ctx = (1U << 15) - 1; /* use architected minimum */
+ }
+ while (max_ctx < ia64_ctx.max_ctx) { /* lower the shared limit to this CPU's value; retry if it changed in between */
+ unsigned int old = ia64_ctx.max_ctx;
+ if (cmpxchg(&ia64_ctx.max_ctx, old, max_ctx) == old)
+ break;
+ }
+
+ if (ia64_pal_rse_info(&num_phys_stacked, NULL) != 0) {
+ printk(KERN_WARNING "cpu_init: PAL RSE info failed; assuming 96 physical "
+ "stacked regs\n");
+ num_phys_stacked = 96;
+ }
+ /* size of physical stacked register partition plus 8 bytes: */
+ __get_cpu_var(ia64_phys_stacked_size_p8) = num_phys_stacked*8 + 8;
+ platform_cpu_init();
+}
+
+void /* hands the bounds of the __mckinley_e9_bundles list to ia64_patch_mckinley_e9() */
+check_bugs (void)
+{
+ ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles,
+ (unsigned long) __end___mckinley_e9_bundles);
+}
diff --git a/arch/ia64/kernel/sigframe.h b/arch/ia64/kernel/sigframe.h
new file mode 100644
index 00000000000..37b986cb86e
--- /dev/null
+++ b/arch/ia64/kernel/sigframe.h
@@ -0,0 +1,25 @@
+struct sigscratch {
+ unsigned long scratch_unat; /* ar.unat for the general registers saved in pt */
+ unsigned long ar_pfs; /* for syscalls, the user-level function-state */
+ struct pt_regs pt; /* saved register state; signal.c reads and writes it as scr->pt */
+};
+
+struct sigframe {
+ /*
+ * Place signal handler args where user-level unwinder can find them easily.
+ * DO NOT MOVE THESE. They are part of the IA-64 Linux ABI and there is
+ * user-level code that depends on their presence!
+ */
+ unsigned long arg0; /* signum */
+ unsigned long arg1; /* siginfo pointer */
+ unsigned long arg2; /* sigcontext pointer */
+ /*
+ * End of architected state.
+ */
+
+ void __user *handler; /* pointer to the plabel of the signal handler */
+ struct siginfo info; /* presumably the siginfo that arg1 points at - confirm in setup_frame() */
+ struct sigcontext sc; /* context that ia64_rt_sigreturn() restores from */
+};
+
+extern long ia64_do_signal (sigset_t *, struct sigscratch *, long);
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
new file mode 100644
index 00000000000..6891d86937d
--- /dev/null
+++ b/arch/ia64/kernel/signal.c
@@ -0,0 +1,691 @@
+/*
+ * Architecture-specific signal handling support.
+ *
+ * Copyright (C) 1999-2004 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Derived from i386 and Alpha versions.
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/tty.h>
+#include <linux/binfmts.h>
+#include <linux/unistd.h>
+#include <linux/wait.h>
+
+#include <asm/ia32.h>
+#include <asm/intrinsics.h>
+#include <asm/uaccess.h>
+#include <asm/rse.h>
+#include <asm/sigcontext.h>
+
+#include "sigframe.h"
+
+#define DEBUG_SIG 0
+#define STACK_ALIGN 16 /* minimal alignment for stack pointer */
+#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+#if _NSIG_WORDS > 1
+# define PUT_SIGSET(k,u) __copy_to_user((u)->sig, (k)->sig, sizeof(sigset_t))
+# define GET_SIGSET(k,u) __copy_from_user((k)->sig, (u)->sig, sizeof(sigset_t))
+#else
+# define PUT_SIGSET(k,u) __put_user((k)->sig[0], &(u)->sig[0])
+# define GET_SIGSET(k,u) __get_user((k)->sig[0], &(u)->sig[0])
+#endif
+
+long /* installs *uset as the blocked mask and sleeps until ia64_do_signal() reports a signal; then returns -EINTR */
+ia64_rt_sigsuspend (sigset_t __user *uset, size_t sigsetsize, struct sigscratch *scr)
+{
+ sigset_t oldset, set;
+
+ /* XXX: Don't preclude handling different sized sigset_t's. */
+ if (sigsetsize != sizeof(sigset_t))
+ return -EINVAL;
+
+ if (!access_ok(VERIFY_READ, uset, sigsetsize))
+ return -EFAULT;
+
+ if (GET_SIGSET(&set, uset))
+ return -EFAULT;
+
+ sigdelsetmask(&set, ~_BLOCKABLE); /* SIGKILL and SIGSTOP can never be blocked */
+
+ spin_lock_irq(&current->sighand->siglock);
+ {
+ oldset = current->blocked; /* kept so it can be passed to ia64_do_signal() below */
+ current->blocked = set;
+ recalc_sigpending();
+ }
+ spin_unlock_irq(&current->sighand->siglock);
+
+ /*
+ * The return below usually returns to the signal handler. We need to
+ * pre-set the correct error code here to ensure that the right values
+ * get saved in sigcontext by ia64_do_signal.
+ */
+ scr->pt.r8 = EINTR;
+ scr->pt.r10 = -1;
+
+ while (1) {
+ current->state = TASK_INTERRUPTIBLE;
+ schedule();
+ if (ia64_do_signal(&oldset, scr, 1))
+ return -EINTR;
+ }
+}
+
+asmlinkage long /* forwards to do_sigaltstack() with regs.r12 as the stack pointer; arg2..arg7 are unused */
+sys_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, long arg2,
+ long arg3, long arg4, long arg5, long arg6, long arg7,
+ struct pt_regs regs)
+{
+ return do_sigaltstack(uss, uoss, regs.r12);
+}
+
+/*
+ * Restore the register state saved in the user-level sigcontext *sc into
+ * the scratch area *scr.
+ *
+ * The state that signal delivery always modifies is restored
+ * unconditionally. Most of the remaining scratch state is restored only if
+ * the signal did not interrupt a system call (IA64_SC_FLAG_IN_SYSCALL
+ * clear). The high floating-point partition is reloaded if the context
+ * marks it valid (IA64_SC_FLAG_FPH_VALID).
+ *
+ * Returns 0 on success and non-zero if any user-space access faulted,
+ * including the copy of the fph area.
+ */
+static long
+restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr)
+{
+	unsigned long ip, flags, nat, um, cfm;
+	long err;
+
+	/* Always make any pending restarted system calls return -EINTR */
+	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+	/* restore the scratch state that always gets updated during signal delivery: */
+	err  = __get_user(flags, &sc->sc_flags);
+	err |= __get_user(nat, &sc->sc_nat);
+	err |= __get_user(ip, &sc->sc_ip);			/* instruction pointer */
+	err |= __get_user(cfm, &sc->sc_cfm);
+	err |= __get_user(um, &sc->sc_um);			/* user mask */
+	err |= __get_user(scr->pt.ar_rsc, &sc->sc_ar_rsc);
+	err |= __get_user(scr->pt.ar_unat, &sc->sc_ar_unat);
+	err |= __get_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr);
+	err |= __get_user(scr->pt.ar_pfs, &sc->sc_ar_pfs);
+	err |= __get_user(scr->pt.pr, &sc->sc_pr);		/* predicates */
+	err |= __get_user(scr->pt.b0, &sc->sc_br[0]);		/* b0 (rp) */
+	err |= __get_user(scr->pt.b6, &sc->sc_br[6]);		/* b6 */
+	err |= __copy_from_user(&scr->pt.r1, &sc->sc_gr[1], 8);	/* r1 */
+	err |= __copy_from_user(&scr->pt.r8, &sc->sc_gr[8], 4*8);	/* r8-r11 */
+	err |= __copy_from_user(&scr->pt.r12, &sc->sc_gr[12], 2*8);	/* r12-r13 */
+	err |= __copy_from_user(&scr->pt.r15, &sc->sc_gr[15], 8);	/* r15 */
+
+	scr->pt.cr_ifs = cfm | (1UL << 63);
+
+	/* establish new instruction pointer: */
+	scr->pt.cr_iip = ip & ~0x3UL;
+	ia64_psr(&scr->pt)->ri = ip & 0x3;
+	/* only the user-mask bits of the saved psr may come from user space */
+	scr->pt.cr_ipsr = (scr->pt.cr_ipsr & ~IA64_PSR_UM) | (um & IA64_PSR_UM);
+
+	scr->scratch_unat = ia64_put_scratch_nat_bits(&scr->pt, nat);
+
+	if (!(flags & IA64_SC_FLAG_IN_SYSCALL)) {
+		/* Restore most scratch-state only when not in syscall. */
+		err |= __get_user(scr->pt.ar_ccv, &sc->sc_ar_ccv);		/* ar.ccv */
+		err |= __get_user(scr->pt.b7, &sc->sc_br[7]);			/* b7 */
+		err |= __get_user(scr->pt.r14, &sc->sc_gr[14]);			/* r14 */
+		err |= __copy_from_user(&scr->pt.ar_csd, &sc->sc_ar25, 2*8); /* ar.csd & ar.ssd */
+		err |= __copy_from_user(&scr->pt.r2, &sc->sc_gr[2], 2*8);	/* r2-r3 */
+		err |= __copy_from_user(&scr->pt.r16, &sc->sc_gr[16], 16*8);	/* r16-r31 */
+	}
+
+	if ((flags & IA64_SC_FLAG_FPH_VALID) != 0) {
+		struct ia64_psr *psr = ia64_psr(&scr->pt);
+
+		/* a fault here must be reported like any other failed user access */
+		err |= __copy_from_user(current->thread.fph, &sc->sc_fr[32], 96*16);
+		psr->mfh = 0;	/* drop signal handler's fph contents... */
+		if (psr->dfh)
+			ia64_drop_fpu(current);
+		else {
+			/* We already own the local fph, otherwise psr->dfh wouldn't be 0.  */
+			__ia64_load_fpu(current->thread.fph);
+			ia64_set_local_fpu_owner(current);
+		}
+	}
+	return err;
+}
+
+int /* copies *from to user space without padding bytes; returns 0 on success, non-zero on a fault */
+copy_siginfo_to_user (siginfo_t __user *to, siginfo_t *from)
+{
+ if (!access_ok(VERIFY_WRITE, to, sizeof(siginfo_t)))
+ return -EFAULT;
+ if (from->si_code < 0) { /* negative si_code: the whole structure is copied verbatim */
+ if (__copy_to_user(to, from, sizeof(siginfo_t)))
+ return -EFAULT;
+ return 0;
+ } else {
+ int err;
+
+ /*
+ * If you change siginfo_t structure, please be sure this code is fixed
+ * accordingly. It should never copy any pad contained in the structure
+ * to avoid security leaks, but must copy the generic 3 ints plus the
+ * relevant union member.
+ */
+ err = __put_user(from->si_signo, &to->si_signo);
+ err |= __put_user(from->si_errno, &to->si_errno);
+ err |= __put_user((short)from->si_code, &to->si_code); /* user space sees only the low 16 bits */
+ switch (from->si_code >> 16) { /* the upper bits select which union member is copied */
+ case __SI_FAULT >> 16:
+ err |= __put_user(from->si_flags, &to->si_flags);
+ err |= __put_user(from->si_isr, &to->si_isr); /* falls through: si_addr and si_imm are copied as well */
+ case __SI_POLL >> 16:
+ err |= __put_user(from->si_addr, &to->si_addr);
+ err |= __put_user(from->si_imm, &to->si_imm);
+ break;
+ case __SI_TIMER >> 16:
+ err |= __put_user(from->si_tid, &to->si_tid);
+ err |= __put_user(from->si_overrun, &to->si_overrun);
+ err |= __put_user(from->si_ptr, &to->si_ptr);
+ break;
+ case __SI_RT >> 16: /* Not generated by the kernel as of now. */
+ case __SI_MESGQ >> 16:
+ err |= __put_user(from->si_uid, &to->si_uid);
+ err |= __put_user(from->si_pid, &to->si_pid);
+ err |= __put_user(from->si_ptr, &to->si_ptr);
+ break;
+ case __SI_CHLD >> 16:
+ err |= __put_user(from->si_utime, &to->si_utime);
+ err |= __put_user(from->si_stime, &to->si_stime);
+ err |= __put_user(from->si_status, &to->si_status); /* falls through: si_uid and si_pid are copied as well */
+ default:
+ err |= __put_user(from->si_uid, &to->si_uid);
+ err |= __put_user(from->si_pid, &to->si_pid);
+ break;
+ }
+ return err;
+ }
+}
+
+long /* restores the blocked mask and register state from the user sigframe; returns the kernel-exit address to continue at */
+ia64_rt_sigreturn (struct sigscratch *scr)
+{
+ extern char ia64_strace_leave_kernel, ia64_leave_kernel;
+ struct sigcontext __user *sc;
+ struct siginfo si;
+ sigset_t set;
+ long retval;
+
+ sc = &((struct sigframe __user *) (scr->pt.r12 + 16))->sc; /* the frame is located 16 bytes above the saved r12 */
+
+ /*
+ * When we return to the previously executing context, r8 and r10 have already
+ * been setup the way we want them. Indeed, if the signal wasn't delivered while
+ * in a system call, we must not touch r8 or r10 as otherwise user-level state
+ * could be corrupted.
+ */
+ retval = (long) &ia64_leave_kernel;
+ if (test_thread_flag(TIF_SYSCALL_TRACE))
+ /*
+ * strace expects to be notified after sigreturn returns even though the
+ * context to which we return may not be in the middle of a syscall.
+ * Thus, the return-value that strace displays for sigreturn is
+ * meaningless.
+ */
+ retval = (long) &ia64_strace_leave_kernel;
+
+ if (!access_ok(VERIFY_READ, sc, sizeof(*sc)))
+ goto give_sigsegv;
+
+ if (GET_SIGSET(&set, &sc->sc_mask))
+ goto give_sigsegv;
+
+ sigdelsetmask(&set, ~_BLOCKABLE); /* SIGKILL and SIGSTOP can never be blocked */
+
+ spin_lock_irq(&current->sighand->siglock);
+ {
+ current->blocked = set;
+ recalc_sigpending();
+ }
+ spin_unlock_irq(&current->sighand->siglock);
+
+ if (restore_sigcontext(sc, scr))
+ goto give_sigsegv;
+
+#if DEBUG_SIG
+ printk("SIG return (%s:%d): sp=%lx ip=%lx\n",
+ current->comm, current->pid, scr->pt.r12, scr->pt.cr_iip);
+#endif
+ /*
+ * It is more difficult to avoid calling this function than to
+ * call it and ignore errors.
+ */
+ do_sigaltstack(&sc->sc_stack, NULL, scr->pt.r12);
+ return retval;
+
+ give_sigsegv: /* the frame was unreadable or invalid: force a SIGSEGV on the current task */
+ si.si_signo = SIGSEGV;
+ si.si_errno = 0;
+ si.si_code = SI_KERNEL;
+ si.si_pid = current->pid;
+ si.si_uid = current->uid;
+ si.si_addr = sc;
+ force_sig_info(SIGSEGV, &si, current);
+ return retval;
+}
+
+/*
+ * This does just the minimum required setup of sigcontext.
+ * Specifically, it only installs data that is either not knowable at
+ * the user-level or that gets modified before execution in the
+ * trampoline starts.  Everything else is done at the user-level.
+ *
+ * sc is the user-space context to fill in, mask the signal mask to store
+ * in it and scr the saved register state to copy from. If the interrupted
+ * context was a system call, the remaining scratch registers are stored as
+ * zeroes instead of their current values.
+ *
+ * Returns 0 on success and non-zero if any user-space access faulted,
+ * including the copy of the fph area.
+ */
+static long
+setup_sigcontext (struct sigcontext __user *sc, sigset_t *mask, struct sigscratch *scr)
+{
+	unsigned long flags = 0, ifs, cfm, nat;
+	long err = 0;
+
+	ifs = scr->pt.cr_ifs;
+
+	if (on_sig_stack((unsigned long) sc))
+		flags |= IA64_SC_FLAG_ONSTACK;
+	if ((ifs & (1UL << 63)) == 0)
+		/* if cr_ifs doesn't have the valid bit set, we got here through a syscall */
+		flags |= IA64_SC_FLAG_IN_SYSCALL;
+	cfm = ifs & ((1UL << 38) - 1);
+	ia64_flush_fph(current);
+	if ((current->thread.flags & IA64_THREAD_FPH_VALID)) {
+		flags |= IA64_SC_FLAG_FPH_VALID;
+		/* a fault here must be reported like any other failed user access */
+		err |= __copy_to_user(&sc->sc_fr[32], current->thread.fph, 96*16);
+	}
+
+	nat = ia64_get_scratch_nat_bits(&scr->pt, scr->scratch_unat);
+
+	err |= __put_user(flags, &sc->sc_flags);
+	err |= __put_user(nat, &sc->sc_nat);
+	err |= PUT_SIGSET(mask, &sc->sc_mask);
+	err |= __put_user(cfm, &sc->sc_cfm);
+	err |= __put_user(scr->pt.cr_ipsr & IA64_PSR_UM, &sc->sc_um);
+	err |= __put_user(scr->pt.ar_rsc, &sc->sc_ar_rsc);
+	err |= __put_user(scr->pt.ar_unat, &sc->sc_ar_unat);		/* ar.unat */
+	err |= __put_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr);		/* ar.fpsr */
+	err |= __put_user(scr->pt.ar_pfs, &sc->sc_ar_pfs);
+	err |= __put_user(scr->pt.pr, &sc->sc_pr);			/* predicates */
+	err |= __put_user(scr->pt.b0, &sc->sc_br[0]);			/* b0 (rp) */
+	err |= __put_user(scr->pt.b6, &sc->sc_br[6]);			/* b6 */
+	err |= __copy_to_user(&sc->sc_gr[1], &scr->pt.r1, 8);		/* r1 */
+	err |= __copy_to_user(&sc->sc_gr[8], &scr->pt.r8, 4*8);		/* r8-r11 */
+	err |= __copy_to_user(&sc->sc_gr[12], &scr->pt.r12, 2*8);	/* r12-r13 */
+	err |= __copy_to_user(&sc->sc_gr[15], &scr->pt.r15, 8);		/* r15 */
+	/* sc_ip carries the slot number (psr.ri) in its low bits */
+	err |= __put_user(scr->pt.cr_iip + ia64_psr(&scr->pt)->ri, &sc->sc_ip);
+
+	if (flags & IA64_SC_FLAG_IN_SYSCALL) {
+		/* Clear scratch registers if the signal interrupted a system call. */
+		err |= __put_user(0, &sc->sc_ar_ccv);				/* ar.ccv */
+		err |= __put_user(0, &sc->sc_br[7]);				/* b7 */
+		err |= __put_user(0, &sc->sc_gr[14]);				/* r14 */
+		err |= __clear_user(&sc->sc_ar25, 2*8);			/* ar.csd & ar.ssd */
+		err |= __clear_user(&sc->sc_gr[2], 2*8);			/* r2-r3 */
+		err |= __clear_user(&sc->sc_gr[16], 16*8);			/* r16-r31 */
+	} else {
+		/* Copy scratch regs to sigcontext if the signal didn't interrupt a syscall. */
+		err |= __put_user(scr->pt.ar_ccv, &sc->sc_ar_ccv);		/* ar.ccv */
+		err |= __put_user(scr->pt.b7, &sc->sc_br[7]);			/* b7 */
+		err |= __put_user(scr->pt.r14, &sc->sc_gr[14]);			/* r14 */
+		err |= __copy_to_user(&sc->sc_ar25, &scr->pt.ar_csd, 2*8); /* ar.csd & ar.ssd */
+		err |= __copy_to_user(&sc->sc_gr[2], &scr->pt.r2, 2*8);		/* r2-r3 */
+		err |= __copy_to_user(&sc->sc_gr[16], &scr->pt.r16, 16*8);	/* r16-r31 */
+	}
+	return err;
+}
+
+/*
+ * Report whether the register-backing-store pointer `bsp' already lies inside
+ * the current task's alternate signal stack.  Returns 1 if it does, 0 if not.
+ */
+static inline int
+rbs_on_sig_stack (unsigned long bsp)
+{
+	/* unsigned arithmetic: a bsp below sas_ss_sp wraps to a huge offset */
+	unsigned long offset = bsp - current->sas_ss_sp;
+
+	if (offset < current->sas_ss_size)
+		return 1;
+	return 0;
+}
+
+/*
+ * Force a SIGSEGV (si_code SI_KERNEL, si_addr = addr) onto the current task.
+ * If the signal whose delivery failed was itself SIGSEGV, its handler is first
+ * reset to SIG_DFL.  Always returns 0.
+ */
+static long
+force_sigsegv_info (int sig, void __user *addr)
+{
+	struct siginfo si;
+	unsigned long irq_state;
+
+	if (sig == SIGSEGV) {
+		/*
+		 * Taking siglock for the sa_handler update is most likely more than
+		 * is needed, but this path is not performance critical and holding
+		 * the lock keeps us safe should kernel/signal.c ever change so that
+		 * sa_handler may no longer be modified without it.
+		 */
+		spin_lock_irqsave(&current->sighand->siglock, irq_state);
+		current->sighand->action[SIGSEGV - 1].sa.sa_handler = SIG_DFL;
+		spin_unlock_irqrestore(&current->sighand->siglock, irq_state);
+	}
+
+	/* the field assignments below are kept in their original order */
+	si.si_signo = SIGSEGV;
+	si.si_errno = 0;
+	si.si_code = SI_KERNEL;
+	si.si_pid = current->pid;
+	si.si_uid = current->uid;
+	si.si_addr = addr;
+
+	force_sig_info(SIGSEGV, &si, current);
+	return 0;
+}
+
+static long
+setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set,
+ struct sigscratch *scr)
+{ /*
+ * Build a signal frame for an IA-64 task on the user stack (or on the
+ * alternate signal stack when SA_ONSTACK applies) and redirect the saved
+ * user state in scr->pt so that execution resumes in the signal trampoline.
+ *
+ * Returns 1 once the frame has been written.  If the frame is not writable
+ * or filling it in faults, returns the result of force_sigsegv_info().
+ */
+ extern char __kernel_sigtramp[];
+ unsigned long tramp_addr, new_rbs = 0;
+ struct sigframe __user *frame;
+ long err;
+
+ frame = (void __user *) scr->pt.r12; /* start from the interrupted stack pointer */
+ tramp_addr = (unsigned long) __kernel_sigtramp;
+ if ((ka->sa.sa_flags & SA_ONSTACK) && sas_ss_flags((unsigned long) frame) == 0) {
+ frame = (void __user *) ((current->sas_ss_sp + current->sas_ss_size)
+ & ~(STACK_ALIGN - 1));
+ /*
+ * We need to check for the register stack being on the signal stack
+ * separately, because it's switched separately (memory stack is switched
+ * in the kernel, register stack is switched in the signal trampoline).
+ */
+ if (!rbs_on_sig_stack(scr->pt.ar_bspstore))
+ new_rbs = (current->sas_ss_sp + sizeof(long) - 1) & ~(sizeof(long) - 1);
+ }
+ frame = (void __user *) frame - ((sizeof(*frame) + STACK_ALIGN - 1) & ~(STACK_ALIGN - 1)); /* reserve the frame, size rounded up to STACK_ALIGN */
+
+ if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+ return force_sigsegv_info(sig, frame);
+
+ err = __put_user(sig, &frame->arg0);
+ err |= __put_user(&frame->info, &frame->arg1);
+ err |= __put_user(&frame->sc, &frame->arg2);
+ err |= __put_user(new_rbs, &frame->sc.sc_rbs_base); /* 0 unless the RBS must move to the signal stack */
+ err |= __put_user(0, &frame->sc.sc_loadrs); /* initialize to zero */
+ err |= __put_user(ka->sa.sa_handler, &frame->handler);
+
+ err |= copy_siginfo_to_user(&frame->info, info);
+
+ err |= __put_user(current->sas_ss_sp, &frame->sc.sc_stack.ss_sp);
+ err |= __put_user(current->sas_ss_size, &frame->sc.sc_stack.ss_size);
+ err |= __put_user(sas_ss_flags(scr->pt.r12), &frame->sc.sc_stack.ss_flags);
+ err |= setup_sigcontext(&frame->sc, set, scr);
+
+ if (unlikely(err)) /* any of the user-space writes above failed */
+ return force_sigsegv_info(sig, frame);
+
+ scr->pt.r12 = (unsigned long) frame - 16; /* new stack pointer */
+ scr->pt.ar_fpsr = FPSR_DEFAULT; /* reset fpsr for signal handler */
+ scr->pt.cr_iip = tramp_addr;
+ ia64_psr(&scr->pt)->ri = 0; /* start executing in first slot */
+ ia64_psr(&scr->pt)->be = 0; /* force little-endian byte-order */
+ /*
+ * Force the interruption function mask to zero. This has no effect when a
+ * system-call got interrupted by a signal (since, in that case, scr->pt.cr_ifs is
+ * ignored), but it has the desirable effect of making it possible to deliver a
+ * signal with an incomplete register frame (which happens when a mandatory RSE
+ * load faults). Furthermore, it has no negative effect on getting the user's
+ * dirty partition preserved, because that's governed by scr->pt.loadrs.
+ */
+ scr->pt.cr_ifs = (1UL << 63);
+
+ /*
+ * Note: this affects only the NaT bits of the scratch regs (the ones saved in
+ * pt_regs), which is exactly what we want.
+ */
+ scr->scratch_unat = 0; /* ensure the NaT bit of r12 is clear */
+
+#if DEBUG_SIG
+ printk("SIG deliver (%s:%d): sig=%d sp=%lx ip=%lx handler=%p\n",
+ current->comm, current->pid, sig, scr->pt.r12, frame->sc.sc_ip, frame->handler);
+#endif
+ return 1;
+}
+
+/*
+ * Set up the user-level signal frame for `sig' (IA-32 or IA-64 flavour,
+ * depending on the interrupted task) and, unless SA_NODEFER is set, block the
+ * handler's sa_mask plus the signal itself.  Returns 1 when the frame was set
+ * up, 0 when frame setup failed.
+ */
+static long
+handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset,
+	       struct sigscratch *scr)
+{
+	long frame_ok;
+
+	if (IS_IA32_PROCESS(&scr->pt))
+		/* send signal to IA-32 process */
+		frame_ok = ia32_setup_frame1(sig, ka, info, oldset, &scr->pt);
+	else
+		/* send signal to IA-64 process */
+		frame_ok = setup_frame(sig, ka, info, oldset, scr);
+
+	if (!frame_ok)
+		return 0;
+
+	/* SA_NODEFER: leave the blocked set untouched */
+	if (ka->sa.sa_flags & SA_NODEFER)
+		return 1;
+
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+	sigaddset(&current->blocked, sig);
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	return 1;
+}
+
+/*
+ * Note that `init' is a special process: it doesn't get signals it doesn't want to
+ * handle. Thus you cannot kill init even with a SIGKILL even by mistake.
+ *
+ * Fetches pending signals with get_signal_to_deliver() and hands each one to
+ * handle_signal(). When the signal interrupted a system call (in_syscall), the
+ * saved registers are also adjusted so that the call is either restarted or
+ * returns EINTR.
+ *
+ * Returns 1 if handle_signal() set up a handler frame, 0 otherwise (including
+ * the case where scr->pt does not describe a user-mode context).
+ */
+long
+ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
+{
+ struct k_sigaction ka;
+ siginfo_t info;
+ long restart = in_syscall; /* cleared once a restart is ruled out or has been arranged */
+ long errno = scr->pt.r8; /* syscall return value as saved at entry */
+# define ERR_CODE(c) (IS_IA32_PROCESS(&scr->pt) ? -(c) : (c))
+
+ /*
+ * In the ia64_leave_kernel code path, we want the common case to go fast, which
+ * is why we may in certain cases get here from kernel mode. Just return without
+ * doing anything if so.
+ */
+ if (!user_mode(&scr->pt))
+ return 0;
+
+ if (!oldset)
+ oldset = &current->blocked;
+
+ /*
+ * This only loops in the rare cases of handle_signal() failing, in which case we
+ * need to push through a forced SIGSEGV.
+ */
+ while (1) {
+ int signr = get_signal_to_deliver(&info, &ka, &scr->pt, NULL);
+
+ /*
+ * get_signal_to_deliver() may have run a debugger (via notify_parent())
+ * and the debugger may have modified the state (e.g., to arrange for an
+ * inferior call), thus it's important to check for restarting _after_
+ * get_signal_to_deliver().
+ */
+ if (IS_IA32_PROCESS(&scr->pt)) {
+ if (in_syscall) {
+ if (errno >= 0)
+ restart = 0;
+ else
+ errno = -errno;
+ }
+ } else if ((long) scr->pt.r10 != -1)
+ /*
+ * A system call has to be restarted only if one of the error codes
+ * ERESTARTNOHAND, ERESTARTSYS, or ERESTARTNOINTR is returned. If r10
+ * isn't -1 then r8 doesn't hold an error code and we don't need to
+ * restart the syscall, so we can clear the "restart" flag here.
+ */
+ restart = 0;
+
+ if (signr <= 0)
+ break;
+
+ if (unlikely(restart)) {
+ switch (errno) {
+ case ERESTART_RESTARTBLOCK:
+ case ERESTARTNOHAND:
+ scr->pt.r8 = ERR_CODE(EINTR);
+ /* note: scr->pt.r10 is already -1 */
+ break;
+
+ case ERESTARTSYS:
+ if ((ka.sa.sa_flags & SA_RESTART) == 0) {
+ scr->pt.r8 = ERR_CODE(EINTR);
+ /* note: scr->pt.r10 is already -1 */
+ break;
+ } /* SA_RESTART is set: fall through and restart the call */
+ case ERESTARTNOINTR:
+ if (IS_IA32_PROCESS(&scr->pt)) {
+ scr->pt.r8 = scr->pt.r1;
+ scr->pt.cr_iip -= 2;
+ } else
+ ia64_decrement_ip(&scr->pt);
+ restart = 0; /* don't restart twice if handle_signal() fails... */
+ }
+ }
+
+ /*
+ * Whee! Actually deliver the signal. If the delivery failed, we need to
+ * continue to iterate in this loop so we can deliver the SIGSEGV...
+ */
+ if (handle_signal(signr, &ka, &info, oldset, scr))
+ return 1;
+ }
+
+ /* Did we come from a system call? */
+ if (restart) {
+ /* Restart the system call - no handlers present */
+ if (errno == ERESTARTNOHAND || errno == ERESTARTSYS || errno == ERESTARTNOINTR
+ || errno == ERESTART_RESTARTBLOCK)
+ {
+ if (IS_IA32_PROCESS(&scr->pt)) {
+ scr->pt.r8 = scr->pt.r1;
+ scr->pt.cr_iip -= 2;
+ if (errno == ERESTART_RESTARTBLOCK)
+ scr->pt.r8 = 0; /* x86 version of __NR_restart_syscall */
+ } else {
+ /*
+ * Note: the syscall number is in r15 which is saved in
+ * pt_regs so all we need to do here is adjust ip so that
+ * the "break" instruction gets re-executed.
+ */
+ ia64_decrement_ip(&scr->pt);
+ if (errno == ERESTART_RESTARTBLOCK)
+ scr->pt.r15 = __NR_restart_syscall;
+ }
+ }
+ }
+ return 0;
+}
+
+/* Set a delayed signal that was detected in MCA/INIT/NMI/PMI context where it
+ * could not be delivered. It is important that the target process is not
+ * allowed to do any more work in user space. Possible cases for the target
+ * process:
+ *
+ * - It is sleeping and will wake up soon. Store the data in the current task,
+ * the signal will be sent when the current task returns from the next
+ * interrupt.
+ *
+ * - It is running in user context. Store the data in the current task, the
+ * signal will be sent when the current task returns from the next interrupt.
+ *
+ * - It is running in kernel context on this or another cpu and will return to
+ * user context. Store the data in the target task, the signal will be sent
+ * to itself when the target task returns to user space.
+ *
+ * - It is running in kernel context on this cpu and will sleep before
+ * returning to user context. Because this is also the current task, the
+ * signal will not get delivered and the task could sleep indefinitely.
+ * Store the data in the idle task for this cpu, the signal will be sent
+ * after the idle task processes its next interrupt.
+ *
+ * To cover all cases, store the data in the target task, the current task and
+ * the idle task on this cpu. Whatever happens, the signal will be delivered
+ * to the target task before it can do any useful user space work. Multiple
+ * deliveries have no unwanted side effects.
+ *
+ * Note: This code is executed in MCA/INIT/NMI/PMI context, with interrupts
+ * disabled. It must not take any locks nor use kernel structures or services
+ * that require locks.
+ */
+
+/* To ensure that we get the right pid, check its start time. To avoid extra
+ * include files in thread_info.h, convert the task start_time to unsigned long,
+ * giving us a cycle time of > 580 years.
+ *
+ * Returns t->start_time folded into one nanosecond count, i.e.
+ * tv_sec * NSEC_PER_SEC + tv_nsec.
+ */
+static inline unsigned long
+start_time_ul(const struct task_struct *t)
+{
+ return t->start_time.tv_sec * NSEC_PER_SEC + t->start_time.tv_nsec;
+}
+
+/*
+ * Record a delayed signal (signo/code/addr, aimed at task `pid') in the
+ * thread_info of three tasks in turn -- the target itself, the current task
+ * and this CPU's idle task -- and flag each with TIF_SIGDELAYED.  Stops as
+ * soon as one of the three lookups yields no task.
+ */
+void
+set_sigdelayed(pid_t pid, int signo, int code, void __user *addr)
+{
+	struct task_struct *t;
+	unsigned long start_time = 0;
+	int step;
+
+	for (step = 0; step < 3; step++) {
+		if (step == 0) {
+			/* the target: remember its start time so the pid can be verified later */
+			t = find_task_by_pid(pid);
+			if (t)
+				start_time = start_time_ul(t);
+		} else if (step == 1)
+			t = current;
+		else
+			t = idle_task(smp_processor_id());
+
+		if (!t)
+			return;
+
+		t->thread_info->sigdelayed.signo = signo;
+		t->thread_info->sigdelayed.code = code;
+		t->thread_info->sigdelayed.addr = addr;
+		t->thread_info->sigdelayed.start_time = start_time;
+		t->thread_info->sigdelayed.pid = pid;
+		wmb();	/* payload stores are ordered before the flag store */
+		set_tsk_thread_flag(t, TIF_SIGDELAYED);
+	}
+}
+
+/* Called from entry.S when it detects TIF_SIGDELAYED, a delayed signal that
+ * was detected in MCA/INIT/NMI/PMI context where it could not be delivered.
+ *
+ * Rebuilds the siginfo stored by set_sigdelayed() in the current thread_info
+ * and forces the signal onto the recorded pid, provided a task with that pid
+ * still exists and has the recorded start time (i.e. the pid was not reused).
+ *
+ * The pid lookup, the start-time check and the delivery all happen under
+ * tasklist_lock (read side) so that the task cannot exit and be freed between
+ * find_task_by_pid() and force_sig_info().
+ */
+
+void
+do_sigdelayed(void)
+{
+	struct siginfo siginfo;
+	pid_t pid;
+	struct task_struct *t;
+
+	clear_thread_flag(TIF_SIGDELAYED);
+	memset(&siginfo, 0, sizeof(siginfo));
+	siginfo.si_signo = current_thread_info()->sigdelayed.signo;
+	siginfo.si_code = current_thread_info()->sigdelayed.code;
+	siginfo.si_addr = current_thread_info()->sigdelayed.addr;
+	pid = current_thread_info()->sigdelayed.pid;
+
+	read_lock(&tasklist_lock);
+	t = find_task_by_pid(pid);
+	/* deliver only to the very task the signal was recorded for */
+	if (t && current_thread_info()->sigdelayed.start_time == start_time_ul(t))
+		force_sig_info(siginfo.si_signo, &siginfo, t);
+	read_unlock(&tasklist_lock);
+}
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
new file mode 100644
index 00000000000..953095e2ce1
--- /dev/null
+++ b/arch/ia64/kernel/smp.c
@@ -0,0 +1,376 @@
+/*
+ * SMP Support
+ *
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999, 2001, 2003 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Lots of stuff stolen from arch/alpha/kernel/smp.c
+ *
+ * 01/05/16 Rohit Seth <rohit.seth@intel.com> IA64-SMP functions. Reorganized
+ * the existing code (on the lines of x86 port).
+ * 00/09/11 David Mosberger <davidm@hpl.hp.com> Do loops_per_jiffy
+ * calibration on each CPU.
+ * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> fixed logical processor id
+ * 00/03/31 Rohit Seth <rohit.seth@intel.com> Fixes for Bootstrap Processor
+ * & cpu_online_map now gets done here (instead of setup.c)
+ * 99/10/05 davidm Update to bring it in sync with new command-line processing
+ * scheme.
+ * 10/13/00 Goutham Rao <goutham.rao@intel.com> Updated smp_call_function and
+ * smp_call_function_single to resend IPI on timeouts
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+#include <linux/kernel_stat.h>
+#include <linux/mm.h>
+#include <linux/cache.h>
+#include <linux/delay.h>
+#include <linux/efi.h>
+#include <linux/bitops.h>
+
+#include <asm/atomic.h>
+#include <asm/current.h>
+#include <asm/delay.h>
+#include <asm/machvec.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/sal.h>
+#include <asm/system.h>
+#include <asm/tlbflush.h>
+#include <asm/unistd.h>
+#include <asm/mca.h>
+
+/*
+ * Structure and data for smp_call_function(). This is designed to minimise static memory
+ * requirements. It also looks cleaner.
+ */
+static __cacheline_aligned DEFINE_SPINLOCK(call_lock);
+
+struct call_data_struct {
+ void (*func) (void *info); /* function each target CPU runs from handle_IPI() */
+ void *info; /* argument passed to func */
+ long wait; /* non-zero: the sender also spins until `finished' reaches its CPU count */
+ atomic_t started; /* incremented by each target after it has copied func/info/wait */
+ atomic_t finished; /* incremented by each target after func returns (only when wait is set) */
+};
+
+static volatile struct call_data_struct *call_data;
+
+#define IPI_CALL_FUNC 0
+#define IPI_CPU_STOP 1
+
+/* This needs to be cacheline aligned because it is written to by *other* CPUs. */
+static DEFINE_PER_CPU(u64, ipi_operation) ____cacheline_aligned;
+
+extern void cpu_halt (void);
+
+void
+lock_ipi_calllock(void) /* acquire call_lock with spin_lock_irq() */
+{
+ spin_lock_irq(&call_lock);
+}
+
+void
+unlock_ipi_calllock(void) /* release call_lock with spin_unlock_irq(); pairs with lock_ipi_calllock() */
+{
+ spin_unlock_irq(&call_lock);
+}
+
+static void
+stop_this_cpu (void)
+{
+ /*
+ * Remove this CPU:
+ * clear it from cpu_online_map, call max_xtp(), disable local interrupts
+ * and finally halt it with cpu_halt(). Invoked from handle_IPI() for
+ * IPI_CPU_STOP.
+ */
+ cpu_clear(smp_processor_id(), cpu_online_map);
+ max_xtp();
+ local_irq_disable();
+ cpu_halt();
+}
+
+void
+cpu_die(void) /* halt the calling CPU: max_xtp(), interrupts off, cpu_halt() */
+{
+ max_xtp();
+ local_irq_disable();
+ cpu_halt();
+ /* Should never be here: BUG() and then spin forever if cpu_halt() returns */
+ BUG();
+ for (;;);
+}
+
+/*
+ * Handler for the inter-processor interrupt.  Atomically fetches and clears
+ * this CPU's pending ipi_operation bits and services each set bit:
+ * IPI_CALL_FUNC runs the function published through call_data, IPI_CPU_STOP
+ * halts this CPU, anything else is logged.  Repeats until no bits are pending
+ * and returns IRQ_HANDLED.
+ */
+irqreturn_t
+handle_IPI (int irq, void *dev_id, struct pt_regs *regs)
+{
+	int this_cpu = get_cpu();
+	unsigned long *pending_ipis = &__ia64_per_cpu_var(ipi_operation);
+	unsigned long ops;
+
+	mb();	/* Order interrupt and bit testing. */
+	while ((ops = xchg(pending_ipis, 0)) != 0) {
+		mb();	/* Order bit clearing and data access. */
+		do {
+			unsigned long which;
+
+			which = ffz(~ops);
+			/*
+			 * ops is a 64-bit word, so the mask must be built with an
+			 * unsigned long constant; shifting a plain int would be
+			 * wrong for bit 31 and above and could leave a stray high
+			 * bit set forever, turning this into an endless loop.
+			 */
+			ops &= ~(1UL << which);
+
+			switch (which) {
+			      case IPI_CALL_FUNC:
+			      {
+				      struct call_data_struct *data;
+				      void (*func)(void *info);
+				      void *info;
+				      int wait;
+
+				      /* release the 'pointer lock' */
+				      data = (struct call_data_struct *) call_data;
+				      func = data->func;
+				      info = data->info;
+				      wait = data->wait;
+
+				      mb();
+				      atomic_inc(&data->started);
+				      /*
+				       * At this point the structure may be gone unless
+				       * wait is true.
+				       */
+				      (*func)(info);
+
+				      /* Notify the sending CPU that the task is done. */
+				      mb();
+				      if (wait)
+					      atomic_inc(&data->finished);
+			      }
+			      break;
+
+			      case IPI_CPU_STOP:
+				stop_this_cpu();
+				break;
+
+			      default:
+				printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which);
+				break;
+			}
+		} while (ops);
+		mb();	/* Order data access and bit testing. */
+	}
+	put_cpu();
+	return IRQ_HANDLED;
+}
+
+/*
+ * Called with preemption disabled.
+ *
+ * Sets bit `op' in dest_cpu's per-CPU ipi_operation word, then raises
+ * IA64_IPI_VECTOR on that CPU through platform_send_ipi().
+ */
+static inline void
+send_IPI_single (int dest_cpu, int op)
+{
+ set_bit(op, &per_cpu(ipi_operation, dest_cpu));
+ platform_send_ipi(dest_cpu, IA64_IPI_VECTOR, IA64_IPI_DM_INT, 0);
+}
+
+/*
+ * Post IPI operation `op' to every online CPU except the one we are running
+ * on.  Called with preemption disabled.
+ */
+static inline void
+send_IPI_allbutself (int op)
+{
+	unsigned int cpu;
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		if (!cpu_online(cpu))
+			continue;
+		if (cpu == smp_processor_id())
+			continue;	/* never signal ourselves */
+		send_IPI_single(cpu, op);
+	}
+}
+
+/*
+ * Post IPI operation `op' to every online CPU, the calling one included.
+ * Called with preemption disabled.
+ */
+static inline void
+send_IPI_all (int op)
+{
+	int cpu;
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		if (!cpu_online(cpu))
+			continue;
+		send_IPI_single(cpu, op);
+	}
+}
+
+/*
+ * Called with preemption disabled.
+ *
+ * Posts IPI operation `op' to the CPU we are currently running on.
+ */
+static inline void
+send_IPI_self (int op)
+{
+ send_IPI_single(smp_processor_id(), op);
+}
+
+/*
+ * Called with preemption disabled.
+ *
+ * Raises the IA64_IPI_RESCHEDULE vector on `cpu'. Unlike the other IPIs in
+ * this file it does not go through the per-CPU ipi_operation word.
+ */
+void
+smp_send_reschedule (int cpu)
+{
+ platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0);
+}
+
+void
+smp_flush_tlb_all (void) /* run local_flush_tlb_all() through on_each_cpu() */
+{
+ on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1);
+}
+
+/*
+ * Finish a TLB flush for address space `mm': handled locally when `mm' is the
+ * caller's active mm and has exactly one user, otherwise on every CPU via
+ * on_each_cpu().
+ */
+void
+smp_flush_tlb_mm (struct mm_struct *mm)
+{
+	/*
+	 * The local-only branch is the common case of a single-threaded fork().
+	 *
+	 * The broadcast branch could be narrowed by using mm->cpu_vm_mask to
+	 * track which CPUs have been running in the address space.  It's not
+	 * clear that this is worth the trouble though: to avoid races, we have
+	 * to raise the IPI on the target CPU anyhow, and once a CPU is
+	 * interrupted, the cost of local_flush_tlb_all() is rather trivial.
+	 */
+	if (likely(mm == current->active_mm && atomic_read(&mm->mm_users) == 1))
+		local_finish_flush_tlb_mm(mm);
+	else
+		on_each_cpu((void (*)(void *))local_finish_flush_tlb_mm, mm, 1, 1);
+}
+
+/*
+ * Run a function on another CPU
+ * <func> The function to run. This must be fast and non-blocking.
+ * <info> An arbitrary pointer to pass to the function.
+ * <nonatomic> Currently unused.
+ * <wait> If true, wait until function has completed on other CPUs.
+ * [RETURNS] 0 on success, else a negative status code.
+ *
+ * Does not return until the remote CPU is nearly ready to execute <func>
+ * or is or has executed.
+ *
+ * Returns -EBUSY when <cpuid> is the calling CPU. The request is described
+ * by an on-stack call_data_struct published through call_data; call_lock is
+ * held (spin_lock_bh) until the target has at least picked the request up,
+ * so only one request is in flight at a time.
+ */
+
+int
+smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int nonatomic,
+ int wait)
+{
+ struct call_data_struct data;
+ int cpus = 1; /* number of CPUs expected to acknowledge */
+ int me = get_cpu(); /* prevent preemption and reschedule on another processor */
+
+ if (cpuid == me) {
+ printk("%s: trying to call self\n", __FUNCTION__);
+ put_cpu();
+ return -EBUSY;
+ }
+
+ data.func = func;
+ data.info = info;
+ atomic_set(&data.started, 0);
+ data.wait = wait;
+ if (wait)
+ atomic_set(&data.finished, 0);
+
+ spin_lock_bh(&call_lock);
+
+ call_data = &data;
+ mb(); /* ensure store to call_data precedes setting of IPI_CALL_FUNC */
+ send_IPI_single(cpuid, IPI_CALL_FUNC);
+
+ /* Wait for response */
+ while (atomic_read(&data.started) != cpus)
+ cpu_relax();
+
+ if (wait)
+ while (atomic_read(&data.finished) != cpus)
+ cpu_relax();
+ call_data = NULL; /* data lives on our stack and goes away on return */
+
+ spin_unlock_bh(&call_lock);
+ put_cpu();
+ return 0;
+}
+EXPORT_SYMBOL(smp_call_function_single);
+
+/*
+ * this function sends a 'generic call function' IPI to all other CPUs
+ * in the system.
+ */
+
+/*
+ * [SUMMARY] Run a function on all other CPUs.
+ * <func> The function to run. This must be fast and non-blocking.
+ * <info> An arbitrary pointer to pass to the function.
+ * <nonatomic> currently unused.
+ * <wait> If true, wait (atomically) until function has completed on other CPUs.
+ * [RETURNS] 0 on success, else a negative status code.
+ *
+ * Does not return until remote CPUs are nearly ready to execute <func> or are or have
+ * executed.
+ *
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler.
+ *
+ * As written, every path through this function returns 0; it returns at once
+ * when no other CPU is online.
+ */
+int
+smp_call_function (void (*func) (void *info), void *info, int nonatomic, int wait)
+{
+ struct call_data_struct data;
+ int cpus = num_online_cpus()-1; /* everyone but us has to acknowledge */
+
+ if (!cpus)
+ return 0;
+
+ /* Can deadlock when called with interrupts disabled */
+ WARN_ON(irqs_disabled());
+
+ data.func = func;
+ data.info = info;
+ atomic_set(&data.started, 0);
+ data.wait = wait;
+ if (wait)
+ atomic_set(&data.finished, 0);
+
+ spin_lock(&call_lock);
+
+ call_data = &data;
+ mb(); /* ensure store to call_data precedes setting of IPI_CALL_FUNC */
+ send_IPI_allbutself(IPI_CALL_FUNC);
+
+ /* Wait for response */
+ while (atomic_read(&data.started) != cpus)
+ cpu_relax();
+
+ if (wait)
+ while (atomic_read(&data.finished) != cpus)
+ cpu_relax();
+ call_data = NULL; /* data lives on our stack and goes away on return */
+
+ spin_unlock(&call_lock);
+ return 0;
+}
+EXPORT_SYMBOL(smp_call_function);
+
+/*
+ * this function calls the 'stop' function on all other CPUs in the system.
+ * It only posts IPI_CPU_STOP to them; it does not wait for the CPUs to halt.
+ */
+void
+smp_send_stop (void)
+{
+ send_IPI_allbutself(IPI_CPU_STOP);
+}
+
+int __init
+setup_profiling_timer (unsigned int multiplier) /* stub: multiplier is ignored */
+{
+ return -EINVAL; /* unconditionally rejected */
+}
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
new file mode 100644
index 00000000000..5318f0cbfc2
--- /dev/null
+++ b/arch/ia64/kernel/smpboot.c
@@ -0,0 +1,692 @@
+/*
+ * SMP boot-related support
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 01/05/16 Rohit Seth <rohit.seth@intel.com> Moved SMP booting functions from smp.c to here.
+ * 01/04/27 David Mosberger <davidm@hpl.hp.com> Added ITC synching code.
+ * 02/07/31 David Mosberger <davidm@hpl.hp.com> Switch over to hotplug-CPU boot-sequence.
+ * smp_boot_cpus()/smp_commence() is replaced by
+ * smp_prepare_cpus()/__cpu_up()/smp_cpus_done().
+ */
+#include <linux/config.h>
+
+#include <linux/module.h>
+#include <linux/acpi.h>
+#include <linux/bootmem.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/mm.h>
+#include <linux/notifier.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/efi.h>
+#include <linux/percpu.h>
+#include <linux/bitops.h>
+
+#include <asm/atomic.h>
+#include <asm/cache.h>
+#include <asm/current.h>
+#include <asm/delay.h>
+#include <asm/ia32.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/machvec.h>
+#include <asm/mca.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/sal.h>
+#include <asm/system.h>
+#include <asm/tlbflush.h>
+#include <asm/unistd.h>
+
+#define SMP_DEBUG 0
+
+#if SMP_DEBUG
+#define Dprintk(x...) printk(x)
+#else
+#define Dprintk(x...)
+#endif
+
+
+/*
+ * ITC synchronization related stuff:
+ */
+#define MASTER 0
+#define SLAVE (SMP_CACHE_BYTES/8)
+
+#define NUM_ROUNDS 64 /* magic value */
+#define NUM_ITERS 5 /* likewise */
+
+static DEFINE_SPINLOCK(itc_sync_lock);
+static volatile unsigned long go[SLAVE + 1];
+
+#define DEBUG_ITC_SYNC 0
+
+extern void __devinit calibrate_delay (void);
+extern void start_ap (void);
+extern unsigned long ia64_iobase;
+
+task_t *task_for_booting_cpu;
+
+/*
+ * State for each CPU
+ */
+DEFINE_PER_CPU(int, cpu_state);
+
+/* Bitmasks of currently online, and possible CPUs */
+cpumask_t cpu_online_map;
+EXPORT_SYMBOL(cpu_online_map);
+cpumask_t cpu_possible_map;
+EXPORT_SYMBOL(cpu_possible_map);
+
+/* which logical CPU number maps to which CPU (physical APIC ID) */
+volatile int ia64_cpu_to_sapicid[NR_CPUS];
+EXPORT_SYMBOL(ia64_cpu_to_sapicid);
+
+static volatile cpumask_t cpu_callin_map;
+
+struct smp_boot_data smp_boot_data __initdata;
+
+unsigned long ap_wakeup_vector = -1; /* External Int use to wakeup APs */
+
+char __initdata no_int_routing;
+
+unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */
+
+static int __init
+nointroute (char *str) /* handler for the "nointroute" boot option; str is not examined */
+{
+ no_int_routing = 1;
+ printk ("no_int_routing on\n");
+ return 1;
+}
+
+__setup("nointroute", nointroute);
+
+void
+sync_master (void *arg) /* master side of the ITC handshake; arg is unused */
+{
+ unsigned long flags, i;
+
+ go[MASTER] = 0; /* ia64_sync_itc() spins until it sees this */
+
+ local_irq_save(flags);
+ {
+ for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) { /* one reply per get_delta() probe */
+ while (!go[MASTER]); /* wait for the slave's request */
+ go[MASTER] = 0;
+ go[SLAVE] = ia64_get_itc(); /* answer with our current itc */
+ }
+ }
+ local_irq_restore(flags);
+}
+
+/*
+ * Return the number of cycles by which our itc differs from the itc on the master
+ * (time-keeper) CPU. A positive number indicates our itc is ahead of the master,
+ * negative that it is behind.
+ *
+ * Runs NUM_ITERS request/reply probes against sync_master() and keeps the one
+ * with the shortest roundtrip. On return *rt holds that roundtrip (t1 - t0)
+ * and *master the master's timestamp for it relative to t0.
+ */
+static inline long
+get_delta (long *rt, long *master)
+{
+ unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
+ unsigned long tcenter, t0, t1, tm;
+ long i;
+
+ for (i = 0; i < NUM_ITERS; ++i) {
+ t0 = ia64_get_itc();
+ go[MASTER] = 1; /* ask the master for its itc */
+ while (!(tm = go[SLAVE])); /* spin until the master has replied */
+ go[SLAVE] = 0;
+ t1 = ia64_get_itc();
+
+ if (t1 - t0 < best_t1 - best_t0) /* shortest roundtrip so far */
+ best_t0 = t0, best_t1 = t1, best_tm = tm;
+ }
+
+ *rt = best_t1 - best_t0;
+ *master = best_tm - best_t0;
+
+ /* average best_t0 and best_t1 without overflow: */
+ tcenter = (best_t0/2 + best_t1/2);
+ if (best_t0 % 2 + best_t1 % 2 == 2)
+ ++tcenter;
+ return tcenter - best_tm;
+}
+
+/*
+ * Synchronize ar.itc of the current (slave) CPU with the ar.itc of the MASTER CPU
+ * (normally the time-keeper CPU). We use a closed loop to eliminate the possibility of
+ * unaccounted-for errors (such as getting a machine check in the middle of a calibration
+ * step). The basic idea is for the slave to ask the master what itc value it has and to
+ * read its own itc before and after the master responds. Each iteration gives us three
+ * timestamps:
+ *
+ * slave master
+ *
+ * t0 ---\
+ * ---\
+ * --->
+ * tm
+ * /---
+ * /---
+ * t1 <---
+ *
+ *
+ * The goal is to adjust the slave's ar.itc such that tm falls exactly half-way between t0
+ * and t1. If we achieve this, the clocks are synchronized provided the interconnect
+ * between the slave and the master is symmetric. Even if the interconnect were
+ * asymmetric, we would still know that the synchronization error is smaller than the
+ * roundtrip latency (t0 - t1).
+ *
+ * When the interconnect is quiet and symmetric, this lets us synchronize the itc to
+ * within one or two cycles. However, we can only *guarantee* that the synchronization is
+ * accurate to within a round-trip time, which is typically in the range of several
+ * hundred cycles (e.g., ~500 cycles). In practice, this means that the itc's are usually
+ * almost perfectly synchronized, but we shouldn't assume that the accuracy is much better
+ * than half a microsecond or so.
+ */
+/*
+ * Slave side of the ITC synchronization: asks CPU `master' to run
+ * sync_master(), then performs NUM_ROUNDS measurements with get_delta() and
+ * adjusts the local ar.itc after each one until a round reports a delta of
+ * zero.  Logs the last measured difference and roundtrip when done.  Returns
+ * early (after logging an error) if the master cannot be reached.
+ */
+void
+ia64_sync_itc (unsigned int master)
+{
+	long i, delta, adj, adjust_latency = 0, done = 0;
+	unsigned long flags, rt, master_time_stamp;
+#if DEBUG_ITC_SYNC
+	struct {
+		long rt;	/* roundtrip time */
+		long master;	/* master's timestamp */
+		long diff;	/* difference between midpoint and master's timestamp */
+		long lat;	/* estimate of itc adjustment latency */
+	} t[NUM_ROUNDS];
+#endif
+
+	/*
+	 * Make sure local timer ticks are disabled while we sync.  If
+	 * they were enabled, we'd have to worry about nasty issues
+	 * like setting the ITC ahead of (or a long time before) the
+	 * next scheduled tick.
+	 */
+	BUG_ON((ia64_get_itv() & (1 << 16)) == 0);
+
+	go[MASTER] = 1;
+
+	if (smp_call_function_single(master, sync_master, NULL, 1, 0) < 0) {
+		printk(KERN_ERR "sync_itc: failed to get attention of CPU %u!\n", master);
+		return;
+	}
+
+	while (go[MASTER]);	/* wait for master to be ready */
+
+	spin_lock_irqsave(&itc_sync_lock, flags);
+	{
+		for (i = 0; i < NUM_ROUNDS; ++i) {
+			delta = get_delta(&rt, &master_time_stamp);
+			if (delta == 0)
+				done = 1;	/* let's lock on to this... */
+
+			if (!done) {
+				if (i > 0) {
+					adjust_latency += -delta;
+					adj = -delta + adjust_latency/4;
+				} else
+					adj = -delta;
+
+				ia64_set_itc(ia64_get_itc() + adj);
+			}
+#if DEBUG_ITC_SYNC
+			t[i].rt = rt;
+			t[i].master = master_time_stamp;
+			t[i].diff = delta;
+			t[i].lat = adjust_latency/4;
+#endif
+		}
+	}
+	spin_unlock_irqrestore(&itc_sync_lock, flags);
+
+#if DEBUG_ITC_SYNC
+	for (i = 0; i < NUM_ROUNDS; ++i)
+		printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
+		       t[i].rt, t[i].master, t[i].diff, t[i].lat);
+#endif
+
+	printk(KERN_INFO "CPU %d: synchronized ITC with CPU %u (last diff %ld cycles, "
+	       "maxerr %lu cycles)\n", smp_processor_id(), master, delta, rt);
+}
+
+/*
+ * Ideally sets up per-cpu profiling hooks. Doesn't do much now...
+ * (the body is empty; the calls in smp_callin() and smp_prepare_cpus() are
+ * placeholders).
+ */
+static inline void __devinit
+smp_setup_percpu_timer (void)
+{
+}
+
+static void __devinit
+smp_callin (void)
+{ /*
+ * Per-CPU bring-up run on a freshly started processor: marks the CPU online,
+ * sets up its CMC vector (and perfmon, if configured), optionally
+ * synchronizes its ITC with CPU 0, calibrates the delay loop and finally sets
+ * its bit in cpu_callin_map, which do_boot_cpu() is polling.
+ */
+ int cpuid, phys_id;
+ extern void ia64_init_itm(void);
+
+#ifdef CONFIG_PERFMON
+ extern void pfm_init_percpu(void);
+#endif
+
+ cpuid = smp_processor_id();
+ phys_id = hard_smp_processor_id();
+
+ if (cpu_online(cpuid)) {
+ printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n",
+ phys_id, cpuid);
+ BUG();
+ }
+
+ lock_ipi_calllock(); /* cpu_online_map is changed under call_lock */
+ cpu_set(cpuid, cpu_online_map);
+ unlock_ipi_calllock();
+
+ smp_setup_percpu_timer();
+
+ ia64_mca_cmc_vector_setup(); /* Setup vector on AP */
+
+#ifdef CONFIG_PERFMON
+ pfm_init_percpu();
+#endif
+
+ local_irq_enable();
+
+ if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
+ /*
+ * Synchronize the ITC with the BP. Need to do this after irqs are
+ * enabled because ia64_sync_itc() calls smp_call_function_single(), which
+ * calls spin_unlock_bh(), which calls local_bh_enable(), which bugs out
+ * if irqs are not enabled...
+ */
+ Dprintk("Going to syncup ITC with BP.\n");
+ ia64_sync_itc(0);
+ }
+
+ /*
+ * Get our bogomips.
+ */
+ ia64_init_itm();
+ calibrate_delay();
+ local_cpu_data->loops_per_jiffy = loops_per_jiffy;
+
+#ifdef CONFIG_IA32_SUPPORT
+ ia32_gdt_init();
+#endif
+
+ /*
+ * Allow the master to continue.
+ */
+ cpu_set(cpuid, cpu_callin_map);
+ Dprintk("Stack on CPU %d at about %p\n",cpuid, &cpuid);
+}
+
+
+/*
+ * Activate a secondary processor. head.S calls this.
+ *
+ * Sets the I/O base kernel register, maps the PAL code, runs cpu_init() and
+ * smp_callin(), then enters cpu_idle(). The argument is unused and the final
+ * "return 0" follows the cpu_idle() call.
+ */
+int __devinit
+start_secondary (void *unused)
+{
+ /* Early console may use I/O ports */
+ ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase));
+
+ Dprintk("start_secondary: starting CPU 0x%x\n", hard_smp_processor_id());
+ efi_map_pal_code();
+ cpu_init();
+ smp_callin();
+
+ cpu_idle();
+ return 0;
+}
+
+struct pt_regs * __devinit idle_regs(struct pt_regs *regs) /* regs is ignored */
+{
+ return NULL; /* always NULL */
+}
+
+struct create_idle {
+ struct task_struct *idle; /* out: result of fork_idle(cpu), filled in by do_fork_idle() */
+ struct completion done; /* completed by do_fork_idle() once `idle' is set */
+ int cpu; /* in: CPU number handed to fork_idle() */
+};
+
+void
+do_fork_idle(void *_c_idle) /* work callback; _c_idle points to a struct create_idle */
+{
+ struct create_idle *c_idle = _c_idle;
+
+ c_idle->idle = fork_idle(c_idle->cpu); /* result is checked with IS_ERR() by do_boot_cpu() */
+ complete(&c_idle->done);
+}
+
+static int __devinit
+do_boot_cpu (int sapicid, int cpu)
+{ /*
+ * Create the idle task for `cpu', send it the wakeup vector and poll
+ * cpu_callin_map until the CPU has called in. Returns 0 on success and
+ * -EINVAL if the CPU did not call in within the timeout; panics if the idle
+ * task cannot be forked. sapicid is used for diagnostics only.
+ */
+ int timeout;
+ struct create_idle c_idle = {
+ .cpu = cpu,
+ .done = COMPLETION_INITIALIZER(c_idle.done),
+ };
+ DECLARE_WORK(work, do_fork_idle, &c_idle);
+ /*
+ * We can't use kernel_thread since we must avoid to reschedule the child.
+ */
+ if (!keventd_up() || current_is_keventd())
+ work.func(work.data); /* run do_fork_idle() directly in this context */
+ else {
+ schedule_work(&work);
+ wait_for_completion(&c_idle.done);
+ }
+
+ if (IS_ERR(c_idle.idle))
+ panic("failed fork for CPU %d", cpu);
+ task_for_booting_cpu = c_idle.idle;
+
+ Dprintk("Sending wakeup vector %lu to AP 0x%x/0x%x.\n", ap_wakeup_vector, cpu, sapicid);
+
+ platform_send_ipi(cpu, ap_wakeup_vector, IA64_IPI_DM_INT, 0);
+
+ /*
+ * Wait 10s total for the AP to start
+ * (100000 polls, udelay(100) apart)
+ */
+ Dprintk("Waiting on callin_map ...");
+ for (timeout = 0; timeout < 100000; timeout++) {
+ if (cpu_isset(cpu, cpu_callin_map))
+ break; /* It has booted */
+ udelay(100);
+ }
+ Dprintk("\n");
+
+ if (!cpu_isset(cpu, cpu_callin_map)) {
+ printk(KERN_ERR "Processor 0x%x/0x%x is stuck.\n", cpu, sapicid);
+ ia64_cpu_to_sapicid[cpu] = -1;
+ cpu_clear(cpu, cpu_online_map); /* was set in smp_callin() */
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int __init
+decay (char *str) /* handler for the "decay=" boot option */
+{
+ int ticks;
+ get_option (&str, &ticks); /* the value is parsed but not used afterwards */
+ return 1;
+}
+
+__setup("decay=", decay);
+
+/*
+ * Initialize the logical CPU number to SAPICID mapping
+ *
+ * Logical CPU 0 is the boot processor.  Every other entry of smp_boot_data is
+ * given the next free logical number and marked present and possible.  With
+ * CONFIG_HOTPLUG_CPU all NR_CPUS slots are marked possible up front.  Entries
+ * that would not fit into NR_CPUS logical CPUs are ignored with a warning.
+ */
+void __init
+smp_build_cpu_map (void)
+{
+	int sapicid, cpu, i;
+	int boot_cpu_id = hard_smp_processor_id();
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		ia64_cpu_to_sapicid[cpu] = -1;
+#ifdef CONFIG_HOTPLUG_CPU
+		cpu_set(cpu, cpu_possible_map);
+#endif
+	}
+
+	ia64_cpu_to_sapicid[0] = boot_cpu_id;
+	cpus_clear(cpu_present_map);
+	cpu_set(0, cpu_present_map);
+	cpu_set(0, cpu_possible_map);
+	for (cpu = 1, i = 0; i < smp_boot_data.cpu_count; i++) {
+		sapicid = smp_boot_data.cpu_phys_id[i];
+		if (sapicid == boot_cpu_id)
+			continue;
+		/*
+		 * Never index past the end of ia64_cpu_to_sapicid[] or the CPU
+		 * masks, e.g. when the boot CPU is missing from a full list.
+		 */
+		if (cpu >= NR_CPUS) {
+			printk(KERN_WARNING "smp_build_cpu_map: more than %d CPUs found, "
+			       "ignoring the rest\n", NR_CPUS);
+			break;
+		}
+		cpu_set(cpu, cpu_present_map);
+		cpu_set(cpu, cpu_possible_map);
+		ia64_cpu_to_sapicid[cpu] = sapicid;
+		cpu++;
+	}
+}
+
+#ifdef CONFIG_NUMA
+
+/* on which node is each logical CPU (one cacheline even for 64 CPUs) */
+u8 cpu_to_node_map[NR_CPUS] __cacheline_aligned;
+EXPORT_SYMBOL(cpu_to_node_map);
+/* which logical CPUs are on which nodes */
+cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
+
+/*
+ * Build the cpu to node mapping and initialize the per node cpu masks.
+ *
+ * For each logical CPU the node is looked up by matching the CPU's physical id
+ * against node_cpuid[].  A CPU without a match is recorded as node 0 in
+ * cpu_to_node_map[] but is not added to any node_to_cpu_mask[].
+ */
+void __init
+build_cpu_to_node_map (void)
+{
+	int cpu, i, node;
+
+	for (node = 0; node < MAX_NUMNODES; node++)
+		cpus_clear(node_to_cpu_mask[node]);
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		/*
+		 * All Itanium NUMA platforms I know use ACPI, so maybe we
+		 * can drop this ifdef completely. [EF]
+		 */
+#ifdef CONFIG_ACPI_NUMA
+		node = -1;
+		for (i = 0; i < NR_CPUS; i++) {
+			if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) {
+				node = node_cpuid[i].nid;
+				break;
+			}
+		}
+#else
+# error Fixme: Dunno how to build CPU-to-node map.
+#endif
+		if (node < 0) {
+			/* unknown CPU: default to node 0, stay out of the masks */
+			cpu_to_node_map[cpu] = 0;
+			continue;
+		}
+		cpu_to_node_map[cpu] = node;
+		cpu_set(cpu, node_to_cpu_mask[node]);
+	}
+}
+
+#endif /* CONFIG_NUMA */
+
+/*
+ * Prepare the boot processor's SMP bookkeeping before any AP is started.
+ *
+ * Sets up the per-CPU timer, marks logical CPU 0 online and called-in, and records
+ * its SAPIC id.  When @max_cpus is 0 the online, present and possible maps are
+ * reduced to CPU 0 alone.
+ */
+void __init
+smp_prepare_cpus (unsigned int max_cpus)
+{
+	int boot_cpu_id = hard_smp_processor_id();
+
+	/* Initialize the per-CPU profiling counter/multiplier */
+	smp_setup_percpu_timer();
+
+	/* We have the boot CPU online for sure. */
+	cpu_set(0, cpu_online_map);
+	cpu_set(0, cpu_callin_map);
+
+	local_cpu_data->loops_per_jiffy = loops_per_jiffy;
+	ia64_cpu_to_sapicid[0] = boot_cpu_id;
+
+	printk(KERN_INFO "Boot processor id 0x%x/0x%x\n", 0, boot_cpu_id);
+
+	current_thread_info()->cpu = 0;
+
+	if (max_cpus)
+		return;
+
+	/* If SMP should be disabled, then really disable it! */
+	printk(KERN_INFO "SMP mode deactivated.\n");
+	cpus_clear(cpu_online_map);
+	cpus_clear(cpu_present_map);
+	cpus_clear(cpu_possible_map);
+	cpu_set(0, cpu_online_map);
+	cpu_set(0, cpu_present_map);
+	cpu_set(0, cpu_possible_map);
+}
+
+/* Mark the CPU we are running on as online and as having called in. */
+void __devinit smp_prepare_boot_cpu(void)
+{
+	int this_cpu = smp_processor_id();
+
+	cpu_set(this_cpu, cpu_online_map);
+	cpu_set(this_cpu, cpu_callin_map);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+extern void fixup_irqs(void);
+/*
+ * Re-enable @cpu: store CPU_UP_PREPARE into its per-CPU cpu_state, then busy-wait
+ * until the CPU appears in the online map.  Always returns 0.
+ * (Presumably the target CPU is polling cpu_state elsewhere -- confirm.)
+ *
+ * Must be called with cpucontrol mutex held.
+ */
+static int __devinit cpu_enable(unsigned int cpu)
+{
+ per_cpu(cpu_state,cpu) = CPU_UP_PREPARE;
+ wmb(); /* write barrier between the cpu_state store and the polling below */
+
+ while (!cpu_online(cpu))
+ cpu_relax();
+ return 0;
+}
+
+/*
+ * Take the calling CPU out of service: refuse for the boot processor (-EBUSY),
+ * otherwise call fixup_irqs(), flush the local TLB and return 0.
+ */
+int __cpu_disable(void)
+{
+	int cpu = smp_processor_id();
+
+	/* don't permit boot processor for now */
+	if (cpu != 0) {
+		fixup_irqs();
+		local_flush_tlb_all();
+		printk ("Disabled cpu %u\n", smp_processor_id());
+		return 0;
+	}
+	return -EBUSY;
+}
+
+/*
+ * Wait for @cpu to acknowledge its shutdown by setting its cpu_state to CPU_DEAD.
+ * Polls up to 100 times with msleep(100) in between and logs an error if the
+ * state never changes.
+ */
+void __cpu_die(unsigned int cpu)
+{
+	unsigned int tries_left = 100;
+
+	while (tries_left--) {
+		/* They ack this in play_dead by setting CPU_DEAD */
+		if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
+			/*
+			 * TBD: Enable this on physical removal, or once the
+			 * processor is put in SAL_BOOT_RENDEZ mode:
+			 * cpu_clear(cpu, cpu_callin_map);
+			 */
+			return;
+		}
+		msleep(100);
+	}
+	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
+}
+#else /* !CONFIG_HOTPLUG_CPU */
+/* Without CPU hotplug there is nothing to re-enable; always returns 0. */
+static int __devinit cpu_enable(unsigned int cpu)
+{
+ return 0;
+}
+
+/* Without CPU hotplug a CPU can never be disabled; always returns -ENOSYS. */
+int __cpu_disable(void)
+{
+ return -ENOSYS;
+}
+
+/* Must never be reached without CPU hotplug, since __cpu_disable() always fails. */
+void __cpu_die(unsigned int cpu)
+{
+ /* We said "no" in __cpu_disable */
+ BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+/*
+ * Print the number of online processors and the sum of their loops_per_jiffy
+ * values expressed as BogoMIPS.  The argument is unused.
+ */
+void
+smp_cpus_done (unsigned int dummy)
+{
+	unsigned long bogosum = 0;
+	int cpu;
+
+	/* Allow the user to impress friends. */
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		if (!cpu_online(cpu))
+			continue;
+		bogosum += cpu_data(cpu)->loops_per_jiffy;
+	}
+
+	printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+	       (int)num_online_cpus(), bogosum/(500000/HZ), (bogosum/(5000/HZ))%100);
+}
+
+/*
+ * Bring logical CPU @cpu online.
+ *
+ * Returns -EINVAL if no SAPIC id is known for @cpu.  A CPU that has already called
+ * in is simply re-enabled and waited for; any other CPU is booted from scratch with
+ * do_boot_cpu(), whose negative error code (if any) is passed back.
+ */
+int __devinit
+__cpu_up (unsigned int cpu)
+{
+	int sapicid = ia64_cpu_to_sapicid[cpu];
+	int ret;
+
+	if (sapicid == -1)
+		return -EINVAL;
+
+	if (!cpu_isset(cpu, cpu_callin_map)) {
+		/* Processor goes to start_secondary(), sets online flag */
+		ret = do_boot_cpu(sapicid, cpu);
+		return (ret < 0) ? ret : 0;
+	}
+
+	/* Already booted: just enable it and get it out of the idle loop */
+	cpu_enable(cpu);
+	local_irq_enable();
+	while (!cpu_isset(cpu, cpu_online_map))
+		mb();
+	return 0;
+}
+
+/*
+ * Assume that CPUs have been discovered by some platform-dependent interface.  For
+ * SoftSDV/Lion, that would be ACPI.
+ *
+ * Registers start_ap with SAL as the OS boot rendezvous vector, passing the physical
+ * addresses of the entry point and gp taken from start_ap's function descriptor.
+ * A SAL failure is logged but not otherwise acted upon.
+ *
+ * Setup of the IPI irq handler is done in irq.c:init_IRQ_SMP().
+ */
+void __init
+init_smp_config(void)
+{
+	struct fptr {
+		unsigned long fp;
+		unsigned long gp;
+	} *ap_startup = (struct fptr *) start_ap;
+	long sal_ret;
+
+	/* Tell SAL where to drop the AP's. */
+	sal_ret = ia64_sal_set_vectors(SAL_VECTOR_OS_BOOT_RENDEZ,
+				       ia64_tpa(ap_startup->fp), ia64_tpa(ap_startup->gp),
+				       0, 0, 0, 0);
+	if (sal_ret >= 0)
+		return;
+
+	printk(KERN_ERR "SMP: Can't set SAL AP Boot Rendezvous: %s\n",
+	       ia64_sal_strerror(sal_ret));
+}
+
diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
new file mode 100644
index 00000000000..3ac216e1c8b
--- /dev/null
+++ b/arch/ia64/kernel/sys_ia64.c
@@ -0,0 +1,298 @@
+/*
+ * This file contains various system calls that have different calling
+ * conventions on different platforms.
+ *
+ * Copyright (C) 1999-2000, 2002-2003, 2005 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/sched.h>
+#include <linux/shm.h>
+#include <linux/file.h> /* doh, must come after sched.h... */
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/syscalls.h>
+#include <linux/highuid.h>
+#include <linux/hugetlb.h>
+
+#include <asm/shmparam.h>
+#include <asm/uaccess.h>
+
+/*
+ * Find a free range of @len bytes in the current task's address space.
+ *
+ * The search starts at @addr, or at mm->free_area_cache when @addr is 0 (or, with
+ * CONFIG_HUGETLB_PAGE, when @addr lies in the huge-page region).  Candidates are
+ * aligned to PAGE_SIZE, or to SHMLBA for MAP_SHARED requests of tasks whose
+ * TASK_SIZE exceeds 4GB.  If the first pass runs out of room, one more pass is made
+ * from TASK_UNMAPPED_BASE.  @filp and @pgoff are not used here.
+ *
+ * Returns the chosen address, or -ENOMEM (as an unsigned long) if nothing fits.
+ */
+unsigned long
+arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len,
+ unsigned long pgoff, unsigned long flags)
+{
+ long map_shared = (flags & MAP_SHARED);
+ unsigned long start_addr, align_mask = PAGE_SIZE - 1;
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+
+ if (len > RGN_MAP_LIMIT)
+ return -ENOMEM;
+
+#ifdef CONFIG_HUGETLB_PAGE
+ if (REGION_NUMBER(addr) == REGION_HPAGE)
+ addr = 0;
+#endif
+ if (!addr)
+ addr = mm->free_area_cache;
+
+ if (map_shared && (TASK_SIZE > 0xfffffffful))
+ /*
+ * For 64-bit tasks, align shared segments to 1MB to avoid potential
+ * performance penalty due to virtual aliasing (see ASDM). For 32-bit
+ * tasks, we prefer to avoid exhausting the address space too quickly by
+ * limiting alignment to a single page.
+ */
+ align_mask = SHMLBA - 1;
+
+ full_search:
+ /* Round the starting point up to the required alignment. */
+ start_addr = addr = (addr + align_mask) & ~align_mask;
+
+ for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
+ /* At this point: (!vma || addr < vma->vm_end). */
+ /* Out of room in the task or in the current region? */
+ if (TASK_SIZE - len < addr || RGN_MAP_LIMIT - len < REGION_OFFSET(addr)) {
+ if (start_addr != TASK_UNMAPPED_BASE) {
+ /* Start a new search --- just in case we missed some holes. */
+ addr = TASK_UNMAPPED_BASE;
+ goto full_search;
+ }
+ return -ENOMEM;
+ }
+ /* The hole in front of this vma (or after the last one) is big enough. */
+ if (!vma || addr + len <= vma->vm_start) {
+ /* Remember the address where we stopped this search: */
+ mm->free_area_cache = addr + len;
+ return addr;
+ }
+ /* Skip past this vma, keeping the alignment. */
+ addr = (vma->vm_end + align_mask) & ~align_mask;
+ }
+}
+
+/*
+ * getpriority() wrapper.  A negative result from sys_getpriority() is passed
+ * through as an error; any other result is converted to 20 - prio and flagged as a
+ * successful return.
+ */
+asmlinkage long
+ia64_getpriority (int which, int who)
+{
+	long prio = sys_getpriority(which, who);
+
+	if (prio < 0)
+		return prio;
+
+	force_successful_syscall_return();
+	return 20 - prio;
+}
+
+/*
+ * Return the kernel's PAGE_SIZE.
+ * XXX obsolete, but leave it here until the old libc is gone...
+ */
+asmlinkage unsigned long
+sys_getpagesize (void)
+{
+ return PAGE_SIZE;
+}
+
+/*
+ * shmat() wrapper: returns the attach address produced by do_shmat() and flags the
+ * return as successful, or passes a negative do_shmat() result back as the error.
+ */
+asmlinkage unsigned long
+ia64_shmat (int shmid, void __user *shmaddr, int shmflg)
+{
+	unsigned long raddr;
+	int retval = do_shmat(shmid, shmaddr, shmflg, &raddr);
+
+	if (retval >= 0) {
+		force_successful_syscall_return();
+		return raddr;
+	}
+	return retval;
+}
+
+/*
+ * brk() for ia64.
+ *
+ * Tries to move the program break to @brk and returns the resulting mm->brk: the
+ * new value on success, the unchanged old value on any failure.  The return is
+ * always flagged as successful.
+ */
+asmlinkage unsigned long
+ia64_brk (unsigned long brk)
+{
+ unsigned long rlim, retval, newbrk, oldbrk;
+ struct mm_struct *mm = current->mm;
+
+ /*
+ * Most of this replicates the code in sys_brk() except for an additional safety
+ * check and the clearing of r8. However, we can't call sys_brk() because we need
+ * to acquire the mmap_sem before we can do the test...
+ */
+ down_write(&mm->mmap_sem);
+
+ if (brk < mm->end_code)
+ goto out;
+ newbrk = PAGE_ALIGN(brk);
+ oldbrk = PAGE_ALIGN(mm->brk);
+ /* Same page as before: only the recorded break value changes. */
+ if (oldbrk == newbrk)
+ goto set_brk;
+
+ /* Always allow shrinking brk. */
+ if (brk <= mm->brk) {
+ if (!do_munmap(mm, newbrk, oldbrk-newbrk))
+ goto set_brk;
+ goto out;
+ }
+
+ /* Check against unimplemented/unmapped addresses: */
+ if ((newbrk - oldbrk) > RGN_MAP_LIMIT || REGION_OFFSET(newbrk) > RGN_MAP_LIMIT)
+ goto out;
+
+ /* Check against rlimit.. */
+ rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
+ if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
+ goto out;
+
+ /* Check against existing mmap mappings. */
+ if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
+ goto out;
+
+ /* Ok, looks good - let it rip. */
+ if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
+ goto out;
+set_brk:
+ mm->brk = brk;
+out:
+ /* Success or failure, the caller gets the current break. */
+ retval = mm->brk;
+ up_write(&mm->mmap_sem);
+ force_successful_syscall_return();
+ return retval;
+}
+
+/*
+ * On IA-64, we return the two file descriptors in ret0 and ret1 (r8
+ * and r9) as this is faster than doing a copy_to_user().
+ *
+ * fd[0] is the function's return value, fd[1] is stored into the saved r9 of the
+ * current task.  A non-zero do_pipe() result is returned unchanged.
+ */
+asmlinkage long
+sys_pipe (void)
+{
+	struct pt_regs *regs = ia64_task_regs(current);
+	int fd[2];
+	int retval = do_pipe(fd);
+
+	if (retval)
+		return retval;
+
+	regs->r9 = fd[1];
+	return fd[0];
+}
+
+/*
+ * Common back end of sys_mmap() and sys_mmap2(); @pgoff is in units of pages.
+ *
+ * Resolves @fd to a file for non-anonymous mappings, validates @len and the region
+ * offset of @addr, and then calls do_mmap_pgoff() under mmap_sem.
+ *
+ * Returns the mapped address, or a negative errno cast to unsigned long:
+ * -EBADF (bad @fd), -ENODEV (file cannot be mmapped), -EINVAL (bad length or
+ * region offset), or whatever do_mmap_pgoff() reports.  A zero @len returns @addr
+ * unchanged.
+ */
+static inline unsigned long
+do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, unsigned long pgoff)
+{
+ unsigned long roff;
+ struct file *file = NULL;
+
+ flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+ if (!(flags & MAP_ANONYMOUS)) {
+ file = fget(fd);
+ if (!file)
+ return -EBADF;
+
+ if (!file->f_op || !file->f_op->mmap) {
+ addr = -ENODEV;
+ goto out;
+ }
+ }
+
+ /*
+ * A zero mmap always succeeds in Linux, independent of whether or not the
+ * remaining arguments are valid.
+ */
+ if (len == 0)
+ goto out;
+
+ /* Careful about overflows.. */
+ len = PAGE_ALIGN(len);
+ if (!len || len > TASK_SIZE) {
+ addr = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * Don't permit mappings into unmapped space, the virtual page table of a region,
+ * or across a region boundary. Note: RGN_MAP_LIMIT is equal to 2^n-PAGE_SIZE
+ * (for some integer n <= 61) and len > 0.
+ */
+ roff = REGION_OFFSET(addr);
+ if ((len > RGN_MAP_LIMIT) || (roff > (RGN_MAP_LIMIT - len))) {
+ addr = -EINVAL;
+ goto out;
+ }
+
+ down_write(&current->mm->mmap_sem);
+ addr = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+ up_write(&current->mm->mmap_sem);
+
+ /* Drop the reference taken by fget(), if any. */
+out: if (file)
+ fput(file);
+ return addr;
+}
+
+/*
+ * mmap2() is like mmap() except that the offset is expressed in units
+ * of PAGE_SIZE (instead of bytes).  This allows to mmap2() (pieces
+ * of) files that are larger than the address space of the CPU.
+ *
+ * Returns the result of do_mmap2(); a non-error address is flagged as a
+ * successful return.
+ */
+asmlinkage unsigned long
+sys_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, long pgoff)
+{
+	unsigned long ret = do_mmap2(addr, len, prot, flags, fd, pgoff);
+
+	if (IS_ERR((void *) ret))
+		return ret;
+
+	force_successful_syscall_return();
+	return ret;
+}
+
+/*
+ * mmap() with a byte offset.  @off must be page aligned (-EINVAL otherwise); it is
+ * converted to a page offset and handed to do_mmap2().  A non-error address is
+ * flagged as a successful return.
+ */
+asmlinkage unsigned long
+sys_mmap (unsigned long addr, unsigned long len, int prot, int flags, int fd, long off)
+{
+	unsigned long ret;
+
+	if (offset_in_page(off) != 0)
+		return -EINVAL;
+
+	ret = do_mmap2(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
+	if (IS_ERR((void *) ret))
+		return ret;
+
+	force_successful_syscall_return();
+	return ret;
+}
+
+/*
+ * mremap() wrapper: calls do_mremap() with mmap_sem held for writing and returns
+ * its result.  A non-error address is flagged as a successful return.
+ */
+asmlinkage unsigned long
+ia64_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags,
+	     unsigned long new_addr)
+{
+	extern unsigned long do_mremap (unsigned long addr,
+					unsigned long old_len,
+					unsigned long new_len,
+					unsigned long flags,
+					unsigned long new_addr);
+	unsigned long ret;
+
+	down_write(&current->mm->mmap_sem);
+	ret = do_mremap(addr, old_len, new_len, flags, new_addr);
+	up_write(&current->mm->mmap_sem);
+
+	if (!IS_ERR((void *) ret))
+		force_successful_syscall_return();
+	return ret;
+}
+
+#ifndef CONFIG_PCI
+
+/* Stub used when the kernel is built without CONFIG_PCI: always -ENOSYS. */
+asmlinkage long
+sys_pciconfig_read (unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len,
+ void *buf)
+{
+ return -ENOSYS;
+}
+
+/* Stub used when the kernel is built without CONFIG_PCI: always -ENOSYS. */
+asmlinkage long
+sys_pciconfig_write (unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len,
+ void *buf)
+{
+ return -ENOSYS;
+}
+
+#endif /* CONFIG_PCI */
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
new file mode 100644
index 00000000000..8b8a5a45b62
--- /dev/null
+++ b/arch/ia64/kernel/time.c
@@ -0,0 +1,255 @@
+/*
+ * linux/arch/ia64/kernel/time.c
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ * Stephane Eranian <eranian@hpl.hp.com>
+ * David Mosberger <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ * Copyright (C) 1999-2000 VA Linux Systems
+ * Copyright (C) 1999-2000 Walt Drummond <drummond@valinux.com>
+ */
+#include <linux/config.h>
+
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/profile.h>
+#include <linux/sched.h>
+#include <linux/time.h>
+#include <linux/interrupt.h>
+#include <linux/efi.h>
+#include <linux/profile.h>
+#include <linux/timex.h>
+
+#include <asm/machvec.h>
+#include <asm/delay.h>
+#include <asm/hw_irq.h>
+#include <asm/ptrace.h>
+#include <asm/sal.h>
+#include <asm/sections.h>
+#include <asm/system.h>
+
+extern unsigned long wall_jiffies;
+
+u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
+
+EXPORT_SYMBOL(jiffies_64);
+
+#define TIME_KEEPER_ID 0 /* smp_processor_id() of time-keeper */
+
+#ifdef CONFIG_IA64_DEBUG_IRQ
+
+unsigned long last_cli_ip;
+EXPORT_SYMBOL(last_cli_ip);
+
+#endif
+
+/*
+ * Time interpolator description for the CPU's ITC.  The frequency, drift and
+ * jitter fields are filled in by ia64_init_itm() before it is registered.
+ */
+static struct time_interpolator itc_interpolator = {
+ .shift = 16,
+ .mask = 0xffffffffffffffffLL,
+ .source = TIME_SOURCE_CPU
+};
+
+/*
+ * Per-CPU timer tick handler.
+ *
+ * Accounts one tick per elapsed itm_delta (catching up if several periods have
+ * passed), lets the time-keeper CPU advance the global time via do_timer(), and
+ * finally reprograms the ITM for the next tick.  Does nothing on a CPU that is
+ * offline.  Always returns IRQ_HANDLED.
+ */
+static irqreturn_t
+timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
+{
+ unsigned long new_itm;
+
+ if (unlikely(cpu_is_offline(smp_processor_id()))) {
+ return IRQ_HANDLED;
+ }
+
+ platform_timer_interrupt(irq, dev_id, regs);
+
+ new_itm = local_cpu_data->itm_next;
+
+ if (!time_after(ia64_get_itc(), new_itm))
+ printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n",
+ ia64_get_itc(), new_itm);
+
+ profile_tick(CPU_PROFILING, regs);
+
+ /* Process one tick per iteration until new_itm lies in the future again. */
+ while (1) {
+ update_process_times(user_mode(regs));
+
+ new_itm += local_cpu_data->itm_delta;
+
+ if (smp_processor_id() == TIME_KEEPER_ID) {
+ /*
+ * Here we are in the timer irq handler. We have irqs locally
+ * disabled, but we don't know if the timer_bh is running on
+ * another CPU. We need to avoid to SMP race by acquiring the
+ * xtime_lock.
+ */
+ write_seqlock(&xtime_lock);
+ do_timer(regs);
+ local_cpu_data->itm_next = new_itm;
+ write_sequnlock(&xtime_lock);
+ } else
+ local_cpu_data->itm_next = new_itm;
+
+ if (time_after(new_itm, ia64_get_itc()))
+ break;
+ }
+
+ do {
+ /*
+ * If we're too close to the next clock tick for
+ * comfort, we increase the safety margin by
+ * intentionally dropping the next tick(s). We do NOT
+ * update itm.next because that would force us to call
+ * do_timer() which in turn would let our clock run
+ * too fast (with the potentially devastating effect
+ * of losing monotony of time).
+ */
+ while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2))
+ new_itm += local_cpu_data->itm_delta;
+ ia64_set_itm(new_itm);
+ /* double check, in case we got hit by a (slow) PMI: */
+ } while (time_after_eq(ia64_get_itc(), new_itm));
+ return IRQ_HANDLED;
+}
+
+/*
+ * Encapsulate access to the itm structure for SMP.
+ *
+ * Routes the cycle-counter match interrupt to IA64_TIMER_VECTOR and arms the first
+ * tick for the calling CPU at "now + itm_delta + shift", where shift is a per-CPU
+ * offset (0 for CPU 0) that spreads the ticks of different CPUs over the period.
+ */
+void
+ia64_cpu_local_tick (void)
+{
+ int cpu = smp_processor_id();
+ unsigned long shift = 0, delta;
+
+ /* arrange for the cycle counter to generate a timer interrupt: */
+ ia64_set_itv(IA64_TIMER_VECTOR);
+
+ delta = local_cpu_data->itm_delta;
+ /*
+ * Stagger the timer tick for each CPU so they don't occur all at (almost) the
+ * same time:
+ */
+ if (cpu) {
+ /* presumably the largest power of two <= cpu -- confirm ia64_fls() semantics */
+ unsigned long hi = 1UL << ia64_fls(cpu);
+ shift = (2*(cpu - hi) + 1) * delta/hi/2;
+ }
+ local_cpu_data->itm_next = ia64_get_itc() + delta + shift;
+ ia64_set_itm(local_cpu_data->itm_next);
+}
+
+/* Set to 1 by the "nojitter" boot option; read by ia64_init_itm(). */
+static int nojitter;
+
+/* Handler for the "nojitter" boot option: sets the flag, logs it, returns 1. */
+static int __init nojitter_setup(char *str)
+{
+ nojitter = 1;
+ printk("Jitter checking for ITC timers disabled\n");
+ return 1;
+}
+
+__setup("nojitter", nojitter_setup);
+
+
+/*
+ * Determine the ITC and processor frequencies of the calling CPU, fill in the
+ * timing fields of local_cpu_data (itm_delta, proc_freq, itc_freq, cyc_per_usec,
+ * nsec_per_cyc), register the ITC time interpolator unless the platform reports
+ * ITC drift, and start the CPU-local timer tick.
+ *
+ * If SAL or PAL cannot supply the frequency information, fixed fallback values are
+ * used for the base frequency and for both frequency ratios.
+ */
+void __devinit
+ia64_init_itm (void)
+{
+	unsigned long platform_base_freq, itc_freq;
+	struct pal_freq_ratio itc_ratio, proc_ratio;
+	long status, platform_base_drift, itc_drift;
+
+	/*
+	 * According to SAL v2.6, we need to use a SAL call to determine the platform base
+	 * frequency and then a PAL call to determine the frequency ratio between the ITC
+	 * and the base frequency.
+	 */
+	status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
+				    &platform_base_freq, &platform_base_drift);
+	if (status != 0) {
+		printk(KERN_ERR "SAL_FREQ_BASE_PLATFORM failed: %s\n", ia64_sal_strerror(status));
+	} else {
+		status = ia64_pal_freq_ratios(&proc_ratio, NULL, &itc_ratio);
+		if (status != 0)
+			printk(KERN_ERR "PAL_FREQ_RATIOS failed with status=%ld\n", status);
+	}
+	if (status != 0) {
+		/* invent "random" values */
+		printk(KERN_ERR
+		       "SAL/PAL failed to obtain frequency info---inventing reasonable values\n");
+		platform_base_freq = 100000000;
+		platform_base_drift = -1;	/* no drift info */
+		itc_ratio.num = 3;
+		itc_ratio.den = 1;
+		/*
+		 * proc_ratio is only written by a successful PAL_FREQ_RATIOS call, so it
+		 * is still uninitialized on this path.  Give it the same invented ratio
+		 * so that proc_freq below is computed from defined values.
+		 */
+		proc_ratio.num = itc_ratio.num;
+		proc_ratio.den = itc_ratio.den;
+	}
+	if (platform_base_freq < 40000000) {
+		printk(KERN_ERR "Platform base frequency %lu bogus---resetting to 75MHz!\n",
+		       platform_base_freq);
+		platform_base_freq = 75000000;
+		platform_base_drift = -1;
+	}
+	if (!proc_ratio.den)
+		proc_ratio.den = 1;	/* avoid division by zero */
+	if (!itc_ratio.den)
+		itc_ratio.den = 1;	/* avoid division by zero */
+
+	itc_freq = (platform_base_freq*itc_ratio.num)/itc_ratio.den;
+
+	/* ITC cycles per timer tick, rounded to nearest */
+	local_cpu_data->itm_delta = (itc_freq + HZ/2) / HZ;
+	printk(KERN_DEBUG "CPU %d: base freq=%lu.%03luMHz, ITC ratio=%lu/%lu, "
+	       "ITC freq=%lu.%03luMHz", smp_processor_id(),
+	       platform_base_freq / 1000000, (platform_base_freq / 1000) % 1000,
+	       itc_ratio.num, itc_ratio.den, itc_freq / 1000000, (itc_freq / 1000) % 1000);
+
+	/* The drift (if known) completes the line started above. */
+	if (platform_base_drift != -1) {
+		itc_drift = platform_base_drift*itc_ratio.num/itc_ratio.den;
+		printk("+/-%ldppm\n", itc_drift);
+	} else {
+		itc_drift = -1;
+		printk("\n");
+	}
+
+	local_cpu_data->proc_freq = (platform_base_freq*proc_ratio.num)/proc_ratio.den;
+	local_cpu_data->itc_freq = itc_freq;
+	local_cpu_data->cyc_per_usec = (itc_freq + USEC_PER_SEC/2) / USEC_PER_SEC;
+	local_cpu_data->nsec_per_cyc = ((NSEC_PER_SEC<<IA64_NSEC_PER_CYC_SHIFT)
+					+ itc_freq/2)/itc_freq;
+
+	if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
+		itc_interpolator.frequency = local_cpu_data->itc_freq;
+		itc_interpolator.drift = itc_drift;
+#ifdef CONFIG_SMP
+		/* On IA64 in an SMP configuration ITCs are never accurately synchronized.
+		 * Jitter compensation requires a cmpxchg which may limit
+		 * the scalability of the syscalls for retrieving time.
+		 * The ITC synchronization is usually successful to within a few
+		 * ITC ticks but this is not a sure thing. If you need to improve
+		 * timer performance in SMP situations then boot the kernel with the
+		 * "nojitter" option. However, doing so may result in time fluctuating (maybe
+		 * even going backward) if the ITC offsets between the individual CPUs
+		 * are too large.
+		 */
+		if (!nojitter)
+			itc_interpolator.jitter = 1;
+#endif
+		register_time_interpolator(&itc_interpolator);
+	}
+
+	/* Setup the CPU local timer tick */
+	ia64_cpu_local_tick();
+}
+
+static struct irqaction timer_irqaction = {
+ .handler = timer_interrupt,
+ .flags = SA_INTERRUPT,
+ .name = "timer"
+};
+
+/*
+ * Boot-time clock setup: install the per-CPU timer interrupt handler, read the
+ * initial wall-clock time into xtime via EFI, and initialize the interval timer of
+ * the calling CPU.
+ */
+void __init
+time_init (void)
+{
+ register_percpu_irq(IA64_TIMER_VECTOR, &timer_irqaction);
+ efi_gettimeofday(&xtime);
+ ia64_init_itm();
+
+ /*
+ * Initialize wall_to_monotonic such that adding it to xtime will yield zero, the
+ * tv_nsec field must be normalized (i.e., 0 <= nsec < NSEC_PER_SEC).
+ */
+ set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec);
+}
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
new file mode 100644
index 00000000000..f1aafd4c05f
--- /dev/null
+++ b/arch/ia64/kernel/topology.c
@@ -0,0 +1,92 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * This file contains NUMA specific variables and functions which can
+ * be split away from DISCONTIGMEM and are used on NUMA machines with
+ * contiguous memory.
+ * 2002/08/07 Erich Focht <efocht@ess.nec.de>
+ * Populate cpu entries in sysfs for non-numa systems as well
+ * Intel Corporation - Ashok Raj
+ */
+
+#include <linux/config.h>
+#include <linux/cpu.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/node.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/nodemask.h>
+#include <asm/mmzone.h>
+#include <asm/numa.h>
+#include <asm/cpu.h>
+
+#ifdef CONFIG_NUMA
+static struct node *sysfs_nodes;
+#endif
+static struct ia64_cpu *sysfs_cpus;
+
+/*
+ * Register logical CPU @num with sysfs.  On NUMA kernels the CPU is registered
+ * under the sysfs node it belongs to; otherwise it has no parent node.
+ * Returns the result of register_cpu().
+ */
+int arch_register_cpu(int num)
+{
+#ifdef CONFIG_NUMA
+	struct node *parent = &sysfs_nodes[cpu_to_node(num)];
+#else
+	struct node *parent = NULL;
+#endif /* CONFIG_NUMA */
+
+	return register_cpu(&sysfs_cpus[num].cpu, num, parent);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * Remove logical CPU @num from sysfs.  On NUMA kernels the sysfs node the CPU
+ * belongs to is passed as the parent; otherwise the parent is NULL.
+ */
+void arch_unregister_cpu(int num)
+{
+	struct node *parent = NULL;
+
+#ifdef CONFIG_NUMA
+	int node = cpu_to_node(num);
+	parent = &sysfs_nodes[node];
+#endif /* CONFIG_NUMA */
+
+	/* This function returns void, so do not "return" the callee's expression. */
+	unregister_cpu(&sysfs_cpus[num].cpu, parent);
+}
+EXPORT_SYMBOL(arch_register_cpu);
+EXPORT_SYMBOL(arch_unregister_cpu);
+#endif /*CONFIG_HOTPLUG_CPU*/
+
+
+/*
+ * Initcall that populates the sysfs topology: allocates and zeroes the node array
+ * (NUMA only) and the CPU array, registers every online node and then every
+ * present CPU.
+ *
+ * Returns 0 on success, -ENOMEM if an allocation fails, or the first error
+ * reported by register_node()/arch_register_cpu().
+ * NOTE(review): on failure, anything already allocated or registered is left in
+ * place -- confirm that is intended.
+ */
+static int __init topology_init(void)
+{
+ int i, err = 0;
+
+#ifdef CONFIG_NUMA
+ sysfs_nodes = kmalloc(sizeof(struct node) * MAX_NUMNODES, GFP_KERNEL);
+ if (!sysfs_nodes) {
+ err = -ENOMEM;
+ goto out;
+ }
+ memset(sysfs_nodes, 0, sizeof(struct node) * MAX_NUMNODES);
+
+ /* MCD - Do we want to register all ONLINE nodes, or all POSSIBLE nodes? */
+ for_each_online_node(i)
+ if ((err = register_node(&sysfs_nodes[i], i, 0)))
+ goto out;
+#endif
+
+ sysfs_cpus = kmalloc(sizeof(struct ia64_cpu) * NR_CPUS, GFP_KERNEL);
+ if (!sysfs_cpus) {
+ err = -ENOMEM;
+ goto out;
+ }
+ memset(sysfs_cpus, 0, sizeof(struct ia64_cpu) * NR_CPUS);
+
+ for_each_present_cpu(i)
+ if((err = arch_register_cpu(i)))
+ goto out;
+out:
+ return err;
+}
+
+__initcall(topology_init);
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
new file mode 100644
index 00000000000..e82ad78081b
--- /dev/null
+++ b/arch/ia64/kernel/traps.c
@@ -0,0 +1,609 @@
+/*
+ * Architecture-specific trap handling.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 05/12/00 grao <goutham.rao@intel.com> : added isr in siginfo for SIGFPE
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/tty.h>
+#include <linux/vt_kern.h> /* For unblank_screen() */
+#include <linux/module.h> /* for EXPORT_SYMBOL */
+#include <linux/hardirq.h>
+
+#include <asm/fpswa.h>
+#include <asm/ia32.h>
+#include <asm/intrinsics.h>
+#include <asm/processor.h>
+#include <asm/uaccess.h>
+
+extern spinlock_t timerlist_lock;
+
+fpswa_interface_t *fpswa_interface;
+EXPORT_SYMBOL(fpswa_interface);
+
+/*
+ * If the boot parameters carry an FPSWA interface address, convert it to a kernel
+ * virtual address and publish it in fpswa_interface.
+ */
+void __init
+trap_init (void)
+{
+	if (!ia64_boot_param->fpswa)
+		return;
+
+	/* FPSWA fixup: make the interface pointer a kernel virtual address: */
+	fpswa_interface = __va(ia64_boot_param->fpswa);
+}
+
+/*
+ * Unlock any spinlocks which will prevent us from getting the message out (timerlist_lock
+ * is acquired through the console unblank code)
+ *
+ * With @yes non-zero this only sets oops_in_progress.  With @yes zero it unblanks
+ * the screen (CONFIG_VT), clears oops_in_progress and issues one printk at a
+ * temporarily raised console_loglevel.
+ */
+void
+bust_spinlocks (int yes)
+{
+ int loglevel_save = console_loglevel;
+
+ if (yes) {
+ oops_in_progress = 1;
+ return;
+ }
+
+#ifdef CONFIG_VT
+ unblank_screen();
+#endif
+ oops_in_progress = 0;
+ /*
+ * OK, the message is on the console. Now we call printk() without
+ * oops_in_progress set so that printk will give klogd a poke. Hold onto
+ * your hats...
+ */
+ console_loglevel = 15; /* NMI oopser may have shut the console up */
+ printk(" ");
+ console_loglevel = loglevel_save;
+}
+
+/*
+ * Report a fatal error and terminate the current task with do_exit(SIGSEGV).
+ *
+ * @str: short description printed in the report.
+ * @regs: register state handed to show_regs().
+ * @err: error code printed in the report.
+ *
+ * Reports are serialized with a private spinlock.  The lock records which CPU
+ * holds it, so a nested die() on that same CPU skips the locking instead of
+ * deadlocking; from the third nested call on, the register dump is suppressed.
+ */
+void
+die (const char *str, struct pt_regs *regs, long err)
+{
+ static struct {
+ spinlock_t lock;
+ u32 lock_owner;
+ int lock_owner_depth;
+ } die = {
+ .lock = SPIN_LOCK_UNLOCKED,
+ .lock_owner = -1,
+ .lock_owner_depth = 0
+ };
+ static int die_counter;
+
+ /* Only take the lock if this CPU is not already inside die(). */
+ if (die.lock_owner != smp_processor_id()) {
+ console_verbose();
+ spin_lock_irq(&die.lock);
+ die.lock_owner = smp_processor_id();
+ die.lock_owner_depth = 0;
+ bust_spinlocks(1);
+ }
+
+ if (++die.lock_owner_depth < 3) {
+ printk("%s[%d]: %s %ld [%d]\n",
+ current->comm, current->pid, str, err, ++die_counter);
+ show_regs(regs);
+ } else
+ printk(KERN_ERR "Recursive die() failure, output suppressed\n");
+
+ bust_spinlocks(0);
+ die.lock_owner = -1;
+ spin_unlock_irq(&die.lock);
+ do_exit(SIGSEGV);
+}
+
+/* Call die() if @regs describe kernel-mode state; do nothing for user mode. */
+void
+die_if_kernel (char *str, struct pt_regs *regs, long err)
+{
+	if (user_mode(regs))
+		return;
+
+	die(str, regs, err);
+}
+
+/*
+ * Handle a break instruction whose immediate is not otherwise handled.
+ *
+ * Maps @break_num to a signal and si_code and forces that signal on the current
+ * task, with si_addr set to the faulting instruction (iip plus slot number) and
+ * si_imm set to @break_num.  Break 0, and any break number below 0x40000 or above
+ * 0x100000 that has no case of its own, additionally triggers die_if_kernel().
+ */
+void
+ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
+{
+ siginfo_t siginfo;
+ int sig, code;
+
+ /* SIGILL, SIGFPE, SIGSEGV, and SIGBUS want these fields initialized: */
+ siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri);
+ siginfo.si_imm = break_num;
+ siginfo.si_flags = 0; /* clear __ISR_VALID */
+ siginfo.si_isr = 0;
+
+ switch (break_num) {
+ case 0: /* unknown error (used by GCC for __builtin_abort()) */
+ die_if_kernel("bugcheck!", regs, break_num);
+ sig = SIGILL; code = ILL_ILLOPC;
+ break;
+
+ case 1: /* integer divide by zero */
+ sig = SIGFPE; code = FPE_INTDIV;
+ break;
+
+ case 2: /* integer overflow */
+ sig = SIGFPE; code = FPE_INTOVF;
+ break;
+
+ case 3: /* range check/bounds check */
+ sig = SIGFPE; code = FPE_FLTSUB;
+ break;
+
+ case 4: /* null pointer dereference */
+ sig = SIGSEGV; code = SEGV_MAPERR;
+ break;
+
+ case 5: /* misaligned data */
+ /*
+ * NOTE(review): BUS_ADRALN is a SIGBUS si_code, yet the signal sent
+ * is SIGSEGV -- confirm whether this pairing is intentional.
+ */
+ sig = SIGSEGV; code = BUS_ADRALN;
+ break;
+
+ case 6: /* decimal overflow */
+ sig = SIGFPE; code = __FPE_DECOVF;
+ break;
+
+ case 7: /* decimal divide by zero */
+ sig = SIGFPE; code = __FPE_DECDIV;
+ break;
+
+ case 8: /* packed decimal error */
+ sig = SIGFPE; code = __FPE_DECERR;
+ break;
+
+ case 9: /* invalid ASCII digit */
+ sig = SIGFPE; code = __FPE_INVASC;
+ break;
+
+ case 10: /* invalid decimal digit */
+ sig = SIGFPE; code = __FPE_INVDEC;
+ break;
+
+ case 11: /* paragraph stack overflow */
+ sig = SIGSEGV; code = __SEGV_PSTKOVF;
+ break;
+
+ case 0x3f000 ... 0x3ffff: /* bundle-update in progress */
+ sig = SIGILL; code = __ILL_BNDMOD;
+ break;
+
+ default:
+ if (break_num < 0x40000 || break_num > 0x100000)
+ die_if_kernel("Bad break", regs, break_num);
+
+ /* below 0x80000: illegal break; 0x80000 and up: breakpoint trap */
+ if (break_num < 0x80000) {
+ sig = SIGILL; code = __ILL_BREAK;
+ } else {
+ sig = SIGTRAP; code = TRAP_BRKPT;
+ }
+ }
+ siginfo.si_signo = sig;
+ siginfo.si_errno = 0;
+ siginfo.si_code = code;
+ force_sig_info(sig, &siginfo, current);
+}
+
+/*
+ * disabled_fph_fault() is called when a user-level process attempts to access f32..f127
+ * and it doesn't own the fp-high register partition. When this happens, we save the
+ * current fph partition in the task_struct of the fpu-owner (if necessary) and then load
+ * the fp-high partition of the current task (if necessary). Note that the kernel has
+ * access to fph by the time we get here, as the IVT's "Disabled FP-Register" handler takes
+ * care of clearing psr.dfh.
+ *
+ * @regs: saved register state of the faulting task; its psr.dfh and psr.mfh bits
+ * are updated here.
+ */
+static inline void
+disabled_fph_fault (struct pt_regs *regs)
+{
+ struct ia64_psr *psr = ia64_psr(regs);
+
+ /* first, grant user-level access to fph partition: */
+ psr->dfh = 0;
+#ifndef CONFIG_SMP
+ {
+ struct task_struct *fpu_owner
+ = (struct task_struct *)ia64_get_kr(IA64_KR_FPU_OWNER);
+
+ /* Nothing to load if the current task already owns the partition. */
+ if (ia64_is_local_fpu_owner(current))
+ return;
+
+ /* Flush the fph state of the previous owner, if there is one. */
+ if (fpu_owner)
+ ia64_flush_fph(fpu_owner);
+ }
+#endif /* !CONFIG_SMP */
+ ia64_set_local_fpu_owner(current);
+ if ((current->thread.flags & IA64_THREAD_FPH_VALID) != 0) {
+ __ia64_load_fpu(current->thread.fph);
+ psr->mfh = 0;
+ } else {
+ __ia64_init_fpu();
+ /*
+ * Set mfh because the state in thread.fph does not match the state in
+ * the fph partition.
+ */
+ psr->mfh = 1;
+ }
+}
+
+/*
+ * Call the firmware FPSWA handler for one faulting/trapping bundle.
+ *
+ * @fp_fault: passed to FPSWA as the trap type.
+ * @bundle: the instruction bundle to handle.
+ * @ipsr, @fpsr, @isr, @pr, @ifs: pointers to the corresponding register values,
+ * passed straight through to FPSWA.
+ * @regs: register frame; &regs->f6 is given to FPSWA as the low volatile
+ * floating-point state.
+ *
+ * Returns the status field of the FPSWA result, or -1 if no FPSWA interface is
+ * available.
+ */
+static inline int
+fp_emulate (int fp_fault, void *bundle, long *ipsr, long *fpsr, long *isr, long *pr, long *ifs,
+ struct pt_regs *regs)
+{
+ fp_state_t fp_state;
+ fpswa_ret_t ret;
+
+ if (!fpswa_interface)
+ return -1;
+
+ memset(&fp_state, 0, sizeof(fp_state_t));
+
+ /*
+ * compute fp_state. only FP registers f6 - f11 are used by the
+ * kernel, so set those bits in the mask and set the low volatile
+ * pointer to point to these registers.
+ */
+ fp_state.bitmask_low64 = 0xfc0; /* bit6..bit11 */
+
+ fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) &regs->f6;
+ /*
+ * unsigned long (*EFI_FPSWA) (
+ * unsigned long trap_type,
+ * void *Bundle,
+ * unsigned long *pipsr,
+ * unsigned long *pfsr,
+ * unsigned long *pisr,
+ * unsigned long *ppreds,
+ * unsigned long *pifs,
+ * void *fp_state);
+ */
+ ret = (*fpswa_interface->fpswa)((unsigned long) fp_fault, bundle,
+ (unsigned long *) ipsr, (unsigned long *) fpsr,
+ (unsigned long *) isr, (unsigned long *) pr,
+ (unsigned long *) ifs, &fp_state);
+
+ return ret.status;
+}
+
+/*
+ * Handle floating-point assist faults and traps.
+ *
+ * @fp_fault: non-zero for a fault, zero for a trap.
+ * @regs: register state of the interrupted context.
+ * @isr: interruption status register value.
+ *
+ * Fetches the affected bundle from user space, lets FPSWA handle it through
+ * fp_emulate() and, if that reports an exception, sends SIGFPE with an si_code
+ * derived from @isr.  Returns 0 when the event was handled (including the case
+ * where a signal was sent) and -1 if the bundle could not be read or fp_emulate()
+ * returned -1.
+ */
+static int
+handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr)
+{
+ long exception, bundle[2];
+ unsigned long fault_ip;
+ struct siginfo siginfo;
+ static int fpu_swa_count = 0;
+ static unsigned long last_time;
+
+ fault_ip = regs->cr_iip;
+ /* For a trap with slot number 0, the bundle of interest is the previous one. */
+ if (!fp_fault && (ia64_psr(regs)->ri == 0))
+ fault_ip -= 16;
+ if (copy_from_user(bundle, (void __user *) fault_ip, sizeof(bundle)))
+ return -1;
+
+ /* Rate-limit the warning: at most 4 messages, counter reset after 5s of silence. */
+ if (jiffies - last_time > 5*HZ)
+ fpu_swa_count = 0;
+ if ((fpu_swa_count < 4) && !(current->thread.flags & IA64_THREAD_FPEMU_NOPRINT)) {
+ last_time = jiffies;
+ ++fpu_swa_count;
+ printk(KERN_WARNING
+ "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n",
+ current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri, isr);
+ }
+
+ exception = fp_emulate(fp_fault, bundle, &regs->cr_ipsr, &regs->ar_fpsr, &isr, &regs->pr,
+ &regs->cr_ifs, regs);
+ if (fp_fault) {
+ if (exception == 0) {
+ /* emulation was successful */
+ ia64_increment_ip(regs);
+ } else if (exception == -1) {
+ printk(KERN_ERR "handle_fpu_swa: fp_emulate() returned -1\n");
+ return -1;
+ } else {
+ /* is next instruction a trap? */
+ if (exception & 2) {
+ ia64_increment_ip(regs);
+ }
+ siginfo.si_signo = SIGFPE;
+ siginfo.si_errno = 0;
+ siginfo.si_code = __SI_FAULT; /* default code */
+ siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri);
+ if (isr & 0x11) {
+ siginfo.si_code = FPE_FLTINV;
+ } else if (isr & 0x22) {
+ /* denormal operand gets the same si_code as underflow
+ * see arch/i386/kernel/traps.c:math_error() */
+ siginfo.si_code = FPE_FLTUND;
+ } else if (isr & 0x44) {
+ siginfo.si_code = FPE_FLTDIV;
+ }
+ siginfo.si_isr = isr;
+ siginfo.si_flags = __ISR_VALID;
+ siginfo.si_imm = 0;
+ force_sig_info(SIGFPE, &siginfo, current);
+ }
+ } else {
+ if (exception == -1) {
+ printk(KERN_ERR "handle_fpu_swa: fp_emulate() returned -1\n");
+ return -1;
+ } else if (exception != 0) {
+ /* raise exception */
+ siginfo.si_signo = SIGFPE;
+ siginfo.si_errno = 0;
+ siginfo.si_code = __SI_FAULT; /* default code */
+ siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri);
+ if (isr & 0x880) {
+ siginfo.si_code = FPE_FLTOVF;
+ } else if (isr & 0x1100) {
+ siginfo.si_code = FPE_FLTUND;
+ } else if (isr & 0x2200) {
+ siginfo.si_code = FPE_FLTRES;
+ }
+ siginfo.si_isr = isr;
+ siginfo.si_flags = __ISR_VALID;
+ siginfo.si_imm = 0;
+ force_sig_info(SIGFPE, &siginfo, current);
+ }
+ }
+ return 0;
+}
+
+ /*
+  * Result of ia64_illegal_op_fault().  That function stores 0 in fkt when nothing
+  * was emulated; otherwise the struct is whatever ia64_emulate_brl() produced.
+  */
+ struct illegal_op_return {
+ 	unsigned long fkt;
+ 	unsigned long arg1;
+ 	unsigned long arg2;
+ 	unsigned long arg3;
+ };
+
+ /*
+  * Handle an illegal-operation fault.
+  *
+  * With CONFIG_IA64_BRL_EMU the fault is first offered to ia64_emulate_brl(); any
+  * result whose fkt is not (unsigned long) -1 is returned to the caller unchanged.
+  * Otherwise die_if_kernel() is called with the saved register state and then
+  * SIGILL/ILL_ILLOPC is forced on the current task, with si_addr set to cr_iip
+  * plus the psr.ri slot number.
+  *
+  * ec is only passed through to ia64_emulate_brl(); arg1..arg7 are not used here.
+  *
+  * Returns the emulation result, or an all-zero struct (fkt == 0) when the signal
+  * path was taken.
+  */
+ struct illegal_op_return
+ ia64_illegal_op_fault (unsigned long ec, long arg1, long arg2, long arg3,
+ 		       long arg4, long arg5, long arg6, long arg7,
+ 		       struct pt_regs regs)
+ {
+ 	struct illegal_op_return rv;
+ 	struct siginfo si;
+ 	char buf[128];
+
+ #ifdef CONFIG_IA64_BRL_EMU
+ 	{
+ 		extern struct illegal_op_return ia64_emulate_brl (struct pt_regs *, unsigned long);
+
+ 		rv = ia64_emulate_brl(&regs, ec);
+ 		if (rv.fkt != (unsigned long) -1)
+ 			return rv;
+ 	}
+ #endif
+
+ 	sprintf(buf, "IA-64 Illegal operation fault");
+ 	die_if_kernel(buf, &regs, 0);
+
+ 	memset(&si, 0, sizeof(si));
+ 	si.si_signo = SIGILL;
+ 	si.si_code = ILL_ILLOPC;
+ 	si.si_addr = (void __user *) (regs.cr_iip + ia64_psr(&regs)->ri);
+ 	force_sig_info(SIGILL, &si, current);
+
+ 	/*
+ 	 * Clear the whole result rather than just fkt so that arg1..arg3 are never
+ 	 * returned with indeterminate contents.
+ 	 * NOTE(review): assumes the caller ignores arg1..arg3 when fkt == 0 -- confirm.
+ 	 */
+ 	memset(&rv, 0, sizeof(rv));
+ 	return rv;
+ }
+
+ /*
+  * C-level handler for a range of interruption vectors.
+  *
+  * vector - interruption vector number; selects the case below
+  * isr    - interruption status value; decoded per vector, copied into si_isr and
+  *          passed to die_if_kernel() as the error code
+  * ifa    - faulting address; used as si_addr for NaT page consumption and debug
+  *          faults
+  * iim    - only printed for the IA-32 intercept trap (vector 46)
+  * itir, arg5, arg6, arg7 - not used here
+  * regs   - saved register state; several cases modify it in place
+  *
+  * Each case either deals with the event completely and returns, or formats a
+  * description into buf and breaks out of the switch.  In the latter case
+  * die_if_kernel() is called with that description and SIGILL is then forced on the
+  * current task.
+  */
+ void
+ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
+ 	    unsigned long iim, unsigned long itir, long arg5, long arg6,
+ 	    long arg7, struct pt_regs regs)
+ {
+ 	unsigned long code, error = isr, iip;
+ 	struct siginfo siginfo;
+ 	char buf[128];
+ 	int result, sig;
+ 	/* indexed by the 4-bit code taken from isr bits [7:4] for vector 24 */
+ 	static const char *reason[] = {
+ 		"IA-64 Illegal Operation fault",
+ 		"IA-64 Privileged Operation fault",
+ 		"IA-64 Privileged Register fault",
+ 		"IA-64 Reserved Register/Field fault",
+ 		"Disabled Instruction Set Transition fault",
+ 		"Unknown fault 5", "Unknown fault 6", "Unknown fault 7", "Illegal Hazard fault",
+ 		"Unknown fault 9", "Unknown fault 10", "Unknown fault 11", "Unknown fault 12",
+ 		"Unknown fault 13", "Unknown fault 14", "Unknown fault 15"
+ 	};
+
+ 	if ((isr & IA64_ISR_NA) && ((isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) {
+ 		/*
+ 		 * This fault was due to lfetch.fault, set "ed" bit in the psr to cancel
+ 		 * the lfetch.
+ 		 */
+ 		ia64_psr(&regs)->ed = 1;
+ 		return;
+ 	}
+
+ 	/* bundle address plus slot number of the interrupted instruction */
+ 	iip = regs.cr_iip + ia64_psr(&regs)->ri;
+
+ 	switch (vector) {
+ 	case 24: /* General Exception */
+ 		code = (isr >> 4) & 0xf;
+ 		sprintf(buf, "General Exception: %s%s", reason[code],
+ 			(code == 3) ? ((isr & (1UL << 37))
+ 				       ? " (RSE access)" : " (data access)") : "");
+ 		if (code == 8) {
+ 			/* illegal hazard: optionally logged, never fatal */
+ # ifdef CONFIG_IA64_PRINT_HAZARDS
+ 			printk("%s[%d]: possible hazard @ ip=%016lx (pr = %016lx)\n",
+ 			       current->comm, current->pid,
+ 			       regs.cr_iip + ia64_psr(&regs)->ri, regs.pr);
+ # endif
+ 			return;
+ 		}
+ 		break;
+
+ 	case 25: /* Disabled FP-Register */
+ 		if (isr & 2) {
+ 			disabled_fph_fault(&regs);
+ 			return;
+ 		}
+ 		sprintf(buf, "Disabled FPL fault---not supposed to happen!");
+ 		break;
+
+ 	case 26: /* NaT Consumption */
+ 		if (user_mode(&regs)) {
+ 			void __user *addr;
+
+ 			if (((isr >> 4) & 0xf) == 2) {
+ 				/* NaT page consumption */
+ 				sig = SIGSEGV;
+ 				code = SEGV_ACCERR;
+ 				addr = (void __user *) ifa;
+ 			} else {
+ 				/* register NaT consumption */
+ 				sig = SIGILL;
+ 				code = ILL_ILLOPN;
+ 				addr = (void __user *) (regs.cr_iip
+ 							+ ia64_psr(&regs)->ri);
+ 			}
+ 			siginfo.si_signo = sig;
+ 			siginfo.si_code = code;
+ 			siginfo.si_errno = 0;
+ 			siginfo.si_addr = addr;
+ 			siginfo.si_imm = vector;
+ 			siginfo.si_flags = __ISR_VALID;
+ 			siginfo.si_isr = isr;
+ 			force_sig_info(sig, &siginfo, current);
+ 			return;
+ 		} else if (ia64_done_with_exception(&regs))
+ 			return;
+ 		sprintf(buf, "NaT consumption");
+ 		break;
+
+ 	case 31: /* Unsupported Data Reference */
+ 		if (user_mode(&regs)) {
+ 			siginfo.si_signo = SIGILL;
+ 			siginfo.si_code = ILL_ILLOPN;
+ 			siginfo.si_errno = 0;
+ 			siginfo.si_addr = (void __user *) iip;
+ 			siginfo.si_imm = vector;
+ 			siginfo.si_flags = __ISR_VALID;
+ 			siginfo.si_isr = isr;
+ 			force_sig_info(SIGILL, &siginfo, current);
+ 			return;
+ 		}
+ 		sprintf(buf, "Unsupported data reference");
+ 		break;
+
+ 	case 29: /* Debug */
+ 	case 35: /* Taken Branch Trap */
+ 	case 36: /* Single Step Trap */
+ 		if (fsys_mode(current, &regs)) {
+ 			extern char __kernel_syscall_via_break[];
+ 			/*
+ 			 * Got a trap in fsys-mode: Taken Branch Trap and Single Step trap
+ 			 * need special handling; Debug trap is not supposed to happen.
+ 			 */
+ 			if (unlikely(vector == 29)) {
+ 				die("Got debug trap in fsys-mode---not supposed to happen!",
+ 				    &regs, 0);
+ 				return;
+ 			}
+ 			/* re-do the system call via break 0x100000: */
+ 			regs.cr_iip = (unsigned long) __kernel_syscall_via_break;
+ 			ia64_psr(&regs)->ri = 0;
+ 			ia64_psr(&regs)->cpl = 3;
+ 			return;
+ 		}
+ 		switch (vector) {
+ 		case 29:
+ 			siginfo.si_code = TRAP_HWBKPT;
+ #ifdef CONFIG_ITANIUM
+ 			/*
+ 			 * Erratum 10 (IFA may contain incorrect address) now has
+ 			 * "NoFix" status.  There are no plans for fixing this.
+ 			 */
+ 			if (ia64_psr(&regs)->is == 0)
+ 				ifa = regs.cr_iip;
+ #endif
+ 			break;
+ 		case 35: siginfo.si_code = TRAP_BRANCH; ifa = 0; break;
+ 		case 36: siginfo.si_code = TRAP_TRACE; ifa = 0; break;
+ 		}
+ 		siginfo.si_signo = SIGTRAP;
+ 		siginfo.si_errno = 0;
+ 		siginfo.si_addr = (void __user *) ifa;
+ 		siginfo.si_imm = 0;
+ 		siginfo.si_flags = __ISR_VALID;
+ 		siginfo.si_isr = isr;
+ 		force_sig_info(SIGTRAP, &siginfo, current);
+ 		return;
+
+ 	case 32: /* fp fault */
+ 	case 33: /* fp trap */
+ 		result = handle_fpu_swa((vector == 32) ? 1 : 0, &regs, isr);
+ 		if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) {
+ 			siginfo.si_signo = SIGFPE;
+ 			siginfo.si_errno = 0;
+ 			siginfo.si_code = FPE_FLTINV;
+ 			siginfo.si_addr = (void __user *) iip;
+ 			siginfo.si_flags = __ISR_VALID;
+ 			siginfo.si_isr = isr;
+ 			siginfo.si_imm = 0;
+ 			force_sig_info(SIGFPE, &siginfo, current);
+ 		}
+ 		return;
+
+ 	case 34:
+ 		if (isr & 0x2) {
+ 			/* Lower-Privilege Transfer Trap */
+ 			/*
+ 			 * Just clear PSR.lp and then return immediately: all the
+ 			 * interesting work (e.g., signal delivery is done in the kernel
+ 			 * exit path).
+ 			 */
+ 			ia64_psr(&regs)->lp = 0;
+ 			return;
+ 		} else {
+ 			/* Unimplemented Instr. Address Trap */
+ 			if (user_mode(&regs)) {
+ 				siginfo.si_signo = SIGILL;
+ 				siginfo.si_code = ILL_BADIADDR;
+ 				siginfo.si_errno = 0;
+ 				siginfo.si_flags = 0;
+ 				siginfo.si_isr = 0;
+ 				siginfo.si_imm = 0;
+ 				siginfo.si_addr = (void __user *) iip;
+ 				force_sig_info(SIGILL, &siginfo, current);
+ 				return;
+ 			}
+ 			sprintf(buf, "Unimplemented Instruction Address fault");
+ 		}
+ 		break;
+
+ 	case 45:
+ #ifdef CONFIG_IA32_SUPPORT
+ 		if (ia32_exception(&regs, isr) == 0)
+ 			return;
+ #endif
+ 		printk(KERN_ERR "Unexpected IA-32 exception (Trap 45)\n");
+ 		printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n",
+ 		       iip, ifa, isr);
+ 		force_sig(SIGSEGV, current);
+ 		/*
+ 		 * Return, as the intercept case below does.  Nothing has been written
+ 		 * to buf on this path, so falling out of the switch would hand an
+ 		 * uninitialized buffer to die_if_kernel() and queue a SIGILL on top of
+ 		 * the SIGSEGV just sent.
+ 		 */
+ 		return;
+
+ 	case 46:
+ #ifdef CONFIG_IA32_SUPPORT
+ 		if (ia32_intercept(&regs, isr) == 0)
+ 			return;
+ #endif
+ 		printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n");
+ 		printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 0x%lx\n",
+ 		       iip, ifa, isr, iim);
+ 		force_sig(SIGSEGV, current);
+ 		return;
+
+ 	case 47:
+ 		sprintf(buf, "IA-32 Interruption Fault (int 0x%lx)", isr >> 16);
+ 		break;
+
+ 	default:
+ 		sprintf(buf, "Fault %lu", vector);
+ 		break;
+ 	}
+ 	/* only reached by the cases that filled in buf */
+ 	die_if_kernel(buf, &regs, error);
+ 	force_sig(SIGILL, current);
+ }
diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c
new file mode 100644
index 00000000000..43b45b65ee5
--- /dev/null
+++ b/arch/ia64/kernel/unaligned.c
@@ -0,0 +1,1521 @@
+/*
+ * Architecture-specific unaligned trap handling.
+ *
+ * Copyright (C) 1999-2002, 2004 Hewlett-Packard Co
+ * Stephane Eranian <eranian@hpl.hp.com>
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 2002/12/09 Fix rotating register handling (off-by-1 error, missing fr-rotation). Fix
+ * get_rse_reg() to not leak kernel bits to user-level (reading an out-of-frame
+ * stacked register returns an undefined value; it does NOT trigger a
+ * "rsvd register fault").
+ * 2001/10/11 Fix unaligned access to rotating registers in s/w pipelined loops.
+ * 2001/08/13 Correct size of extended floats (float_fsz) from 16 to 10 bytes.
+ * 2001/01/17 Add support for emulation of unaligned kernel accesses.
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp_lock.h>
+#include <linux/tty.h>
+
+#include <asm/intrinsics.h>
+#include <asm/processor.h>
+#include <asm/rse.h>
+#include <asm/uaccess.h>
+#include <asm/unaligned.h>
+
+ /*
+  * Deliberately not declared noreturn: ia64_fault() and ia64_illegal_op_fault() in
+  * traps.c run further code (force_sig) after calling die_if_kernel(), so the
+  * function does return on some paths.  Promising the compiler otherwise would make
+  * any such return undefined behaviour for callers in this file.
+  */
+ extern void die_if_kernel(char *str, struct pt_regs *regs, long err);
+
+ /* Change the #undef below to a #define to get verbose tracing of the emulation. */
+ #undef DEBUG_UNALIGNED_TRAP
+
+ #ifdef DEBUG_UNALIGNED_TRAP
+ /* DPRINT() prefixes every message with the calling function and line number. */
+ # define DPRINT(a...) do { printk("%s %u: ", __FUNCTION__, __LINE__); printk (a); } while (0)
+ # define DDUMP(str,vp,len) dump(str, vp, len)
+
+ /* Print STR, then the LEN bytes starting at VP as two-digit hex values, then a newline. */
+ static void
+ dump (const char *str, void *vp, size_t len)
+ {
+ 	const unsigned char *cp = vp;
+ 	size_t i;	/* same type as len, so the loop test is not a signed/unsigned compare */
+
+ 	printk("%s", str);
+ 	for (i = 0; i < len; ++i)
+ 		printk (" %02x", cp[i]);
+ 	printk("\n");
+ }
+ #else
+ /* Tracing disabled: both macros expand to nothing. */
+ # define DPRINT(a...)
+ # define DDUMP(str,vp,len)
+ #endif
+
+ #define IA64_FIRST_STACKED_GR 32 /* GRs numbered >= this are handled by set_rse_reg()/get_rse_reg() */
+ #define IA64_FIRST_ROTATING_FR 32 /* FRs numbered >= this are accessed through current->thread.fph */
+ #define SIGN_EXT9 0xffffffffffffff00ul /* OR'ed into an 8-bit immediate to sign-extend a negative imm9 */
+
+/*
+ * For M-unit:
+ *
+ * opcode | m | x6 |
+ * --------|------|---------|
+ * [40-37] | [36] | [35:30] |
+ * --------|------|---------|
+ * 4 | 1 | 6 | = 11 bits
+ * --------------------------
+ * However bits [31:30] are not directly useful to distinguish between
+ * load/store so we can use [35:32] instead, which gives the following
+ * mask ([40:32]) using 9 bits. The 'e' comes from the fact that we defer
+ * checking the m-bit until later in the load/store emulation.
+ */
+#define IA64_OPCODE_MASK 0x1ef
+#define IA64_OPCODE_SHIFT 32
+
+/*
+ * Table C-28 Integer Load/Store
+ *
+ * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
+ *
+ * ld8.fill, st8.fill MUST be aligned because the RNATs are based on
+ * the address (bits [8:3]), so we must failed.
+ */
+#define LD_OP 0x080
+#define LDS_OP 0x081
+#define LDA_OP 0x082
+#define LDSA_OP 0x083
+#define LDBIAS_OP 0x084
+#define LDACQ_OP 0x085
+/* 0x086, 0x087 are not relevant */
+#define LDCCLR_OP 0x088
+#define LDCNC_OP 0x089
+#define LDCCLRACQ_OP 0x08a
+#define ST_OP 0x08c
+#define STREL_OP 0x08d
+/* 0x08e,0x8f are not relevant */
+
+/*
+ * Table C-29 Integer Load +Reg
+ *
+ * we use the ld->m (bit [36:36]) field to determine whether or not we have
+ * a load/store of this form.
+ */
+
+/*
+ * Table C-30 Integer Load/Store +Imm
+ *
+ * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
+ *
+ * ld8.fill, st8.fill must be aligned because the Nat register are based on
+ * the address, so we must fail and the program must be fixed.
+ */
+#define LD_IMM_OP 0x0a0
+#define LDS_IMM_OP 0x0a1
+#define LDA_IMM_OP 0x0a2
+#define LDSA_IMM_OP 0x0a3
+#define LDBIAS_IMM_OP 0x0a4
+#define LDACQ_IMM_OP 0x0a5
+/* 0x0a6, 0xa7 are not relevant */
+#define LDCCLR_IMM_OP 0x0a8
+#define LDCNC_IMM_OP 0x0a9
+#define LDCCLRACQ_IMM_OP 0x0aa
+#define ST_IMM_OP 0x0ac
+#define STREL_IMM_OP 0x0ad
+/* 0x0ae,0xaf are not relevant */
+
+/*
+ * Table C-32 Floating-point Load/Store
+ */
+#define LDF_OP 0x0c0
+#define LDFS_OP 0x0c1
+#define LDFA_OP 0x0c2
+#define LDFSA_OP 0x0c3
+/* 0x0c6 is irrelevant */
+#define LDFCCLR_OP 0x0c8
+#define LDFCNC_OP 0x0c9
+/* 0x0cb is irrelevant */
+#define STF_OP 0x0cc
+
+/*
+ * Table C-33 Floating-point Load +Reg
+ *
+ * we use the ld->m (bit [36:36]) field to determine whether or not we have
+ * a load/store of this form.
+ */
+
+/*
+ * Table C-34 Floating-point Load/Store +Imm
+ */
+#define LDF_IMM_OP 0x0e0
+#define LDFS_IMM_OP 0x0e1
+#define LDFA_IMM_OP 0x0e2
+#define LDFSA_IMM_OP 0x0e3
+/* 0x0e6 is irrelevant */
+#define LDFCCLR_IMM_OP 0x0e8
+#define LDFCNC_IMM_OP 0x0e9
+#define STF_IMM_OP 0x0ec
+
+ /*
+ * Bit-field view of an M-unit load/store instruction. The emulation code in this
+ * file reads the fields as follows: r1 is the load target (and supplies the low 7
+ * immediate bits of a store +imm), imm names register r2 in the register forms (and
+ * supplies the low 7 immediate bits of a load +imm), r3 is the base register, x is
+ * immediate bit 7 and m is the immediate sign / register-update flag.
+ */
+ typedef struct {
+ unsigned long qp:6; /* [0:5] */
+ unsigned long r1:7; /* [6:12] */
+ unsigned long imm:7; /* [13:19] */
+ unsigned long r3:7; /* [20:26] */
+ unsigned long x:1; /* [27:27] */
+ unsigned long hint:2; /* [28:29] */
+ unsigned long x6_sz:2; /* [30:31], access size is 1 << x6_sz bytes */
+ unsigned long x6_op:4; /* [32:35], x6 = x6_sz|x6_op */
+ unsigned long m:1; /* [36:36] */
+ unsigned long op:4; /* [37:40] */
+ unsigned long pad:23; /* [41:63] */
+ } load_store_t;
+
+
+ /* Which base-register update form emulate_load_updates() has to apply. */
+ typedef enum {
+ UPD_IMMEDIATE, /* ldXZ r1=[r3],imm(9) */
+ UPD_REG /* ldXZ r1=[r3],r2 */
+ } update_t;
+
+/*
+ * We use tables to keep track of the offsets of registers in the saved state.
+ * This way we save having big switch/case statements.
+ *
+ * We use bit 0 to indicate switch_stack or pt_regs.
+ * The offset is simply shifted by 1 bit.
+ * A 2-byte value should be enough to hold any kind of offset
+ *
+ * In case the calling convention changes (and thus pt_regs/switch_stack)
+ * simply use RSW instead of RPT or vice-versa.
+ */
+
+#define RPO(x) ((size_t) &((struct pt_regs *)0)->x)
+#define RSO(x) ((size_t) &((struct switch_stack *)0)->x)
+
+#define RPT(x) (RPO(x) << 1)
+#define RSW(x) (1| RSO(x)<<1)
+
+#define GR_OFFS(x) (gr_info[x]>>1)
+#define GR_IN_SW(x) (gr_info[x] & 0x1)
+
+#define FR_OFFS(x) (fr_info[x]>>1)
+#define FR_IN_SW(x) (fr_info[x] & 0x1)
+
+ /*
+ * Save-area location of each general register r0-r31, encoded with RPT()/RSW():
+ * r4-r7 are looked up in struct switch_stack, all others in struct pt_regs.
+ */
+ static u16 gr_info[32]={
+ 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */
+
+ RPT(r1), RPT(r2), RPT(r3),
+
+ RSW(r4), RSW(r5), RSW(r6), RSW(r7),
+
+ RPT(r8), RPT(r9), RPT(r10), RPT(r11),
+ RPT(r12), RPT(r13), RPT(r14), RPT(r15),
+
+ RPT(r16), RPT(r17), RPT(r18), RPT(r19),
+ RPT(r20), RPT(r21), RPT(r22), RPT(r23),
+ RPT(r24), RPT(r25), RPT(r26), RPT(r27),
+ RPT(r28), RPT(r29), RPT(r30), RPT(r31)
+ };
+
+ /*
+ * Save-area location of each floating-point register f0-f31, encoded with
+ * RPT()/RSW(): f6-f11 are looked up in struct pt_regs, f2-f5 and f12-f31 in
+ * struct switch_stack. f0 and f1 have no slot; getfpreg() generates them on the fly.
+ */
+ static u16 fr_info[32]={
+ 0, /* constant : WE SHOULD NEVER GET THIS */
+ 0, /* constant : WE SHOULD NEVER GET THIS */
+
+ RSW(f2), RSW(f3), RSW(f4), RSW(f5),
+
+ RPT(f6), RPT(f7), RPT(f8), RPT(f9),
+ RPT(f10), RPT(f11),
+
+ RSW(f12), RSW(f13), RSW(f14),
+ RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19),
+ RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24),
+ RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29),
+ RSW(f30), RSW(f31)
+ };
+
+ /*
+ * Invalidate ALAT entry for integer register REGNO.
+ *
+ * The switch maps the run-time register number onto one ia64_invala_gr() call per
+ * literal register number 0-127 (presumably because the intrinsic needs a
+ * compile-time constant operand -- confirm). Any other REGNO is silently ignored.
+ */
+ static void
+ invala_gr (int regno)
+ {
+ # define F(reg) case reg: ia64_invala_gr(reg); break
+
+ switch (regno) {
+ F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7);
+ F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
+ F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
+ F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
+ F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
+ F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
+ F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
+ F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
+ F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
+ F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
+ F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
+ F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
+ F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
+ F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
+ F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
+ F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
+ }
+ # undef F
+ }
+
+ /*
+ * Invalidate ALAT entry for floating-point register REGNO.
+ *
+ * The switch maps the run-time register number onto one ia64_invala_fr() call per
+ * literal register number 0-127 (presumably because the intrinsic needs a
+ * compile-time constant operand -- confirm). Any other REGNO is silently ignored.
+ */
+ static void
+ invala_fr (int regno)
+ {
+ # define F(reg) case reg: ia64_invala_fr(reg); break
+
+ switch (regno) {
+ F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7);
+ F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
+ F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
+ F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
+ F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
+ F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
+ F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
+ F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
+ F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
+ F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
+ F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
+ F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
+ F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
+ F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
+ F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
+ F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
+ }
+ # undef F
+ }
+
+ /*
+  * Add rotation base RRB to register index REG, wrapping once at SOR (the size of
+  * the rotating region).  Only a single subtraction is done, so the result is in
+  * range only when REG + RRB < 2 * SOR.
+  */
+ static inline unsigned long
+ rotate_reg (unsigned long sor, unsigned long rrb, unsigned long reg)
+ {
+ 	unsigned long rotated = reg + rrb;
+
+ 	return (rotated >= sor) ? rotated - sor : rotated;
+ }
+
+ /*
+ * Write VAL and NaT bit NAT to stacked register R1 (r32 and up) of the frame
+ * described by regs->cr_ifs.
+ *
+ * The register is located relative to sw->ar_bspstore. If that slot lies on the
+ * kernel register backing store it is written directly; otherwise, provided
+ * user_stack() says so, the matching slot on the user backing store is updated
+ * through ia64_poke()/ia64_peek(). Writes to registers outside the current frame,
+ * and kernel-mode writes that miss the kernel backing store, are ignored.
+ */
+ static void
+ set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat)
+ {
+ /* the switch_stack is taken to sit immediately below pt_regs */
+ struct switch_stack *sw = (struct switch_stack *) regs - 1;
+ unsigned long *bsp, *bspstore, *addr, *rnat_addr, *ubs_end;
+ unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
+ unsigned long rnats, nat_mask;
+ unsigned long on_kbs;
+ /* cr_ifs fields: [6:0] frame size, [17:14] rotating size / 8, [24:18] rrb.gr */
+ long sof = (regs->cr_ifs) & 0x7f;
+ long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
+ long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
+ long ridx = r1 - 32;
+
+ if (ridx >= sof) {
+ /* this should never happen, as the "rsvd register fault" has higher priority */
+ DPRINT("ignoring write to r%lu; only %lu registers are allocated!\n", r1, sof);
+ return;
+ }
+
+ /* registers inside the rotating region are renamed by rrb.gr */
+ if (ridx < sor)
+ ridx = rotate_reg(sor, rrb_gr, ridx);
+
+ DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
+ r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
+
+ /* number of registers currently held on the kernel backing store */
+ on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
+ addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
+ if (addr >= kbs) {
+ /* the register is on the kernel backing store: easy... */
+ rnat_addr = ia64_rse_rnat_addr(addr);
+ /* NaT collection not yet written to memory: use the saved ar.rnat instead */
+ if ((unsigned long) rnat_addr >= sw->ar_bspstore)
+ rnat_addr = &sw->ar_rnat;
+ nat_mask = 1UL << ia64_rse_slot_num(addr);
+
+ *addr = val;
+ if (nat)
+ *rnat_addr |= nat_mask;
+ else
+ *rnat_addr &= ~nat_mask;
+ return;
+ }
+
+ if (!user_stack(current, regs)) {
+ DPRINT("ignoring kernel write to r%lu; register isn't on the kernel RBS!", r1);
+ return;
+ }
+
+ /* redo the address computation relative to the user backing store */
+ bspstore = (unsigned long *)regs->ar_bspstore;
+ ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
+ bsp = ia64_rse_skip_regs(ubs_end, -sof);
+ addr = ia64_rse_skip_regs(bsp, ridx);
+
+ DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
+
+ ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
+
+ rnat_addr = ia64_rse_rnat_addr(addr);
+
+ /* read-modify-write the NaT collection word that covers addr */
+ ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
+ DPRINT("rnat @%p = 0x%lx nat=%d old nat=%ld\n",
+ (void *) rnat_addr, rnats, nat, (rnats >> ia64_rse_slot_num(addr)) & 1);
+
+ nat_mask = 1UL << ia64_rse_slot_num(addr);
+ if (nat)
+ rnats |= nat_mask;
+ else
+ rnats &= ~nat_mask;
+ ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, rnats);
+
+ DPRINT("rnat changed to @%p = 0x%lx\n", (void *) rnat_addr, rnats);
+ }
+
+
+ /*
+  * Read stacked register R1 (r32 and up) of the frame described by regs->cr_ifs.
+  *
+  * *val receives the register contents.  If NAT is non-NULL, *nat receives the
+  * register's NaT bit (0 or 1).
+  *
+  * The register is located relative to sw->ar_bspstore.  If that slot lies on the
+  * kernel register backing store it is read directly; otherwise, provided
+  * user_stack() says so, the matching slot on the user backing store is read
+  * through ia64_peek().  Reads of a register outside the current frame, and
+  * kernel-mode reads that miss the kernel backing store, yield value 0 and NaT 0.
+  */
+ static void
+ get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, int *nat)
+ {
+ 	/* the switch_stack is taken to sit immediately below pt_regs */
+ 	struct switch_stack *sw = (struct switch_stack *) regs - 1;
+ 	unsigned long *bsp, *addr, *rnat_addr, *ubs_end, *bspstore;
+ 	unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
+ 	unsigned long rnats, nat_mask;
+ 	unsigned long on_kbs;
+ 	/* cr_ifs fields: [6:0] frame size, [17:14] rotating size / 8, [24:18] rrb.gr */
+ 	long sof = (regs->cr_ifs) & 0x7f;
+ 	long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
+ 	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
+ 	long ridx = r1 - 32;
+
+ 	if (ridx >= sof) {
+ 		/* read of out-of-frame register returns an undefined value; 0 in our case. */
+ 		DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof);
+ 		goto fail;
+ 	}
+
+ 	/* registers inside the rotating region are renamed by rrb.gr */
+ 	if (ridx < sor)
+ 		ridx = rotate_reg(sor, rrb_gr, ridx);
+
+ 	DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
+ 	       r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
+
+ 	/* number of registers currently held on the kernel backing store */
+ 	on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
+ 	addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
+ 	if (addr >= kbs) {
+ 		/* the register is on the kernel backing store: easy... */
+ 		*val = *addr;
+ 		if (nat) {
+ 			rnat_addr = ia64_rse_rnat_addr(addr);
+ 			/* NaT collection not yet written to memory: use the saved ar.rnat */
+ 			if ((unsigned long) rnat_addr >= sw->ar_bspstore)
+ 				rnat_addr = &sw->ar_rnat;
+ 			nat_mask = 1UL << ia64_rse_slot_num(addr);
+ 			*nat = (*rnat_addr & nat_mask) != 0;
+ 		}
+ 		return;
+ 	}
+
+ 	if (!user_stack(current, regs)) {
+ 		DPRINT("ignoring kernel read of r%lu; register isn't on the RBS!", r1);
+ 		goto fail;
+ 	}
+
+ 	/* redo the address computation relative to the user backing store */
+ 	bspstore = (unsigned long *)regs->ar_bspstore;
+ 	ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
+ 	bsp = ia64_rse_skip_regs(ubs_end, -sof);
+ 	addr = ia64_rse_skip_regs(bsp, ridx);
+
+ 	DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
+
+ 	ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
+
+ 	if (nat) {
+ 		rnat_addr = ia64_rse_rnat_addr(addr);
+ 		nat_mask = 1UL << ia64_rse_slot_num(addr);
+
+ 		ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
+ 		/* trace only after the peek: rnats holds no value before it */
+ 		DPRINT("rnat @%p = 0x%lx\n", (void *) rnat_addr, rnats);
+ 		*nat = (rnats & nat_mask) != 0;
+ 	}
+ 	return;
+
+   fail:
+ 	*val = 0;
+ 	if (nat)
+ 		*nat = 0;
+ 	return;
+ }
+
+
+ /*
+ * Write VAL and NaT bit NAT to general register REGNUM in the saved state of the
+ * interrupted context. Stacked registers (r32 and up) are delegated to
+ * set_rse_reg(); the others are stored into the pt_regs or switch_stack slot named
+ * by gr_info[], and the matching bit of the relevant UNAT word is set or cleared.
+ */
+ static void
+ setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
+ {
+ struct switch_stack *sw = (struct switch_stack *) regs - 1;
+ unsigned long addr;
+ unsigned long bitmask;
+ unsigned long *unat;
+
+ /*
+ * First takes care of stacked registers
+ */
+ if (regnum >= IA64_FIRST_STACKED_GR) {
+ set_rse_reg(regs, regnum, val, nat);
+ return;
+ }
+
+ /*
+ * Using r0 as a target raises a General Exception fault which has higher priority
+ * than the Unaligned Reference fault.
+ */
+
+ /*
+ * Now look at registers in [0-31] range and init correct UNAT
+ */
+ if (GR_IN_SW(regnum)) {
+ addr = (unsigned long)sw;
+ unat = &sw->ar_unat;
+ } else {
+ addr = (unsigned long)regs;
+ unat = &sw->caller_unat;
+ }
+ DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n",
+ addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum));
+ /*
+ * add offset from base of struct
+ * and do it !
+ */
+ addr += GR_OFFS(regnum);
+
+ *(unsigned long *)addr = val;
+
+ /*
+ * We need to update the corresponding UNAT bit to fully emulate the load
+ * UNAT bit_pos = GR[r3]{8:3} from EAS-2.4; here the position is taken from
+ * bits {8:3} of the address of the save slot just written.
+ */
+ bitmask = 1UL << (addr >> 3 & 0x3f);
+ DPRINT("*0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, (void *) unat, *unat);
+ if (nat) {
+ *unat |= bitmask;
+ } else {
+ *unat &= ~bitmask;
+ }
+ DPRINT("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void *) unat,*unat);
+ }
+
+/*
+ * Return the (rotated) index for floating point register REGNUM (REGNUM must be in the
+ * range from 32-127, result is in the range from 0-95.
+ */
+static inline unsigned long
+fph_index (struct pt_regs *regs, long regnum)
+{
+ unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
+ return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
+}
+
+ /*
+  * Store *FPVAL into floating-point register REGNUM of the interrupted context.
+  *
+  * From EAS-2.5: FPDisableFault has higher priority than Unaligned Fault, so by
+  * the time we get here the partition is known to be enabled.
+  */
+ static void
+ setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
+ {
+ 	struct switch_stack *sw = (struct switch_stack *)regs - 1;
+ 	unsigned long base;
+ 	struct ia64_fpreg *slot;
+
+ 	if (regnum >= IA64_FIRST_ROTATING_FR) {
+ 		/*
+ 		 * f32-f127 could be updated in three ways:
+ 		 *
+ 		 *  (1) save f32-f127 to thread.fph and update the values there
+ 		 *  (2) use a gigantic switch statement to directly access the registers
+ 		 *  (3) generate code on the fly to update the desired register
+ 		 *
+ 		 * For now, approach (1) is used.
+ 		 */
+ 		ia64_sync_fph(current);
+ 		current->thread.fph[fph_index(regs, regnum)] = *fpval;
+ 		return;
+ 	}
+
+ 	/* f0-f31: the save slot is in either switch_stack or pt_regs */
+ 	base = FR_IN_SW(regnum) ? (unsigned long)sw : (unsigned long)regs;
+
+ 	DPRINT("tmp_base=%lx offset=%d\n", base, FR_OFFS(regnum));
+
+ 	slot = (struct ia64_fpreg *) (base + FR_OFFS(regnum));
+ 	*slot = *fpval;
+
+ 	/*
+ 	 * Flag the low partition as modified.  The bit is very likely set already,
+ 	 * but setting it again is harmless.
+ 	 */
+ 	regs->cr_ipsr |= IA64_PSR_MFL;
+ }
+
+/*
+ * Those 2 inline functions generate the spilled versions of the constant floating point
+ * registers which can be used with stfX
+ */
+static inline void
+float_spill_f0 (struct ia64_fpreg *final)
+{
+ ia64_stf_spill(final, 0);
+}
+
+ /* Store the spilled form of constant register f1 into *FINAL. */
+ static inline void
+ float_spill_f1 (struct ia64_fpreg *final)
+ {
+ ia64_stf_spill(final, 1);
+ }
+
+ /*
+  * Fetch floating-point register REGNUM of the interrupted context into *FPVAL.
+  *
+  * From EAS-2.5: FPDisableFault has higher priority than Unaligned Fault, so by
+  * the time we get here the partition is known to be enabled.
+  */
+ static void
+ getfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
+ {
+ 	struct switch_stack *sw = (struct switch_stack *) regs - 1;
+ 	unsigned long base;
+
+ 	if (regnum >= IA64_FIRST_ROTATING_FR) {
+ 		/*
+ 		 * Registers above f31 are still live, so force a save to
+ 		 * current->thread.fph and read the value from there.  setfpreg()
+ 		 * discusses the reasons and the alternatives.
+ 		 */
+ 		ia64_flush_fph(current);
+ 		*fpval = current->thread.fph[fph_index(regs, regnum)];
+ 		return;
+ 	}
+
+ 	/*
+ 	 * f0 = 0.0 and f1 = 1.0 are constants and are never saved; their spilled
+ 	 * form has to be produced on the fly.
+ 	 */
+ 	if (regnum == 0) {
+ 		float_spill_f0(fpval);
+ 		return;
+ 	}
+ 	if (regnum == 1) {
+ 		float_spill_f1(fpval);
+ 		return;
+ 	}
+
+ 	/* f2-f31: the save slot is in either switch_stack or pt_regs */
+ 	base = FR_IN_SW(regnum) ? (unsigned long)sw : (unsigned long)regs;
+
+ 	DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n",
+ 	       FR_IN_SW(regnum), base, FR_OFFS(regnum));
+
+ 	*fpval = *(struct ia64_fpreg *) (base + FR_OFFS(regnum));
+ }
+
+
+ /*
+  * Read general register REGNUM of the interrupted context into *VAL.  If NAT is
+  * non-NULL, *nat receives the register's NaT bit.  Stacked registers (r32 and up)
+  * are delegated to get_rse_reg(); r0 always reads as 0 with NaT clear.
+  */
+ static void
+ getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
+ {
+ 	struct switch_stack *sw = (struct switch_stack *) regs - 1;
+ 	unsigned long base, slot, *unat;
+
+ 	if (regnum >= IA64_FIRST_STACKED_GR) {
+ 		get_rse_reg(regs, regnum, val, nat);
+ 		return;
+ 	}
+
+ 	if (regnum == 0) {
+ 		/* r0 is read-only and always evaluates to 0 */
+ 		*val = 0;
+ 		if (nat)
+ 			*nat = 0;
+ 		return;
+ 	}
+
+ 	/* r1-r31: pick the structure holding the save slot and the matching UNAT */
+ 	if (GR_IN_SW(regnum)) {
+ 		base = (unsigned long)sw;
+ 		unat = &sw->ar_unat;
+ 	} else {
+ 		base = (unsigned long)regs;
+ 		unat = &sw->caller_unat;
+ 	}
+
+ 	DPRINT("addr_base=%lx offset=0x%x\n", base, GR_OFFS(regnum));
+
+ 	slot = base + GR_OFFS(regnum);
+ 	*val = *(unsigned long *)slot;
+
+ 	/* the NaT bit is looked up only when the caller asked for it */
+ 	if (nat)
+ 		*nat = (*unat >> ((slot >> 3) & 0x3f)) & 0x1UL;
+ }
+
+ /*
+ * Apply the base-register update of a load whose access was emulated: r3 is set to
+ * IFA plus either the 9-bit immediate (TYPE == UPD_IMMEDIATE) or the contents of r2
+ * (otherwise, and only when ld.m is set). In the register form the NaT bit of r2 is
+ * propagated to r3.
+ */
+ static void
+ emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa)
+ {
+ /*
+ * IMPORTANT:
+ * Given the way we handle unaligned speculative loads, we should
+ * not get to this point in the code but we keep this sanity check,
+ * just in case.
+ */
+ if (ld.x6_op == 1 || ld.x6_op == 3) {
+ printk(KERN_ERR "%s: register update on speculative load, error\n", __FUNCTION__);
+ die_if_kernel("unaligned reference on speculative load with register update\n",
+ regs, 30);
+ }
+
+
+ /*
+ * at this point, we know that the base register to update is valid i.e.,
+ * it's not r0
+ */
+ if (type == UPD_IMMEDIATE) {
+ unsigned long imm;
+
+ /*
+ * Load +Imm: ldXZ r1=[r3],imm(9)
+ *
+ *
+ * form imm9: [13:19] contain the first 7 bits, ld.x supplies bit 7
+ */
+ imm = ld.x << 7 | ld.imm;
+
+ /*
+ * sign extend (1+8bits) if m set
+ */
+ if (ld.m) imm |= SIGN_EXT9;
+
+ /*
+ * ifa == r3 and we know that the NaT bit on r3 was clear so
+ * we can directly use ifa.
+ */
+ ifa += imm;
+
+ setreg(ld.r3, ifa, 0, regs);
+
+ DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa);
+
+ } else if (ld.m) {
+ unsigned long r2;
+ int nat_r2;
+
+ /*
+ * Load +Reg Opcode: ldXZ r1=[r3],r2
+ *
+ * Note: that we update r3 even in the case of ldfX.a
+ * (where the load does not happen)
+ *
+ * The way the load algorithm works, we know that r3 does not
+ * have its NaT bit set (would have gotten NaT consumption
+ * before getting the unaligned fault). So we can use ifa
+ * which equals r3 at this point.
+ *
+ * IMPORTANT:
+ * The above statement holds ONLY because we know that we
+ * never reach this code when trying to do a ldX.s.
+ * If we ever make it to here on an ldfX.s then this reasoning
+ * presumably no longer holds (NOTE(review): the original sentence
+ * was cut off here -- confirm the intended wording).
+ */
+ getreg(ld.imm, &r2, &nat_r2, regs);
+
+ ifa += r2;
+
+ /*
+ * propagate Nat r2 -> r3
+ */
+ setreg(ld.r3, ifa, nat_r2, regs);
+
+ DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld.imm, r2, ifa, nat_r2);
+ }
+ }
+
+
+ /*
+ * Emulate an unaligned integer load of 1 << ld.x6_sz bytes from address IFA into
+ * register ld.r1 (NaT cleared), then apply any base-register update, the .acq
+ * ordering fence and the ALAT invalidation required by the instruction form.
+ *
+ * Returns 0 on success, -1 if the size is not 2, 4 or 8 bytes or if the copy from
+ * IFA fails.
+ */
+ static int
+ emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
+ {
+ unsigned int len = 1 << ld.x6_sz;
+ unsigned long val = 0;
+
+ /*
+ * r0, as target, doesn't need to be checked because Illegal Instruction
+ * faults have higher priority than unaligned faults.
+ *
+ * r0 cannot be found as the base as it would never generate an
+ * unaligned reference.
+ */
+
+ /*
+ * ldX.a we will emulate load and also invalidate the ALAT entry.
+ * See comment below for explanation on how we handle ldX.a
+ */
+
+ if (len != 2 && len != 4 && len != 8) {
+ DPRINT("unknown size: x6=%d\n", ld.x6_sz);
+ return -1;
+ }
+ /* this assumes little-endian byte-order: */
+ if (copy_from_user(&val, (void __user *) ifa, len))
+ return -1;
+ setreg(ld.r1, val, 0, regs);
+
+ /*
+ * check for updates on any kind of loads
+ */
+ if (ld.op == 0x5 || ld.m)
+ emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
+
+ /*
+ * handling of various loads (based on EAS2.4):
+ *
+ * ldX.acq (ordered load):
+ * - acquire semantics would have been used, so force fence instead.
+ *
+ * ldX.c.clr (check load and clear):
+ * - if we get to this handler, it's because the entry was not in the ALAT.
+ * Therefore the operation reverts to a normal load
+ *
+ * ldX.c.nc (check load no clear):
+ * - same as previous one
+ *
+ * ldX.c.clr.acq (ordered check load and clear):
+ * - same as above for c.clr part. The load needs to have acquire semantics. So
+ * we use the fence semantics which is stronger and thus ensures correctness.
+ *
+ * ldX.a (advanced load):
+ * - suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
+ * address doesn't match requested size alignment. This means that we would
+ * possibly need more than one load to get the result.
+ *
+ * The load part can be handled just like a normal load, however the difficult
+ * part is to get the right thing into the ALAT. The critical piece of information
+ * is the base address of the load & size. To do that, a ld.a must be executed,
+ * clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now
+ * if we use the same target register, we will be okay for the check.a instruction.
+ * If we look at the store, basically a stX [r3]=r1 checks the ALAT for any entry
+ * which would overlap within [r3,r3+X] (the size of the load was stored in the
+ * ALAT). If such an entry is found the entry is invalidated. But this is not good
+ * enough, take the following example:
+ * r3=3
+ * ld4.a r1=[r3]
+ *
+ * Could be emulated by doing:
+ * ld1.a r1=[r3],1
+ * store to temporary;
+ * ld1.a r1=[r3],1
+ * store & shift to temporary;
+ * ld1.a r1=[r3],1
+ * store & shift to temporary;
+ * ld1.a r1=[r3]
+ * store & shift to temporary;
+ * r1=temporary
+ *
+ * So in this case, you would get the right value in r1 but the wrong info in
+ * the ALAT. Notice that you could do it in reverse to finish with address 3
+ * but you would still get the size wrong. To get the size right, one needs to
+ * execute exactly the same kind of load. You could do it from an aligned
+ * temporary location, but you would get the address wrong.
+ *
+ * So no matter what, it is not possible to emulate an advanced load
+ * correctly. But is that really critical ?
+ *
+ * We will always convert ld.a into a normal load with ALAT invalidated. This
+ * will enable the compiler to do optimization where certain code paths after ld.a
+ * are not required to have ld.c/chk.a, e.g., code paths with no intervening stores.
+ *
+ * If there is a store after the advanced load, one must either do a ld.c.* or
+ * chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
+ * entry found in ALAT), and that's perfectly ok because:
+ *
+ * - ld.c.*, if the entry is not present a normal load is executed
+ * - chk.a.*, if the entry is not present, execution jumps to recovery code
+ *
+ * In either case, the load can be potentially retried in another form.
+ *
+ * ALAT must be invalidated for the register (so that chk.a or ld.c don't pick
+ * up a stale entry later). The register base update MUST also be performed.
+ */
+
+ /*
+ * when the load has the .acq completer then
+ * use ordering fence.
+ */
+ if (ld.x6_op == 0x5 || ld.x6_op == 0xa)
+ mb();
+
+ /*
+ * invalidate ALAT entry in case of advanced load
+ */
+ if (ld.x6_op == 0x2)
+ invala_gr(ld.r1);
+
+ return 0;
+ }
+
+/*
+ * Emulate an unaligned integer store (stX [r3]=r2, optionally with an
+ * immediate base-register update).
+ *
+ * ifa:  faulting address, i.e. the location the value is written to
+ * ld:   decoded load/store instruction
+ * regs: saved register state of the interrupted context
+ *
+ * Returns 0 on success, -1 if the access size is not 2, 4 or 8 bytes or if
+ * copy_to_user() to the target address fails.
+ */
+static int
+emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
+{
+ unsigned long r2;
+ unsigned int len = 1 << ld.x6_sz;
+
+ /*
+ * if we get to this handler, Nat bits on both r3 and r2 have already
+ * been checked. so we don't need to do it
+ *
+ * extract the value to be stored
+ */
+ getreg(ld.imm, &r2, NULL, regs);
+
+ /*
+ * Only 2-, 4- and 8-byte stores are accepted; any other size is
+ * rejected before memory is touched. The store itself is done below
+ * with copy_to_user() from the low-order bytes of r2.
+ */
+ DPRINT("st%d [%lx]=%lx\n", len, ifa, r2);
+
+ if (len != 2 && len != 4 && len != 8) {
+ DPRINT("unknown size: x6=%d\n", ld.x6_sz);
+ return -1;
+ }
+
+ /* this assumes little-endian byte-order: */
+ if (copy_to_user((void __user *) ifa, &r2, len))
+ return -1;
+
+ /*
+ * stX [r3]=r2,imm(9)
+ *
+ * NOTE:
+ * ld.r3 can never be r0, because r0 would not generate an
+ * unaligned access.
+ */
+ if (ld.op == 0x5) {
+ unsigned long imm;
+
+ /*
+ * form imm9: [12:6] contain first 7bits
+ */
+ imm = ld.x << 7 | ld.r1;
+ /*
+ * sign extend (8bits) if m set
+ */
+ if (ld.m) imm |= SIGN_EXT9;
+ /*
+ * ifa == r3 (NaT is necessarily cleared)
+ */
+ ifa += imm;
+
+ DPRINT("imm=%lx r3=%lx\n", imm, ifa);
+
+ /* write the post-incremented base back to r3 with NaT cleared */
+ setreg(ld.r3, ifa, 0, regs);
+ }
+ /*
+ * we don't have alat_invalidate_multiple() so we need
+ * to do the complete flush :-<<
+ */
+ ia64_invala();
+
+ /*
+ * stX.rel: use fence instead of release
+ */
+ if (ld.x6_op == 0xd)
+ mb();
+
+ return 0;
+}
+
+/*
+ * floating point operations sizes in bytes
+ *
+ * Indexed by the x6_sz field of the decoded instruction; the order matches
+ * the x6_sz switch statements in the floating-point emulation routines of
+ * this file (0=extended, 1=integer, 2=single, 3=double).
+ */
+static const unsigned char float_fsz[4]={
+ 10, /* extended precision (e) */
+ 8, /* integer (8) */
+ 4, /* single precision (s) */
+ 8 /* double precision (d) */
+};
+
+/*
+ * Convert an extended-precision memory image into a spilled-register image:
+ * *init is loaded with ia64_ldfe() into FP register number 6 and that
+ * register is then written to *final with ia64_stf_spill(). ia64_stop()
+ * separates the two steps.
+ */
+static inline void
+mem2float_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+ ia64_ldfe(6, init);
+ ia64_stop();
+ ia64_stf_spill(final, 6);
+}
+
+/*
+ * Convert an 8-byte integer memory image into a spilled-register image:
+ * *init is loaded with ia64_ldf8() into FP register number 6 and that
+ * register is then written to *final with ia64_stf_spill().
+ */
+static inline void
+mem2float_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+ ia64_ldf8(6, init);
+ ia64_stop();
+ ia64_stf_spill(final, 6);
+}
+
+/*
+ * Convert a single-precision memory image into a spilled-register image:
+ * *init is loaded with ia64_ldfs() into FP register number 6 and that
+ * register is then written to *final with ia64_stf_spill().
+ */
+static inline void
+mem2float_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+ ia64_ldfs(6, init);
+ ia64_stop();
+ ia64_stf_spill(final, 6);
+}
+
+/*
+ * Convert a double-precision memory image into a spilled-register image:
+ * *init is loaded with ia64_ldfd() into FP register number 6 and that
+ * register is then written to *final with ia64_stf_spill().
+ */
+static inline void
+mem2float_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+ ia64_ldfd(6, init);
+ ia64_stop();
+ ia64_stf_spill(final, 6);
+}
+
+/*
+ * Convert a spilled-register image into an extended-precision memory image:
+ * *init is reloaded with ia64_ldf_fill() into FP register number 6 and that
+ * register is then written to *final with ia64_stfe().
+ */
+static inline void
+float2mem_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+ ia64_ldf_fill(6, init);
+ ia64_stop();
+ ia64_stfe(final, 6);
+}
+
+/*
+ * Convert a spilled-register image into an 8-byte integer memory image:
+ * *init is reloaded with ia64_ldf_fill() into FP register number 6 and that
+ * register is then written to *final with ia64_stf8().
+ */
+static inline void
+float2mem_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+ ia64_ldf_fill(6, init);
+ ia64_stop();
+ ia64_stf8(final, 6);
+}
+
+/*
+ * Convert a spilled-register image into a single-precision memory image:
+ * *init is reloaded with ia64_ldf_fill() into FP register number 6 and that
+ * register is then written to *final with ia64_stfs().
+ */
+static inline void
+float2mem_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+ ia64_ldf_fill(6, init);
+ ia64_stop();
+ ia64_stfs(final, 6);
+}
+
+/*
+ * Convert a spilled-register image into a double-precision memory image:
+ * *init is reloaded with ia64_ldf_fill() into FP register number 6 and that
+ * register is then written to *final with ia64_stfd().
+ */
+static inline void
+float2mem_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+ ia64_ldf_fill(6, init);
+ ia64_stop();
+ ia64_stfd(final, 6);
+}
+
+/*
+ * Emulate an unaligned floating-point load pair (ldfpX): two consecutive
+ * values of float_fsz[ld.x6_sz] bytes each are read starting at ifa and
+ * installed in the FP registers named by ld.r1 and ld.imm.
+ *
+ * ifa:  faulting address (address of the first value)
+ * ld:   decoded load/store instruction
+ * regs: saved register state of the interrupted context
+ *
+ * For the advanced form (x6_op == 0x2) no data is loaded; only the base
+ * update (if any) and the ALAT invalidation are performed.
+ *
+ * Returns 0 on success, -1 if either copy_from_user() fails.
+ */
+static int
+emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
+{
+ struct ia64_fpreg fpr_init[2];
+ struct ia64_fpreg fpr_final[2];
+ unsigned long len = float_fsz[ld.x6_sz];
+
+ /*
+ * fr0 & fr1 don't need to be checked because Illegal Instruction faults have
+ * higher priority than unaligned faults.
+ *
+ * r0 cannot be found as the base as it would never generate an unaligned
+ * reference.
+ */
+
+ /*
+ * make sure we get clean buffers
+ */
+ memset(&fpr_init, 0, sizeof(fpr_init));
+ memset(&fpr_final, 0, sizeof(fpr_final));
+
+ /*
+ * ldfpX.a: we don't try to emulate anything but we must
+ * invalidate the ALAT entry and execute updates, if any.
+ */
+ if (ld.x6_op != 0x2) {
+ /*
+ * This assumes little-endian byte-order. Note that there is no "ldfpe"
+ * instruction:
+ */
+ if (copy_from_user(&fpr_init[0], (void __user *) ifa, len)
+ || copy_from_user(&fpr_init[1], (void __user *) (ifa + len), len))
+ return -1;
+
+ DPRINT("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld.r1, ld.imm, ld.x6_sz);
+ DDUMP("frp_init =", &fpr_init, 2*len);
+ /*
+ * XXX fixme
+ * Could optimize inlines by using ldfpX & 2 spills
+ */
+ switch( ld.x6_sz ) {
+ case 0:
+ mem2float_extended(&fpr_init[0], &fpr_final[0]);
+ mem2float_extended(&fpr_init[1], &fpr_final[1]);
+ break;
+ case 1:
+ mem2float_integer(&fpr_init[0], &fpr_final[0]);
+ mem2float_integer(&fpr_init[1], &fpr_final[1]);
+ break;
+ case 2:
+ mem2float_single(&fpr_init[0], &fpr_final[0]);
+ mem2float_single(&fpr_init[1], &fpr_final[1]);
+ break;
+ case 3:
+ mem2float_double(&fpr_init[0], &fpr_final[0]);
+ mem2float_double(&fpr_init[1], &fpr_final[1]);
+ break;
+ }
+ DDUMP("fpr_final =", &fpr_final, 2*len);
+ /*
+ * XXX fixme
+ *
+ * A possible optimization would be to drop fpr_final and directly
+ * use the storage from the saved context i.e., the actual final
+ * destination (pt_regs, switch_stack or thread structure).
+ */
+ setfpreg(ld.r1, &fpr_final[0], regs);
+ setfpreg(ld.imm, &fpr_final[1], regs);
+ }
+
+ /*
+ * Check for updates: only immediate updates are available for this
+ * instruction.
+ */
+ if (ld.m) {
+ /*
+ * the immediate is implicit given the ldsz of the operation:
+ * single: 8 (2x4) and for all others it's 16 (2x8)
+ */
+ ifa += len<<1;
+
+ /*
+ * IMPORTANT:
+ * the fact that we force the NaT of r3 to zero is ONLY valid
+ * as long as we don't come here with a ldfpX.s.
+ * For this reason we keep this sanity check
+ */
+ if (ld.x6_op == 1 || ld.x6_op == 3)
+ printk(KERN_ERR "%s: register update on speculative load pair, error\n",
+ __FUNCTION__);
+
+ setreg(ld.r3, ifa, 0, regs);
+ }
+
+ /*
+ * Invalidate ALAT entries, if any, for both registers.
+ */
+ if (ld.x6_op == 0x2) {
+ invala_fr(ld.r1);
+ invala_fr(ld.imm);
+ }
+ return 0;
+}
+
+
+/*
+ * Emulate an unaligned single floating-point load (ldfX): float_fsz[ld.x6_sz]
+ * bytes are read from ifa, converted to the register spill format and
+ * installed in the FP register named by ld.r1.
+ *
+ * ifa:  faulting address
+ * ld:   decoded load/store instruction
+ * regs: saved register state of the interrupted context
+ *
+ * For the advanced form (x6_op == 0x2) no data is loaded; only the base
+ * update (if any) and the ALAT invalidation are performed.
+ *
+ * Returns 0 on success, -1 if copy_from_user() fails.
+ */
+static int
+emulate_load_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
+{
+ struct ia64_fpreg fpr_init;
+ struct ia64_fpreg fpr_final;
+ unsigned long len = float_fsz[ld.x6_sz];
+
+ /*
+ * fr0 & fr1 don't need to be checked because Illegal Instruction
+ * faults have higher priority than unaligned faults.
+ *
+ * r0 cannot be found as the base as it would never generate an
+ * unaligned reference.
+ */
+
+ /*
+ * make sure we get clean buffers
+ */
+ memset(&fpr_init,0, sizeof(fpr_init));
+ memset(&fpr_final,0, sizeof(fpr_final));
+
+ /*
+ * ldfX.a we don't try to emulate anything but we must
+ * invalidate the ALAT entry.
+ * See comments in ldX for descriptions on how the various loads are handled.
+ */
+ if (ld.x6_op != 0x2) {
+ if (copy_from_user(&fpr_init, (void __user *) ifa, len))
+ return -1;
+
+ DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
+ DDUMP("fpr_init =", &fpr_init, len);
+ /*
+ * we only do something for x6_op={0,8,9}
+ */
+ switch( ld.x6_sz ) {
+ case 0:
+ mem2float_extended(&fpr_init, &fpr_final);
+ break;
+ case 1:
+ mem2float_integer(&fpr_init, &fpr_final);
+ break;
+ case 2:
+ mem2float_single(&fpr_init, &fpr_final);
+ break;
+ case 3:
+ mem2float_double(&fpr_init, &fpr_final);
+ break;
+ }
+ DDUMP("fpr_final =", &fpr_final, len);
+ /*
+ * XXX fixme
+ *
+ * A possible optimization would be to drop fpr_final and directly
+ * use the storage from the saved context i.e., the actual final
+ * destination (pt_regs, switch_stack or thread structure).
+ */
+ setfpreg(ld.r1, &fpr_final, regs);
+ }
+
+ /*
+ * check for updates on any loads
+ * (op 0x7 selects the immediate update form, otherwise ld.m selects
+ * the register update form)
+ */
+ if (ld.op == 0x7 || ld.m)
+ emulate_load_updates(ld.op == 0x7 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
+
+ /*
+ * invalidate ALAT entry in case of advanced floating point loads
+ */
+ if (ld.x6_op == 0x2)
+ invala_fr(ld.r1);
+
+ return 0;
+}
+
+
+/*
+ * Emulate an unaligned floating-point store (stfX [r3]=f2, optionally with an
+ * immediate base-register update): the FP register named by ld.imm is
+ * converted to its memory format in an aligned temporary and
+ * float_fsz[ld.x6_sz] bytes of it are copied to ifa.
+ *
+ * ifa:  faulting address, i.e. the location the value is written to
+ * ld:   decoded load/store instruction
+ * regs: saved register state of the interrupted context
+ *
+ * Returns 0 on success, -1 if copy_to_user() fails.
+ */
+static int
+emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
+{
+ struct ia64_fpreg fpr_init;
+ struct ia64_fpreg fpr_final;
+ unsigned long len = float_fsz[ld.x6_sz];
+
+ /*
+ * make sure we get clean buffers
+ */
+ memset(&fpr_init,0, sizeof(fpr_init));
+ memset(&fpr_final,0, sizeof(fpr_final));
+
+ /*
+ * if we get to this handler, Nat bits on both r3 and r2 have already
+ * been checked. so we don't need to do it
+ *
+ * extract the value to be stored
+ */
+ getfpreg(ld.imm, &fpr_init, regs);
+ /*
+ * during this step, we extract the spilled registers from the saved
+ * context i.e., we refill. Then we store (no spill) to temporary
+ * aligned location
+ */
+ switch( ld.x6_sz ) {
+ case 0:
+ float2mem_extended(&fpr_init, &fpr_final);
+ break;
+ case 1:
+ float2mem_integer(&fpr_init, &fpr_final);
+ break;
+ case 2:
+ float2mem_single(&fpr_init, &fpr_final);
+ break;
+ case 3:
+ float2mem_double(&fpr_init, &fpr_final);
+ break;
+ }
+ DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
+ DDUMP("fpr_init =", &fpr_init, len);
+ DDUMP("fpr_final =", &fpr_final, len);
+
+ if (copy_to_user((void __user *) ifa, &fpr_final, len))
+ return -1;
+
+ /*
+ * stfX [r3]=r2,imm(9)
+ *
+ * NOTE:
+ * ld.r3 can never be r0, because r0 would not generate an
+ * unaligned access.
+ */
+ if (ld.op == 0x7) {
+ unsigned long imm;
+
+ /*
+ * form imm9: [12:6] contain first 7bits
+ */
+ imm = ld.x << 7 | ld.r1;
+ /*
+ * sign extend (8bits) if m set
+ */
+ if (ld.m)
+ imm |= SIGN_EXT9;
+ /*
+ * ifa == r3 (NaT is necessarily cleared)
+ */
+ ifa += imm;
+
+ DPRINT("imm=%lx r3=%lx\n", imm, ifa);
+
+ /* write the post-incremented base back to r3 with NaT cleared */
+ setreg(ld.r3, ifa, 0, regs);
+ }
+ /*
+ * we don't have alat_invalidate_multiple() so we need
+ * to do the complete flush :-<<
+ */
+ ia64_invala();
+
+ return 0;
+}
+
+/*
+ * Throttle the "unaligned access" log messages: the user/sysadmin should be
+ * able to notice the problem and fix the program, but we must not flood the
+ * log with one line per access.  The message counter restarts once more than
+ * 5*HZ jiffies have passed since the last message that was let through.  The
+ * static state is not protected against concurrent updates (not MP-safe),
+ * which is acceptable here.
+ *
+ * Returns 1 if the caller may log, 0 if the message must be suppressed.
+ */
+static int
+within_logging_rate_limit (void)
+{
+	static unsigned long nr_seen, last_logged;
+
+	/* quiet period elapsed: start a new burst */
+	if (jiffies - last_logged > 5*HZ)
+		nr_seen = 0;
+
+	nr_seen++;
+	if (nr_seen >= 5)
+		return 0;
+
+	last_logged = jiffies;
+	return 1;
+}
+
+/*
+ * Handler for unaligned-reference faults.
+ *
+ * ifa:  faulting (unaligned) data address
+ * regs: saved register state of the interrupted context
+ *
+ * The faulting instruction is fetched from the bundle at regs->cr_iip,
+ * decoded and, where that makes sense, emulated by one of the emulate_*()
+ * routines; on success the saved instruction pointer is advanced past the
+ * emulated slot.  Speculative loads are not emulated but retried with
+ * PSR.ed set.  Anything else (or a failed emulation) results in SIGBUS for
+ * user-level accesses, in the exception-table fixup for kernel accesses that
+ * have one, and in die_if_kernel() otherwise.
+ */
+void
+ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
+{
+	struct ia64_psr *ipsr = ia64_psr(regs);
+	mm_segment_t old_fs = get_fs();
+	unsigned long bundle[2];
+	unsigned long opcode;
+	struct siginfo si;
+	const struct exception_table_entry *eh = NULL;
+	union {
+		unsigned long l;
+		load_store_t insn;
+	} u;
+	int ret = -1;
+
+	if (ia64_psr(regs)->be) {
+		/* we don't support big-endian accesses */
+		die_if_kernel("big-endian unaligned accesses are not supported", regs, 0);
+		goto force_sigbus;
+	}
+
+	/*
+	 * Treat kernel accesses for which there is an exception handler entry the same as
+	 * user-level unaligned accesses.  Otherwise, a clever program could trick this
+	 * handler into reading an arbitrary kernel addresses...
+	 */
+	if (!user_mode(regs))
+		eh = search_exception_tables(regs->cr_iip + ia64_psr(regs)->ri);
+	if (user_mode(regs) || eh) {
+		if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0)
+			goto force_sigbus;
+
+		if (!(current->thread.flags & IA64_THREAD_UAC_NOPRINT)
+		    && within_logging_rate_limit())
+		{
+			char buf[200];	/* comm[] is at most 16 bytes... */
+			size_t len;
+
+			len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, "
+				      "ip=0x%016lx\n\r", current->comm, current->pid,
+				      ifa, regs->cr_iip + ipsr->ri);
+			/*
+			 * Don't call tty_write_message() if we're in the kernel; we might
+			 * be holding locks...
+			 */
+			if (user_mode(regs))
+				tty_write_message(current->signal->tty, buf);
+			buf[len-1] = '\0';	/* drop '\r' */
+			printk(KERN_WARNING "%s", buf);	/* watch for command names containing %s */
+		}
+	} else {
+		if (within_logging_rate_limit())
+			printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n",
+			       ifa, regs->cr_iip + ipsr->ri);
+		/* plain kernel access: let the user-copy helpers touch kernel addresses */
+		set_fs(KERNEL_DS);
+	}
+
+	DPRINT("iip=%lx ifa=%lx isr=%lx (ei=%d, sp=%d)\n",
+	       regs->cr_iip, ifa, regs->cr_ipsr, ipsr->ri, ipsr->it);
+
+	if (__copy_from_user(bundle, (void __user *) regs->cr_iip, 16))
+		goto failure;
+
+	/*
+	 * extract the instruction from the bundle given the slot number
+	 */
+	switch (ipsr->ri) {
+	      case 0: u.l = (bundle[0] >>  5); break;
+	      case 1: u.l = (bundle[0] >> 46) | (bundle[1] << 18); break;
+	      case 2: u.l = (bundle[1] >> 23); break;
+	      default:
+		/* slot 3 does not exist; never decode an uninitialized instruction */
+		goto failure;
+	}
+	opcode = (u.l >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;
+
+	DPRINT("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d "
+	       "ld.x6=0x%x ld.m=%d ld.op=%d\n", opcode, u.insn.qp, u.insn.r1, u.insn.imm,
+	       u.insn.r3, u.insn.x, u.insn.hint, u.insn.x6_sz, u.insn.m, u.insn.op);
+
+	/*
+	 * IMPORTANT:
+	 * Notice that the switch statement DOES not cover all possible instructions
+	 * that DO generate unaligned references. This is made on purpose because for some
+	 * instructions it DOES NOT make sense to try and emulate the access. Sometimes it
+	 * is WRONG to try and emulate. Here is a list of instruction we don't emulate i.e.,
+	 * the program will get a signal and die:
+	 *
+	 *	load/store:
+	 *		- ldX.spill
+	 *		- stX.spill
+	 *	Reason: RNATs are based on addresses
+	 *		- ld16
+	 *		- st16
+	 *	Reason: ld16 and st16 are supposed to occur in a single
+	 *		memory op
+	 *
+	 *	synchronization:
+	 *		- cmpxchg
+	 *		- fetchadd
+	 *		- xchg
+	 *	Reason: ATOMIC operations cannot be emulated properly using multiple
+	 *	        instructions.
+	 *
+	 *	speculative loads:
+	 *		- ldX.sZ
+	 *	Reason: side effects, code must be ready to deal with failure so simpler
+	 *		to let the load fail.
+	 * ---------------------------------------------------------------------------------
+	 * XXX fixme
+	 *
+	 * I would like to get rid of this switch case and do something
+	 * more elegant.
+	 */
+	switch (opcode) {
+	      case LDS_OP:
+	      case LDSA_OP:
+		if (u.insn.x)
+			/* oops, really a semaphore op (cmpxchg, etc) */
+			goto failure;
+		/* no break */
+	      case LDS_IMM_OP:
+	      case LDSA_IMM_OP:
+	      case LDFS_OP:
+	      case LDFSA_OP:
+	      case LDFS_IMM_OP:
+		/*
+		 * The instruction will be retried with deferred exceptions turned on, and
+		 * we should get Nat bit installed
+		 *
+		 * IMPORTANT: When PSR_ED is set, the register & immediate update forms
+		 * are actually executed even though the operation failed. So we don't
+		 * need to take care of this.
+		 */
+		DPRINT("forcing PSR_ED\n");
+		regs->cr_ipsr |= IA64_PSR_ED;
+		goto done;
+
+	      case LD_OP:
+	      case LDA_OP:
+	      case LDBIAS_OP:
+	      case LDACQ_OP:
+	      case LDCCLR_OP:
+	      case LDCNC_OP:
+	      case LDCCLRACQ_OP:
+		if (u.insn.x)
+			/* oops, really a semaphore op (cmpxchg, etc) */
+			goto failure;
+		/* no break */
+	      case LD_IMM_OP:
+	      case LDA_IMM_OP:
+	      case LDBIAS_IMM_OP:
+	      case LDACQ_IMM_OP:
+	      case LDCCLR_IMM_OP:
+	      case LDCNC_IMM_OP:
+	      case LDCCLRACQ_IMM_OP:
+		ret = emulate_load_int(ifa, u.insn, regs);
+		break;
+
+	      case ST_OP:
+	      case STREL_OP:
+		if (u.insn.x)
+			/* oops, really a semaphore op (cmpxchg, etc) */
+			goto failure;
+		/* no break */
+	      case ST_IMM_OP:
+	      case STREL_IMM_OP:
+		ret = emulate_store_int(ifa, u.insn, regs);
+		break;
+
+	      case LDF_OP:
+	      case LDFA_OP:
+	      case LDFCCLR_OP:
+	      case LDFCNC_OP:
+	      case LDF_IMM_OP:
+	      case LDFA_IMM_OP:
+	      case LDFCCLR_IMM_OP:
+	      case LDFCNC_IMM_OP:
+		if (u.insn.x)
+			ret = emulate_load_floatpair(ifa, u.insn, regs);
+		else
+			ret = emulate_load_float(ifa, u.insn, regs);
+		break;
+
+	      case STF_OP:
+	      case STF_IMM_OP:
+		ret = emulate_store_float(ifa, u.insn, regs);
+		break;
+
+	      default:
+		goto failure;
+	}
+	DPRINT("ret=%d\n", ret);
+	if (ret)
+		goto failure;
+
+	/*
+	 * Step over the emulated instruction.  Slots are numbered 0..2, so after
+	 * slot 2 we must move to slot 0 of the next bundle; simply masking
+	 * (ri + 1) with 0x3 would leave the invalid slot number 3 in the PSR.
+	 * Given today's architecture the slot-2 case is not likely to happen
+	 * because a memory access instruction (M) can never be in the last slot
+	 * of a bundle, but let's handle it correctly anyway.
+	 */
+	if (ipsr->ri == 2) {
+		regs->cr_iip += 16;
+		ipsr->ri = 0;
+	} else
+		ipsr->ri = ipsr->ri + 1;
+
+	DPRINT("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip);
+  done:
+	set_fs(old_fs);		/* restore original address limit */
+	return;
+
+  failure:
+	/* something went wrong... */
+	if (!user_mode(regs)) {
+		if (eh) {
+			ia64_handle_exception(regs, eh);
+			goto done;
+		}
+		die_if_kernel("error during unaligned kernel access\n", regs, ret);
+		/* NOT_REACHED */
+	}
+  force_sigbus:
+	si.si_signo = SIGBUS;
+	si.si_errno = 0;
+	si.si_code = BUS_ADRALN;
+	si.si_addr = (void __user *) ifa;
+	si.si_flags = 0;
+	si.si_isr = 0;
+	si.si_imm = 0;
+	force_sig_info(SIGBUS, &si, current);
+	goto done;
+}
diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c
new file mode 100644
index 00000000000..d494ff647ca
--- /dev/null
+++ b/arch/ia64/kernel/unwind.c
@@ -0,0 +1,2306 @@
+/*
+ * Copyright (C) 1999-2004 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2003 Fenghua Yu <fenghua.yu@intel.com>
+ * - Change pt_regs_off() to make it less dependant on pt_regs structure.
+ */
+/*
+ * This file implements call frame unwind support for the Linux
+ * kernel. Parsing and processing the unwind information is
+ * time-consuming, so this implementation translates the unwind
+ * descriptors into unwind scripts. These scripts are very simple
+ * (basically a sequence of assignments) and efficient to execute.
+ * They are cached for later re-use. Each script is specific for a
+ * given instruction pointer address and the set of predicate values
+ * that the script depends on (most unwind descriptors are
+ * unconditional and scripts often do not depend on predicates at
+ * all). This code is based on the unwind conventions described in
+ * the "IA-64 Software Conventions and Runtime Architecture" manual.
+ *
+ * SMP conventions:
+ * o updates to the global unwind data (in structure "unw") are serialized
+ * by the unw.lock spinlock
+ * o each unwind script has its own read-write lock; a thread must acquire
+ * a read lock before executing a script and must acquire a write lock
+ * before modifying a script
+ * o if both the unw.lock spinlock and a script's read-write lock must be
+ * acquired, then the read-write lock must be acquired first.
+ */
+#include <linux/module.h>
+#include <linux/bootmem.h>
+#include <linux/elf.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include <asm/unwind.h>
+
+#include <asm/delay.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/ptrace_offsets.h>
+#include <asm/rse.h>
+#include <asm/sections.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include "entry.h"
+#include "unwind_i.h"
+
+/* log2 of the number of entries in the script cache */
+#define UNW_LOG_CACHE_SIZE 7 /* each unw_script is ~256 bytes in size */
+#define UNW_CACHE_SIZE (1 << UNW_LOG_CACHE_SIZE)
+
+/* the hash table has twice as many slots as the script cache has entries */
+#define UNW_LOG_HASH_SIZE (UNW_LOG_CACHE_SIZE + 1)
+#define UNW_HASH_SIZE (1 << UNW_LOG_HASH_SIZE)
+
+#define UNW_STATS 0 /* WARNING: this disables interrupts for long time-spans!! */
+
+#ifdef UNW_DEBUG
+ static unsigned int unw_debug_level = UNW_DEBUG;
+# define UNW_DEBUG_ON(n) unw_debug_level >= n
+ /* Do not code a printk level, not all debug lines end in newline */
+# define UNW_DPRINT(n, ...) if (UNW_DEBUG_ON(n)) printk(__VA_ARGS__)
+# define inline
+#else /* !UNW_DEBUG */
+# define UNW_DEBUG_ON(n) 0
+# define UNW_DPRINT(n, ...)
+#endif /* UNW_DEBUG */
+
+/* STAT(x) expands to x only when statistics gathering (UNW_STATS) is enabled */
+#if UNW_STATS
+# define STAT(x...) x
+#else
+# define STAT(x...)
+#endif
+
+/*
+ * Allocation helpers for the state records; the allocations use GFP_ATOMIC
+ * and may therefore return NULL, which callers must check.
+ */
+#define alloc_reg_state() kmalloc(sizeof(struct unw_reg_state), GFP_ATOMIC)
+#define free_reg_state(usr) kfree(usr)
+#define alloc_labeled_state() kmalloc(sizeof(struct unw_labeled_state), GFP_ATOMIC)
+#define free_labeled_state(usr) kfree(usr)
+
+typedef unsigned long unw_word;
+typedef unsigned char unw_hash_index_t;
+
+/*
+ * Global unwinder state: the list of unwind tables, the lookup tables that
+ * map register numbers to offsets, and the script cache with its hash table
+ * and LRU indices.
+ */
+static struct {
+ spinlock_t lock; /* spinlock for unwind data */
+
+ /* list of unwind tables (one per load-module) */
+ struct unw_table *tables;
+
+ unsigned long r0; /* constant 0 for r0 */
+
+ /* table of registers that prologues can save (and order in which they're saved): */
+ const unsigned char save_order[8];
+
+ /* maps a preserved register index (preg_index) to corresponding switch_stack offset: */
+ unsigned short sw_off[sizeof(struct unw_frame_info) / 8];
+
+ unsigned short lru_head; /* index of least-recently used script */
+ unsigned short lru_tail; /* index of most-recently used script */
+
+ /* index into unw_frame_info for preserved register i */
+ unsigned short preg_index[UNW_NUM_REGS];
+
+ /* offset of scratch register rN in struct pt_regs, or -1 if rN is not stored there */
+ short pt_regs_offsets[32];
+
+ /* unwind table for the kernel: */
+ struct unw_table kernel_table;
+
+ /* unwind table describing the gate page (kernel code that is mapped into user space): */
+ size_t gate_table_size;
+ unsigned long *gate_table;
+
+ /* hash table that maps instruction pointer to script index: */
+ unsigned short hash[UNW_HASH_SIZE];
+
+ /* script cache: */
+ struct unw_script cache[UNW_CACHE_SIZE];
+
+# ifdef UNW_DEBUG
+ const char *preg_name[UNW_NUM_REGS];
+# endif
+# if UNW_STATS
+ struct {
+ struct {
+ int lookups;
+ int hinted_hits;
+ int normal_hits;
+ int collision_chain_traversals;
+ } cache;
+ struct {
+ unsigned long build_time;
+ unsigned long run_time;
+ unsigned long parse_time;
+ int builds;
+ int news;
+ int collisions;
+ int runs;
+ } script;
+ struct {
+ unsigned long init_time;
+ unsigned long unwind_time;
+ int inits;
+ int unwinds;
+ } api;
+ } stat;
+# endif
+} unw = {
+ .tables = &unw.kernel_table,
+ .lock = SPIN_LOCK_UNLOCKED,
+ .save_order = {
+ UNW_REG_RP, UNW_REG_PFS, UNW_REG_PSP, UNW_REG_PR,
+ UNW_REG_UNAT, UNW_REG_LC, UNW_REG_FPSR, UNW_REG_PRI_UNAT_GR
+ },
+ /* offsets are divided by 8, i.e. expressed in 8-byte words */
+ .preg_index = {
+ offsetof(struct unw_frame_info, pri_unat_loc)/8, /* PRI_UNAT_GR */
+ offsetof(struct unw_frame_info, pri_unat_loc)/8, /* PRI_UNAT_MEM */
+ offsetof(struct unw_frame_info, bsp_loc)/8,
+ offsetof(struct unw_frame_info, bspstore_loc)/8,
+ offsetof(struct unw_frame_info, pfs_loc)/8,
+ offsetof(struct unw_frame_info, rnat_loc)/8,
+ offsetof(struct unw_frame_info, psp)/8,
+ offsetof(struct unw_frame_info, rp_loc)/8,
+ offsetof(struct unw_frame_info, r4)/8,
+ offsetof(struct unw_frame_info, r5)/8,
+ offsetof(struct unw_frame_info, r6)/8,
+ offsetof(struct unw_frame_info, r7)/8,
+ offsetof(struct unw_frame_info, unat_loc)/8,
+ offsetof(struct unw_frame_info, pr_loc)/8,
+ offsetof(struct unw_frame_info, lc_loc)/8,
+ offsetof(struct unw_frame_info, fpsr_loc)/8,
+ offsetof(struct unw_frame_info, b1_loc)/8,
+ offsetof(struct unw_frame_info, b2_loc)/8,
+ offsetof(struct unw_frame_info, b3_loc)/8,
+ offsetof(struct unw_frame_info, b4_loc)/8,
+ offsetof(struct unw_frame_info, b5_loc)/8,
+ offsetof(struct unw_frame_info, f2_loc)/8,
+ offsetof(struct unw_frame_info, f3_loc)/8,
+ offsetof(struct unw_frame_info, f4_loc)/8,
+ offsetof(struct unw_frame_info, f5_loc)/8,
+ offsetof(struct unw_frame_info, fr_loc[16 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[17 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[18 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[19 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[20 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[21 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[22 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[23 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[24 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[25 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[26 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[27 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[28 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[29 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[30 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[31 - 16])/8,
+ },
+ /* r0 and r4-r7 have no slot in pt_regs and are marked with -1 */
+ .pt_regs_offsets = {
+ [0] = -1,
+ offsetof(struct pt_regs, r1),
+ offsetof(struct pt_regs, r2),
+ offsetof(struct pt_regs, r3),
+ [4] = -1, [5] = -1, [6] = -1, [7] = -1,
+ offsetof(struct pt_regs, r8),
+ offsetof(struct pt_regs, r9),
+ offsetof(struct pt_regs, r10),
+ offsetof(struct pt_regs, r11),
+ offsetof(struct pt_regs, r12),
+ offsetof(struct pt_regs, r13),
+ offsetof(struct pt_regs, r14),
+ offsetof(struct pt_regs, r15),
+ offsetof(struct pt_regs, r16),
+ offsetof(struct pt_regs, r17),
+ offsetof(struct pt_regs, r18),
+ offsetof(struct pt_regs, r19),
+ offsetof(struct pt_regs, r20),
+ offsetof(struct pt_regs, r21),
+ offsetof(struct pt_regs, r22),
+ offsetof(struct pt_regs, r23),
+ offsetof(struct pt_regs, r24),
+ offsetof(struct pt_regs, r25),
+ offsetof(struct pt_regs, r26),
+ offsetof(struct pt_regs, r27),
+ offsetof(struct pt_regs, r28),
+ offsetof(struct pt_regs, r29),
+ offsetof(struct pt_regs, r30),
+ offsetof(struct pt_regs, r31),
+ },
+ /* every hash slot starts out as -1 (presumably meaning "no script" - confirm against the lookup code) */
+ .hash = { [0 ... UNW_HASH_SIZE - 1] = -1 },
+#ifdef UNW_DEBUG
+ .preg_name = {
+ "pri_unat_gr", "pri_unat_mem", "bsp", "bspstore", "ar.pfs", "ar.rnat", "psp", "rp",
+ "r4", "r5", "r6", "r7",
+ "ar.unat", "pr", "ar.lc", "ar.fpsr",
+ "b1", "b2", "b3", "b4", "b5",
+ "f2", "f3", "f4", "f5",
+ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
+ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31"
+ }
+#endif
+};
+
+/*
+ * Return non-zero if ADDR points inside unw.r0, the cell that holds the
+ * constant 0 for r0 and must therefore never be written.
+ */
+static inline int
+read_only (void *addr)
+{
+	unsigned long offset = (unsigned long) ((char *) addr - (char *) &unw.r0);
+
+	/* addresses below &unw.r0 wrap to huge offsets and fail the test, too */
+	return offset < sizeof(unw.r0);
+}
+
+/*
+ * Map scratch register number REG to the offset of its slot in struct
+ * pt_regs.  A register number that is out of range, or whose table entry is
+ * negative, is reported via UNW_DPRINT and yields offset 0.
+ */
+static inline unsigned long
+pt_regs_off (unsigned long reg)
+{
+	short off;
+
+	if (reg >= ARRAY_SIZE(unw.pt_regs_offsets))
+		off = -1;
+	else
+		off = unw.pt_regs_offsets[reg];
+
+	if (off >= 0)
+		return (unsigned long) off;
+
+	UNW_DPRINT(0, "unwind.%s: bad scratch reg r%lu\n", __FUNCTION__, reg);
+	return 0;
+}
+
+/*
+ * Return the pt_regs area that holds the scratch registers for the frame
+ * described by INFO.  If info->pt has not been set, it is reconstructed: for
+ * an interrupt frame it is the pt_regs structure ending at info->psp,
+ * otherwise it is taken to be info->sp - 16.
+ */
+static inline struct pt_regs *
+get_scratch_regs (struct unw_frame_info *info)
+{
+	if (info->pt == 0) {
+		/* This should not happen with valid unwind info. */
+		UNW_DPRINT(0, "unwind.%s: bad unwind info: resetting info->pt\n", __FUNCTION__);
+		if ((info->flags & UNW_FLAG_INTERRUPT_FRAME) == 0)
+			info->pt = info->sp - 16;
+		else
+			info->pt = (unsigned long) ((struct pt_regs *) info->psp - 1);
+	}
+	UNW_DPRINT(3, "unwind.%s: sp 0x%lx pt 0x%lx\n", __FUNCTION__, info->sp, info->pt);
+
+	return (struct pt_regs *) info->pt;
+}
+
+/* Unwind accessors. */
+
+/*
+ * Read or write general register REGNUM of the frame described by INFO.
+ *
+ * info:   frame whose register is accessed
+ * regnum: general register number; r1-r127 can be read and written, r0 can
+ *         only be read (and always reads as 0)
+ * val:    value to store (write) or location receiving the value (read)
+ * nat:    NaT bit to store (write) or location receiving the NaT bit (read)
+ * write:  non-zero to write the register, zero to read it
+ *
+ * r4-r7 are preserved registers whose save location comes from the frame
+ * info (or from the switch_stack if none is recorded), the remaining static
+ * registers live in pt_regs, and r32 and up are found on the register backing
+ * store.  Reading a register whose NaT bit is set returns 0 as value so that
+ * no stale kernel data leaks out.
+ *
+ * Returns 0 on success, -1 if the register does not exist or lies outside of
+ * the register backing store.
+ */
+int
+unw_access_gr (struct unw_frame_info *info, int regnum, unsigned long *val, char *nat, int write)
+{
+	unsigned long *addr, *nat_addr, nat_mask = 0, dummy_nat;
+	struct unw_ireg *ireg;
+	struct pt_regs *pt;
+
+	if ((unsigned) regnum - 1 >= 127) {
+		if (regnum == 0 && !write) {
+			*val = 0;	/* read r0 always returns 0 */
+			*nat = 0;
+			return 0;
+		}
+		UNW_DPRINT(0, "unwind.%s: trying to access non-existent r%u\n",
+			   __FUNCTION__, regnum);
+		return -1;
+	}
+
+	if (regnum < 32) {
+		if (regnum >= 4 && regnum <= 7) {
+			/* access a preserved register */
+			ireg = &info->r4 + (regnum - 4);
+			addr = ireg->loc;
+			if (addr) {
+				nat_addr = addr + ireg->nat.off;
+				switch (ireg->nat.type) {
+				      case UNW_NAT_VAL:
+					/*
+					 * simulate getf.sig/setf.sig: the register was saved
+					 * in a floating-point register, so addr[0] holds the
+					 * significand and addr[1] the sign/exponent word.
+					 */
+					if (write) {
+						if (*nat) {
+							/* write NaTVal and be done with it */
+							addr[0] = 0;
+							addr[1] = 0x1fffe;
+							return 0;
+						}
+						addr[1] = 0x1003e;
+					} else {
+						/*
+						 * Must test for the same NaTVal encoding that
+						 * the write path above stores (0x1fffe).
+						 */
+						if (addr[0] == 0 && addr[1] == 0x1fffe) {
+							/* return NaT and be done with it */
+							*val = 0;
+							*nat = 1;
+							return 0;
+						}
+					}
+					/* fall through */
+				      case UNW_NAT_NONE:
+					dummy_nat = 0;
+					nat_addr = &dummy_nat;
+					break;
+
+				      case UNW_NAT_MEMSTK:
+					nat_mask = (1UL << ((long) addr & 0x1f8)/8);
+					break;
+
+				      case UNW_NAT_REGSTK:
+					nat_addr = ia64_rse_rnat_addr(addr);
+					if ((unsigned long) addr < info->regstk.limit
+					    || (unsigned long) addr >= info->regstk.top)
+					{
+						UNW_DPRINT(0, "unwind.%s: %p outside of regstk "
+							"[0x%lx-0x%lx)\n",
+							__FUNCTION__, (void *) addr,
+							info->regstk.limit,
+							info->regstk.top);
+						return -1;
+					}
+					/* RNaT collection not yet on the backing store: use saved ar.rnat */
+					if ((unsigned long) nat_addr >= info->regstk.top)
+						nat_addr = &info->sw->ar_rnat;
+					nat_mask = (1UL << ia64_rse_slot_num(addr));
+					break;
+				}
+			} else {
+				/* no save location recorded: register is in the switch_stack */
+				addr = &info->sw->r4 + (regnum - 4);
+				nat_addr = &info->sw->ar_unat;
+				nat_mask = (1UL << ((long) addr & 0x1f8)/8);
+			}
+		} else {
+			/* access a scratch register */
+			pt = get_scratch_regs(info);
+			addr = (unsigned long *) ((unsigned long)pt + pt_regs_off(regnum));
+			if (info->pri_unat_loc)
+				nat_addr = info->pri_unat_loc;
+			else
+				nat_addr = &info->sw->ar_unat;
+			nat_mask = (1UL << ((long) addr & 0x1f8)/8);
+		}
+	} else {
+		/* access a stacked register */
+		addr = ia64_rse_skip_regs((unsigned long *) info->bsp, regnum - 32);
+		nat_addr = ia64_rse_rnat_addr(addr);
+		if ((unsigned long) addr < info->regstk.limit
+		    || (unsigned long) addr >= info->regstk.top)
+		{
+			UNW_DPRINT(0, "unwind.%s: ignoring attempt to access register outside "
+				   "of rbs\n",  __FUNCTION__);
+			return -1;
+		}
+		if ((unsigned long) nat_addr >= info->regstk.top)
+			nat_addr = &info->sw->ar_rnat;
+		nat_mask = (1UL << ia64_rse_slot_num(addr));
+	}
+
+	if (write) {
+		if (read_only(addr)) {
+			UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
+				__FUNCTION__);
+		} else {
+			*addr = *val;
+			if (*nat)
+				*nat_addr |= nat_mask;
+			else
+				*nat_addr &= ~nat_mask;
+		}
+	} else {
+		if ((*nat_addr & nat_mask) == 0) {
+			*val = *addr;
+			*nat = 0;
+		} else {
+			*val = 0;	/* if register is a NaT, *addr may contain kernel data! */
+			*nat = 1;
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL(unw_access_gr);
+
+/*
+ * Read or write branch register REGNUM (b0-b7) of the frame described by
+ * INFO.  b0, b6 and b7 are scratch registers found in pt_regs; b1-b5 are
+ * preserved registers found at their recorded save location or, if none is
+ * recorded, in the switch_stack.
+ *
+ * Returns 0 on success (a write to the read-only location is silently
+ * ignored), -1 if the register does not exist.
+ */
+int
+unw_access_br (struct unw_frame_info *info, int regnum, unsigned long *val, int write)
+{
+	unsigned long *loc;
+	struct pt_regs *scratch;
+
+	switch (regnum) {
+	      /* scratch: */
+	      case 0:
+		scratch = get_scratch_regs(info);
+		loc = &scratch->b0;
+		break;
+	      case 6:
+		scratch = get_scratch_regs(info);
+		loc = &scratch->b6;
+		break;
+	      case 7:
+		scratch = get_scratch_regs(info);
+		loc = &scratch->b7;
+		break;
+
+	      /* preserved: */
+	      case 1: case 2: case 3: case 4: case 5:
+		loc = *(&info->b1_loc + (regnum - 1));
+		if (loc == NULL)
+			loc = &info->sw->b1 + (regnum - 1);
+		break;
+
+	      default:
+		UNW_DPRINT(0, "unwind.%s: trying to access non-existent b%u\n",
+			   __FUNCTION__, regnum);
+		return -1;
+	}
+
+	if (!write) {
+		*val = *loc;
+		return 0;
+	}
+
+	if (read_only(loc)) {
+		UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
+			__FUNCTION__);
+	} else {
+		*loc = *val;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(unw_access_br);
+
+/*
+ * Read or write floating-point register REGNUM (f2-f127) of the frame
+ * described by INFO.  Where the register lives depends on its number:
+ *   f2-f5    recorded save location, else switch_stack
+ *   f6-f11   pt_regs (scratch)
+ *   f12-f15  switch_stack
+ *   f16-f31  recorded save location, else switch_stack
+ *   f32-f127 the task's thread.fph area (synced/flushed first)
+ *
+ * Returns 0 on success (a write to the read-only location is silently
+ * ignored), -1 if the register does not exist.
+ */
+int
+unw_access_fr (struct unw_frame_info *info, int regnum, struct ia64_fpreg *val, int write)
+{
+	struct ia64_fpreg *loc;
+	struct pt_regs *scratch;
+
+	if ((unsigned) (regnum - 2) >= 126) {
+		UNW_DPRINT(0, "unwind.%s: trying to access non-existent f%u\n",
+			   __FUNCTION__, regnum);
+		return -1;
+	}
+
+	if (regnum <= 5) {
+		loc = *(&info->f2_loc + (regnum - 2));
+		if (loc == NULL)
+			loc = &info->sw->f2 + (regnum - 2);
+	} else if (regnum <= 11) {
+		scratch = get_scratch_regs(info);
+		loc = &scratch->f6 + (regnum - 6);
+	} else if (regnum <= 15) {
+		loc = &info->sw->f12 + (regnum - 12);
+	} else if (regnum <= 31) {
+		loc = info->fr_loc[regnum - 16];
+		if (loc == NULL)
+			loc = &info->sw->f16 + (regnum - 16);
+	} else {
+		struct task_struct *task = info->task;
+
+		if (write)
+			ia64_sync_fph(task);
+		else
+			ia64_flush_fph(task);
+		loc = task->thread.fph + (regnum - 32);
+	}
+
+	if (!write) {
+		*val = *loc;
+		return 0;
+	}
+
+	if (read_only(loc)) {
+		UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
+			__FUNCTION__);
+	} else {
+		*loc = *val;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(unw_access_fr);
+
+/*
+ * Read or write application register REGNUM (one of the UNW_AR_* values) of
+ * the frame described by INFO.  Preserved registers are accessed at their
+ * recorded save location or, if none is recorded, in the switch_stack; note
+ * that both UNW_AR_BSP and UNW_AR_BSPSTORE fall back to sw->ar_bspstore.
+ * ar.ec is a 6-bit field at bit 52 of the frame's CFM.  RSC, CCV, CSD and
+ * SSD are scratch registers found in pt_regs.
+ *
+ * Returns 0 on success (a write to the read-only location is silently
+ * ignored), -1 if the register is not supported or, for UNW_AR_EC, if the
+ * frame has no CFM location.
+ */
+int
+unw_access_ar (struct unw_frame_info *info, int regnum, unsigned long *val, int write)
+{
+	unsigned long *loc;
+	struct pt_regs *scratch;
+
+	switch (regnum) {
+	      case UNW_AR_BSP:
+		loc = info->bsp_loc ? info->bsp_loc : &info->sw->ar_bspstore;
+		break;
+
+	      case UNW_AR_BSPSTORE:
+		loc = info->bspstore_loc ? info->bspstore_loc : &info->sw->ar_bspstore;
+		break;
+
+	      case UNW_AR_PFS:
+		loc = info->pfs_loc ? info->pfs_loc : &info->sw->ar_pfs;
+		break;
+
+	      case UNW_AR_RNAT:
+		loc = info->rnat_loc ? info->rnat_loc : &info->sw->ar_rnat;
+		break;
+
+	      case UNW_AR_UNAT:
+		loc = info->unat_loc ? info->unat_loc : &info->sw->ar_unat;
+		break;
+
+	      case UNW_AR_LC:
+		loc = info->lc_loc ? info->lc_loc : &info->sw->ar_lc;
+		break;
+
+	      case UNW_AR_EC:
+		if (!info->cfm_loc)
+			return -1;
+		if (!write) {
+			*val = (*info->cfm_loc >> 52) & 0x3f;
+			return 0;
+		}
+		*info->cfm_loc =
+			(*info->cfm_loc & ~(0x3fUL << 52)) | ((*val & 0x3f) << 52);
+		return 0;
+
+	      case UNW_AR_FPSR:
+		loc = info->fpsr_loc ? info->fpsr_loc : &info->sw->ar_fpsr;
+		break;
+
+	      case UNW_AR_RSC:
+		scratch = get_scratch_regs(info);
+		loc = &scratch->ar_rsc;
+		break;
+
+	      case UNW_AR_CCV:
+		scratch = get_scratch_regs(info);
+		loc = &scratch->ar_ccv;
+		break;
+
+	      case UNW_AR_CSD:
+		scratch = get_scratch_regs(info);
+		loc = &scratch->ar_csd;
+		break;
+
+	      case UNW_AR_SSD:
+		scratch = get_scratch_regs(info);
+		loc = &scratch->ar_ssd;
+		break;
+
+	      default:
+		UNW_DPRINT(0, "unwind.%s: trying to access non-existent ar%u\n",
+			   __FUNCTION__, regnum);
+		return -1;
+	}
+
+	if (!write) {
+		*val = *loc;
+		return 0;
+	}
+
+	if (read_only(loc)) {
+		UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
+			__FUNCTION__);
+	} else {
+		*loc = *val;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(unw_access_ar);
+
+/*
+ * Read (WRITE == 0) or write (WRITE != 0) the predicate registers for the
+ * frame described by INFO, copying through *VAL.  Uses info->pr_loc when it
+ * is set, otherwise the pr field of the switch_stack.  Always returns 0; a
+ * write to a read-only location is logged and ignored.
+ */
+int
+unw_access_pr (struct unw_frame_info *info, unsigned long *val, int write)
+{
+	unsigned long *loc = info->pr_loc;
+
+	if (loc == NULL)
+		loc = &info->sw->pr;
+
+	if (!write) {
+		*val = *loc;
+		return 0;
+	}
+	if (read_only(loc)) {
+		UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
+			   __FUNCTION__);
+		return 0;
+	}
+	*loc = *val;
+	return 0;
+}
+EXPORT_SYMBOL(unw_access_pr);
+
+
+/* Routines to manipulate the state stack. */
+
+/*
+ * Push a snapshot of the current register state (sr->curr) onto the state
+ * stack: the snapshot is linked in through sr->curr.next.  If no state can be
+ * allocated the failure is logged and the stack is left unchanged.
+ */
+static inline void
+push (struct unw_state_record *sr)
+{
+	struct unw_reg_state *saved = alloc_reg_state();
+
+	if (saved) {
+		memcpy(saved, &sr->curr, sizeof(*saved));
+		sr->curr.next = saved;
+		return;
+	}
+	printk(KERN_ERR "unwind: cannot stack reg state!\n");
+}
+
+/*
+ * Pop the most recently pushed register state back into sr->curr and free
+ * the stacked copy.  An empty stack is logged and otherwise ignored.
+ */
+static void
+pop (struct unw_state_record *sr)
+{
+	struct unw_reg_state *top = sr->curr.next;
+
+	if (top) {
+		/* the copy also restores sr->curr.next to the entry below TOP */
+		memcpy(&sr->curr, top, sizeof(*top));
+		free_reg_state(top);
+	} else {
+		printk(KERN_ERR "unwind: stack underflow!\n");
+	}
+}
+
+/*
+ * Make a copy of the state stack starting at RS.  Non-recursive to avoid
+ * stack overflows.
+ *
+ * Returns the head of the newly allocated chain, or NULL if RS is NULL or an
+ * allocation fails.  On allocation failure every element copied so far is
+ * released again so that nothing is leaked.
+ */
+static struct unw_reg_state *
+dup_state_stack (struct unw_reg_state *rs)
+{
+	struct unw_reg_state *copy, *prev = NULL, *first = NULL;
+
+	while (rs) {
+		copy = alloc_reg_state();
+		if (!copy) {
+			printk(KERN_ERR "unwind.dup_state_stack: out of memory\n");
+			/*
+			 * The last copy's next pointer was memcpy()'d from the
+			 * source and still points into the ORIGINAL chain;
+			 * terminate the partial copy before freeing it.
+			 */
+			if (prev)
+				prev->next = NULL;
+			while (first) {
+				copy = first->next;
+				free_reg_state(first);
+				first = copy;
+			}
+			return NULL;
+		}
+		memcpy(copy, rs, sizeof(*copy));
+		if (first)
+			prev->next = copy;
+		else
+			first = copy;
+		rs = rs->next;
+		prev = copy;
+	}
+	return first;
+}
+
+/*
+ * Release every register state stacked below RS and reset rs->next to NULL.
+ * RS itself is not freed.
+ */
+static void
+free_state_stack (struct unw_reg_state *rs)
+{
+	struct unw_reg_state *cur = rs->next;
+
+	while (cur) {
+		/* fetch the link before the element is released */
+		struct unw_reg_state *following = cur->next;
+
+		free_reg_state(cur);
+		cur = following;
+	}
+	rs->next = NULL;
+}
+
+/* Unwind decoder routines */
+
+/*
+ * Map the ABREG code of an unwind descriptor to the unwinder's register
+ * index.  MEMORY selects between the two primary-UNaT indices for code 0x62.
+ * An unknown code is logged and mapped to UNW_REG_LC.
+ */
+static enum unw_register_index __attribute_const__
+decode_abreg (unsigned char abreg, int memory)
+{
+	/* contiguous register ranges */
+	if (abreg >= 0x04 && abreg <= 0x07)
+		return UNW_REG_R4 + (abreg - 0x04);
+	if (abreg >= 0x22 && abreg <= 0x25)
+		return UNW_REG_F2 + (abreg - 0x22);
+	if (abreg >= 0x30 && abreg <= 0x3f)
+		return UNW_REG_F16 + (abreg - 0x30);
+	if (abreg >= 0x41 && abreg <= 0x45)
+		return UNW_REG_B1 + (abreg - 0x41);
+
+	/* individually encoded registers */
+	switch (abreg) {
+	      case 0x60: return UNW_REG_PR;
+	      case 0x61: return UNW_REG_PSP;
+	      case 0x62:
+		if (memory)
+			return UNW_REG_PRI_UNAT_MEM;
+		return UNW_REG_PRI_UNAT_GR;
+	      case 0x63: return UNW_REG_RP;
+	      case 0x64: return UNW_REG_BSP;
+	      case 0x65: return UNW_REG_BSPSTORE;
+	      case 0x66: return UNW_REG_RNAT;
+	      case 0x67: return UNW_REG_UNAT;
+	      case 0x68: return UNW_REG_FPSR;
+	      case 0x69: return UNW_REG_PFS;
+	      case 0x6a: return UNW_REG_LC;
+	      default:
+		break;
+	}
+	UNW_DPRINT(0, "unwind.%s: bad abreg=0x%x\n", __FUNCTION__, abreg);
+	return UNW_REG_LC;
+}
+
+/*
+ * Record that REG is saved at location kind WHERE with value VAL.  The save
+ * time WHEN is only stored if none has been recorded yet (reg->when is still
+ * UNW_WHEN_NEVER).
+ */
+static void
+set_reg (struct unw_reg_info *reg, enum unw_where where, int when, unsigned long val)
+{
+	if (reg->when == UNW_WHEN_NEVER)
+		reg->when = when;
+	reg->where = where;
+	reg->val = val;
+}
+
+/*
+ * Assign psp-relative slots to the registers in [LO, HI] that are marked
+ * UNW_WHERE_SPILL_HOME.  Registers are visited from HI down to LO; for each
+ * one *OFFP is decremented by REGSIZE and the result becomes its offset.
+ */
+static void
+alloc_spill_area (unsigned long *offp, unsigned long regsize,
+		  struct unw_reg_info *lo, struct unw_reg_info *hi)
+{
+	struct unw_reg_info *r = hi;
+
+	while (r >= lo) {
+		if (r->where == UNW_WHERE_SPILL_HOME) {
+			*offp -= regsize;
+			r->val = *offp;
+			r->where = UNW_WHERE_PSPREL;
+		}
+		--r;
+	}
+}
+
+/*
+ * Starting at *REGP and going no further than LIM, find the next register
+ * marked UNW_WHERE_SPILL_HOME, set its save time to T and advance *REGP past
+ * it.  If no such register is left, log an "excess spill".
+ */
+static inline void
+spill_next_when (struct unw_reg_info **regp, struct unw_reg_info *lim, unw_word t)
+{
+	struct unw_reg_info *cur = *regp;
+
+	while (cur <= lim) {
+		if (cur->where != UNW_WHERE_SPILL_HOME) {
+			++cur;
+			continue;
+		}
+		cur->when = t;
+		*regp = cur + 1;
+		return;
+	}
+	UNW_DPRINT(0, "unwind.%s: excess spill!\n", __FUNCTION__);
+}
+
+/*
+ * Complete the bookkeeping for the prologue region recorded in SR:
+ * resolve registers whose save location was left implicit, derive save
+ * times from the spill mask (if one was given) and finally assign memory
+ * stack slots to registers spilled to their home locations.
+ */
+static inline void
+finish_prologue (struct unw_state_record *sr)
+{
+ struct unw_reg_info *reg;
+ unsigned long off;
+ int i;
+
+ /*
+ * First, resolve implicit register save locations (see Section "11.4.2.3 Rules
+ * for Using Unwind Descriptors", rule 3):
+ */
+ for (i = 0; i < (int) ARRAY_SIZE(unw.save_order); ++i) {
+ reg = sr->curr.reg + unw.save_order[i];
+ if (reg->where == UNW_WHERE_GR_SAVE) {
+ /* hand out consecutive general registers starting at gr_save_loc */
+ reg->where = UNW_WHERE_GR;
+ reg->val = sr->gr_save_loc++;
+ }
+ }
+
+ /*
+ * Next, compute when the fp, general, and branch registers get
+ * saved. This must come before alloc_spill_area() because
+ * we need to know which registers are spilled to their home
+ * locations.
+ */
+ if (sr->imask) {
+ unsigned char kind, mask = 0, *cp = sr->imask;
+ int t;
+ /* last register of each class, indexed by kind - 1 */
+ static const unsigned char limit[3] = {
+ UNW_REG_F31, UNW_REG_R7, UNW_REG_B5
+ };
+ /* next candidate register of each class, indexed by kind - 1 */
+ struct unw_reg_info *(regs[3]);
+
+ regs[0] = sr->curr.reg + UNW_REG_F2;
+ regs[1] = sr->curr.reg + UNW_REG_R4;
+ regs[2] = sr->curr.reg + UNW_REG_B1;
+
+ /*
+ * Each mask byte carries four 2-bit codes, most significant pair
+ * first, one code per value of t; a non-zero code selects the
+ * register class (1 = fp, 2 = general, 3 = branch).
+ */
+ for (t = 0; t < sr->region_len; ++t) {
+ if ((t & 3) == 0)
+ mask = *cp++;
+ kind = (mask >> 2*(3-(t & 3))) & 3;
+ if (kind > 0)
+ spill_next_when(&regs[kind - 1], sr->curr.reg + limit[kind - 1],
+ sr->region_start + t);
+ }
+ }
+ /*
+ * Next, lay out the memory stack spill area:
+ */
+ if (sr->any_spills) {
+ /* slots are carved downwards from spill_offset: fp (16 bytes), then br and gr (8 bytes) */
+ off = sr->spill_offset;
+ alloc_spill_area(&off, 16, sr->curr.reg + UNW_REG_F2, sr->curr.reg + UNW_REG_F31);
+ alloc_spill_area(&off, 8, sr->curr.reg + UNW_REG_B1, sr->curr.reg + UNW_REG_B5);
+ alloc_spill_area(&off, 8, sr->curr.reg + UNW_REG_R4, sr->curr.reg + UNW_REG_R7);
+ }
+}
+
+/*
+ * Region header descriptors.
+ */
+
+/*
+ * Handle a region header descriptor that opens a new region of RLEN
+ * instruction slots.  BODY is non-zero for a body region and zero for a
+ * prologue region.  For a prologue, MASK (4 bits, most significant first)
+ * selects entries of unw.save_order[] that are saved in consecutive general
+ * registers starting at GRSAVE.
+ *
+ * Before the new region is entered, the previous prologue (if any) is
+ * finished, and sr->done is set if the target time already lies inside the
+ * previous region.
+ */
+static void
+desc_prologue (int body, unw_word rlen, unsigned char mask, unsigned char grsave,
+ struct unw_state_record *sr)
+{
+ int i, region_start;
+
+ /* the region being left was a prologue (and not the initial dummy region) */
+ if (!(sr->in_body || sr->first_region))
+ finish_prologue(sr);
+ sr->first_region = 0;
+
+ /* check if we're done: */
+ if (sr->when_target < sr->region_start + sr->region_len) {
+ sr->done = 1;
+ return;
+ }
+
+ region_start = sr->region_start + sr->region_len;
+
+ /* undo the state pushes covered by the previous region's epilogue */
+ for (i = 0; i < sr->epilogue_count; ++i)
+ pop(sr);
+ sr->epilogue_count = 0;
+ sr->epilogue_start = UNW_WHEN_NEVER;
+
+ sr->region_start = region_start;
+ sr->region_len = rlen;
+ sr->in_body = body;
+
+ if (!body) {
+ /* save the incoming state so a later epilogue can pop() back to it */
+ push(sr);
+
+ for (i = 0; i < 4; ++i) {
+ if (mask & 0x8)
+ set_reg(sr->curr.reg + unw.save_order[i], UNW_WHERE_GR,
+ sr->region_start + sr->region_len - 1, grsave++);
+ mask <<= 1;
+ }
+ /* reset the per-prologue bookkeeping consumed by finish_prologue() */
+ sr->gr_save_loc = grsave;
+ sr->any_spills = 0;
+ sr->imask = NULL;
+ sr->spill_offset = 0x10; /* default to psp+16 */
+ }
+}
+
+/*
+ * Prologue descriptors.
+ */
+
+/*
+ * Handle an ABI descriptor.  Only abi == 3 with context == 'i' is
+ * recognised: it marks the frame as an interrupt frame by setting
+ * UNW_FLAG_INTERRUPT_FRAME in sr->flags.  Any other combination is logged
+ * and ignored.
+ */
+static inline void
+desc_abi (unsigned char abi, unsigned char context, struct unw_state_record *sr)
+{
+	if (abi == 3 && context == 'i') {
+		sr->flags |= UNW_FLAG_INTERRUPT_FRAME;
+		UNW_DPRINT(3, "unwind.%s: interrupt frame\n", __FUNCTION__);
+	} else {
+		/* message prefix uses "unwind.<func>" like every other diagnostic here */
+		UNW_DPRINT(0, "unwind.%s: ignoring unwabi(abi=0x%x,context=0x%x)\n",
+			   __FUNCTION__, abi, context);
+	}
+}
+
+/*
+ * Branch registers b1..b5 selected by BRMASK (bit 0 = b1) are saved in
+ * consecutive general registers starting at GR.
+ */
+static inline void
+desc_br_gr (unsigned char brmask, unsigned char gr, struct unw_state_record *sr)
+{
+	/* default save time: last slot of the current region */
+	int when = sr->region_start + sr->region_len - 1;
+	int bit;
+
+	for (bit = 0; bit < 5; ++bit, brmask >>= 1) {
+		if (!(brmask & 1))
+			continue;
+		set_reg(sr->curr.reg + UNW_REG_B1 + bit, UNW_WHERE_GR, when, gr++);
+	}
+}
+
+/*
+ * Branch registers b1..b5 selected by BRMASK (bit 0 = b1) are spilled to
+ * their home locations on the memory stack.
+ */
+static inline void
+desc_br_mem (unsigned char brmask, struct unw_state_record *sr)
+{
+	/* default save time: last slot of the current region */
+	int when = sr->region_start + sr->region_len - 1;
+	int bit;
+
+	for (bit = 0; bit < 5; ++bit, brmask >>= 1) {
+		if (!(brmask & 1))
+			continue;
+		set_reg(sr->curr.reg + UNW_REG_B1 + bit, UNW_WHERE_SPILL_HOME, when, 0);
+		sr->any_spills = 1;
+	}
+}
+
+/*
+ * General registers r4..r7 selected by GRMASK and floating-point registers
+ * selected by FRMASK are spilled to their home locations.  FRMASK bits 0-3
+ * stand for f2..f5 and bits 4-19 for f16..f31.
+ */
+static inline void
+desc_frgr_mem (unsigned char grmask, unw_word frmask, struct unw_state_record *sr)
+{
+	/* default save time: last slot of the current region */
+	int when = sr->region_start + sr->region_len - 1;
+	int bit;
+
+	for (bit = 0; bit < 4; ++bit, grmask >>= 1) {
+		if ((grmask & 1) == 0)
+			continue;
+		set_reg(sr->curr.reg + UNW_REG_R4 + bit, UNW_WHERE_SPILL_HOME, when, 0);
+		sr->any_spills = 1;
+	}
+
+	for (bit = 0; bit < 20; ++bit, frmask >>= 1) {
+		int base;
+
+		if ((frmask & 1) == 0)
+			continue;
+		/* bits 4.. map onto f16.., hence the "- 4" bias */
+		base = (bit < 4) ? UNW_REG_F2 : UNW_REG_F16 - 4;
+		set_reg(sr->curr.reg + base + bit, UNW_WHERE_SPILL_HOME, when, 0);
+		sr->any_spills = 1;
+	}
+}
+
+/*
+ * Floating-point registers f2..f5 selected by FRMASK (bit 0 = f2) are
+ * spilled to their home locations.
+ */
+static inline void
+desc_fr_mem (unsigned char frmask, struct unw_state_record *sr)
+{
+	/* default save time: last slot of the current region */
+	int when = sr->region_start + sr->region_len - 1;
+	int bit;
+
+	for (bit = 0; bit < 4; ++bit, frmask >>= 1) {
+		if ((frmask & 1) == 0)
+			continue;
+		set_reg(sr->curr.reg + UNW_REG_F2 + bit, UNW_WHERE_SPILL_HOME, when, 0);
+		sr->any_spills = 1;
+	}
+}
+
+/*
+ * General registers r4..r7 selected by GRMASK (bit 0 = r4) are saved in
+ * consecutive general registers starting at GR.
+ */
+static inline void
+desc_gr_gr (unsigned char grmask, unsigned char gr, struct unw_state_record *sr)
+{
+	/* default save time: last slot of the current region */
+	int when = sr->region_start + sr->region_len - 1;
+	int bit;
+
+	for (bit = 0; bit < 4; ++bit, grmask >>= 1) {
+		if ((grmask & 1) == 0)
+			continue;
+		set_reg(sr->curr.reg + UNW_REG_R4 + bit, UNW_WHERE_GR, when, gr++);
+	}
+}
+
+/*
+ * General registers r4..r7 selected by GRMASK (bit 0 = r4) are spilled to
+ * their home locations.
+ */
+static inline void
+desc_gr_mem (unsigned char grmask, struct unw_state_record *sr)
+{
+	/* default save time: last slot of the current region */
+	int when = sr->region_start + sr->region_len - 1;
+	int bit;
+
+	for (bit = 0; bit < 4; ++bit, grmask >>= 1) {
+		if ((grmask & 1) == 0)
+			continue;
+		set_reg(sr->curr.reg + UNW_REG_R4 + bit, UNW_WHERE_SPILL_HOME, when, 0);
+		sr->any_spills = 1;
+	}
+}
+
+/*
+ * Fixed-size memory stack frame: record the frame size (SIZE units of 16
+ * bytes) as the value of PSP, effective at time T (clamped to the last slot
+ * of the region).
+ */
+static inline void
+desc_mem_stack_f (unw_word t, unw_word size, struct unw_state_record *sr)
+{
+	int when = sr->region_start + min_t(int, t, sr->region_len - 1);
+
+	set_reg(sr->curr.reg + UNW_REG_PSP, UNW_WHERE_NONE, when, 16*size);
+}
+
+/*
+ * Variable-size memory stack frame: only the time at which PSP becomes
+ * valid is recorded (T, clamped to the last slot of the region).  Unlike
+ * set_reg(), this overwrites any save time stored earlier.
+ */
+static inline void
+desc_mem_stack_v (unw_word t, struct unw_state_record *sr)
+{
+ sr->curr.reg[UNW_REG_PSP].when = sr->region_start + min_t(int, t, sr->region_len - 1);
+}
+
+/*
+ * Register REG (an index into sr->curr.reg[]) is saved in general register
+ * DST; the default save time is the last slot of the current region.
+ */
+static inline void
+desc_reg_gr (unsigned char reg, unsigned char dst, struct unw_state_record *sr)
+{
+	int when = sr->region_start + sr->region_len - 1;
+
+	set_reg(sr->curr.reg + reg, UNW_WHERE_GR, when, dst);
+}
+
+/*
+ * Register REG is saved on the memory stack relative to psp.  PSPOFF is in
+ * units of 4 bytes and is converted to the byte offset 0x10 - 4*PSPOFF.
+ */
+static inline void
+desc_reg_psprel (unsigned char reg, unw_word pspoff, struct unw_state_record *sr)
+{
+	int when = sr->region_start + sr->region_len - 1;
+
+	set_reg(sr->curr.reg + reg, UNW_WHERE_PSPREL, when, 0x10 - 4*pspoff);
+}
+
+/*
+ * Register REG is saved on the memory stack relative to sp.  SPOFF is in
+ * units of 4 bytes and is converted to the byte offset 4*SPOFF.
+ */
+static inline void
+desc_reg_sprel (unsigned char reg, unw_word spoff, struct unw_state_record *sr)
+{
+	int when = sr->region_start + sr->region_len - 1;
+
+	set_reg(sr->curr.reg + reg, UNW_WHERE_SPREL, when, 4*spoff);
+}
+
+/*
+ * Record DST as the return link register.  build_script() uses
+ * sr->return_link_reg as the branch register holding the return pointer
+ * when RP has not been saved anywhere else.
+ */
+static inline void
+desc_rp_br (unsigned char dst, struct unw_state_record *sr)
+{
+ sr->return_link_reg = dst;
+}
+
+/*
+ * Record that register REGNUM is saved at time T (clamped to the last slot
+ * of the region).  If no save location is known yet, mark the register
+ * UNW_WHERE_GR_SAVE so that finish_prologue() assigns it a general register.
+ */
+static inline void
+desc_reg_when (unsigned char regnum, unw_word t, struct unw_state_record *sr)
+{
+	struct unw_reg_info *r = sr->curr.reg + regnum;
+
+	r->when = sr->region_start + min_t(int, t, sr->region_len - 1);
+	if (r->where == UNW_WHERE_NONE)
+		r->where = UNW_WHERE_GR_SAVE;
+}
+
+/*
+ * Set the psp-relative byte offset at which the spill area starts.  PSPOFF
+ * is in units of 4 bytes; finish_prologue() hands out slots downwards from
+ * the resulting sr->spill_offset.
+ */
+static inline void
+desc_spill_base (unw_word pspoff, struct unw_state_record *sr)
+{
+ sr->spill_offset = 0x10 - 4*pspoff;
+}
+
+/*
+ * Remember where the spill mask for the current prologue starts (it is
+ * decoded later by finish_prologue()) and return a pointer just past it.
+ * The mask holds 2 bits per instruction slot of the region, rounded up to
+ * whole bytes.
+ */
+static inline unsigned char *
+desc_spill_mask (unsigned char *imaskp, struct unw_state_record *sr)
+{
+	unsigned char *past_mask = imaskp + (2*sr->region_len + 7)/8;
+
+	sr->imask = imaskp;
+	return past_mask;
+}
+
+/*
+ * Body descriptors.
+ */
+/*
+ * Body descriptor: the epilogue starts T slots before the end of the current
+ * region and unwinds ECOUNT + 1 nested prologue states (popped by the next
+ * desc_prologue()).
+ */
+static inline void
+desc_epilogue (unw_word t, unw_word ecount, struct unw_state_record *sr)
+{
+	sr->epilogue_count = ecount + 1;
+	sr->epilogue_start = sr->region_start + sr->region_len - 1 - t;
+}
+
+/*
+ * Replace the current register state (and its state stack) by a copy of the
+ * state previously saved under LABEL.  An unknown label is logged and the
+ * current state is left untouched.
+ */
+static inline void
+desc_copy_state (unw_word label, struct unw_state_record *sr)
+{
+	struct unw_labeled_state *ls = sr->labeled_states;
+
+	/* find the first saved state carrying LABEL */
+	while (ls && ls->label != label)
+		ls = ls->next;
+
+	if (!ls) {
+		printk(KERN_ERR "unwind: failed to find state labeled 0x%lx\n", label);
+		return;
+	}
+
+	free_state_stack(&sr->curr);
+	memcpy(&sr->curr, &ls->saved_state, sizeof(sr->curr));
+	/* the labeled state keeps its own stack; work on a private duplicate */
+	sr->curr.next = dup_state_stack(ls->saved_state.next);
+}
+
+/*
+ * Save a copy of the current register state, including a duplicate of its
+ * state stack, under LABEL and put it at the front of sr->labeled_states.
+ * An allocation failure for the labeled state is logged and nothing is saved.
+ */
+static inline void
+desc_label_state (unw_word label, struct unw_state_record *sr)
+{
+	struct unw_labeled_state *ls = alloc_labeled_state();
+
+	if (!ls) {
+		printk(KERN_ERR "unwind.desc_label_state(): out of memory\n");
+		return;
+	}
+
+	memcpy(&ls->saved_state, &sr->curr, sizeof(ls->saved_state));
+	ls->saved_state.next = dup_state_stack(sr->curr.next);
+	ls->label = label;
+
+	/* insert into list of labeled states: */
+	ls->next = sr->labeled_states;
+	sr->labeled_states = ls;
+}
+
+/*
+ * General descriptors.
+ */
+
+/*
+ * Decide whether a general descriptor with qualifying predicate QP and time
+ * T applies to the target.  Returns 0 if the target time has not passed T
+ * (clamped to the region) or if QP is non-zero and clear in sr->pr_val;
+ * otherwise returns 1.  A non-zero QP that was consulted and found set is
+ * added to sr->pr_mask.
+ */
+static inline int
+desc_is_active (unsigned char qp, unw_word t, struct unw_state_record *sr)
+{
+	unsigned long qp_bit;
+
+	if (sr->when_target <= sr->region_start + min_t(int, t, sr->region_len - 1))
+		return 0;
+	if (qp == 0)
+		return 1;
+
+	qp_bit = 1UL << qp;
+	if ((sr->pr_val & qp_bit) == 0)
+		return 0;
+	sr->pr_mask |= qp_bit;
+	return 1;
+}
+
+/*
+ * If the descriptor is active (see desc_is_active()), mark the register
+ * encoded by ABREG as no longer saved anywhere.
+ */
+static inline void
+desc_restore_p (unsigned char qp, unw_word t, unsigned char abreg, struct unw_state_record *sr)
+{
+	struct unw_reg_info *reg;
+
+	if (desc_is_active(qp, t, sr)) {
+		reg = sr->curr.reg + decode_abreg(abreg, 0);
+		reg->val = 0;
+		reg->when = UNW_WHEN_NEVER;
+		reg->where = UNW_WHERE_NONE;
+	}
+}
+
+/*
+ * If the descriptor is active, record that the register encoded by ABREG is
+ * saved in another register at time T.  The target is a branch register when
+ * X is set, a floating-point register when bit 7 of YTREG is set, and a
+ * general register otherwise; the low 7 bits of YTREG give its number.
+ */
+static inline void
+desc_spill_reg_p (unsigned char qp, unw_word t, unsigned char abreg, unsigned char x,
+		  unsigned char ytreg, struct unw_state_record *sr)
+{
+	struct unw_reg_info *reg;
+	enum unw_where where;
+
+	if (!desc_is_active(qp, t, sr))
+		return;
+
+	if (x)
+		where = UNW_WHERE_BR;
+	else if (ytreg & 0x80)
+		where = UNW_WHERE_FR;
+	else
+		where = UNW_WHERE_GR;
+
+	reg = sr->curr.reg + decode_abreg(abreg, 0);
+	reg->val = (ytreg & 0x7f);
+	reg->when = sr->region_start + min_t(int, t, sr->region_len - 1);
+	reg->where = where;
+}
+
+/*
+ * If the descriptor is active, record that the register encoded by ABREG is
+ * saved at time T on the memory stack at byte offset 0x10 - 4*PSPOFF
+ * relative to psp.
+ */
+static inline void
+desc_spill_psprel_p (unsigned char qp, unw_word t, unsigned char abreg, unw_word pspoff,
+		     struct unw_state_record *sr)
+{
+	struct unw_reg_info *reg;
+
+	if (desc_is_active(qp, t, sr)) {
+		reg = sr->curr.reg + decode_abreg(abreg, 1);
+		reg->val = 0x10 - 4*pspoff;
+		reg->when = sr->region_start + min_t(int, t, sr->region_len - 1);
+		reg->where = UNW_WHERE_PSPREL;
+	}
+}
+
+/*
+ * If the descriptor is active, record that the register encoded by ABREG is
+ * saved at time T on the memory stack at byte offset 4*SPOFF relative to sp.
+ */
+static inline void
+desc_spill_sprel_p (unsigned char qp, unw_word t, unsigned char abreg, unw_word spoff,
+		    struct unw_state_record *sr)
+{
+	struct unw_reg_info *reg;
+
+	if (desc_is_active(qp, t, sr)) {
+		reg = sr->curr.reg + decode_abreg(abreg, 1);
+		reg->val = 4*spoff;
+		reg->when = sr->region_start + min_t(int, t, sr->region_len - 1);
+		reg->where = UNW_WHERE_SPREL;
+	}
+}
+
+/*
+ * Glue between the descriptor decoder included below ("unwind_decoder.c")
+ * and the desc_*() routines above: each UNW_DEC_* macro forwards its
+ * arguments to the matching routine and drops the format argument.
+ * Presumably the decoder invokes these macros by name -- confirm against
+ * unwind_decoder.c.
+ *
+ * NOTE(review): UNW_DEC_BAD_CODE expands to a statement that already ends
+ * in ';' -- check how the decoder uses it before placing it in an
+ * unbraced if/else.
+ */
+#define UNW_DEC_BAD_CODE(code) printk(KERN_ERR "unwind: unknown code 0x%02x\n", \
+ code);
+
+/*
+ * region headers:
+ */
+#define UNW_DEC_PROLOGUE_GR(fmt,r,m,gr,arg) desc_prologue(0,r,m,gr,arg)
+#define UNW_DEC_PROLOGUE(fmt,b,r,arg) desc_prologue(b,r,0,32,arg)
+/*
+ * prologue descriptors:
+ */
+#define UNW_DEC_ABI(fmt,a,c,arg) desc_abi(a,c,arg)
+#define UNW_DEC_BR_GR(fmt,b,g,arg) desc_br_gr(b,g,arg)
+#define UNW_DEC_BR_MEM(fmt,b,arg) desc_br_mem(b,arg)
+#define UNW_DEC_FRGR_MEM(fmt,g,f,arg) desc_frgr_mem(g,f,arg)
+#define UNW_DEC_FR_MEM(fmt,f,arg) desc_fr_mem(f,arg)
+#define UNW_DEC_GR_GR(fmt,m,g,arg) desc_gr_gr(m,g,arg)
+#define UNW_DEC_GR_MEM(fmt,m,arg) desc_gr_mem(m,arg)
+#define UNW_DEC_MEM_STACK_F(fmt,t,s,arg) desc_mem_stack_f(t,s,arg)
+#define UNW_DEC_MEM_STACK_V(fmt,t,arg) desc_mem_stack_v(t,arg)
+#define UNW_DEC_REG_GR(fmt,r,d,arg) desc_reg_gr(r,d,arg)
+#define UNW_DEC_REG_PSPREL(fmt,r,o,arg) desc_reg_psprel(r,o,arg)
+#define UNW_DEC_REG_SPREL(fmt,r,o,arg) desc_reg_sprel(r,o,arg)
+#define UNW_DEC_REG_WHEN(fmt,r,t,arg) desc_reg_when(r,t,arg)
+#define UNW_DEC_PRIUNAT_WHEN_GR(fmt,t,arg) desc_reg_when(UNW_REG_PRI_UNAT_GR,t,arg)
+#define UNW_DEC_PRIUNAT_WHEN_MEM(fmt,t,arg) desc_reg_when(UNW_REG_PRI_UNAT_MEM,t,arg)
+#define UNW_DEC_PRIUNAT_GR(fmt,r,arg) desc_reg_gr(UNW_REG_PRI_UNAT_GR,r,arg)
+#define UNW_DEC_PRIUNAT_PSPREL(fmt,o,arg) desc_reg_psprel(UNW_REG_PRI_UNAT_MEM,o,arg)
+#define UNW_DEC_PRIUNAT_SPREL(fmt,o,arg) desc_reg_sprel(UNW_REG_PRI_UNAT_MEM,o,arg)
+#define UNW_DEC_RP_BR(fmt,d,arg) desc_rp_br(d,arg)
+#define UNW_DEC_SPILL_BASE(fmt,o,arg) desc_spill_base(o,arg)
+/* the spill mask follows the descriptor inline, so the decode pointer M is advanced past it */
+#define UNW_DEC_SPILL_MASK(fmt,m,arg) (m = desc_spill_mask(m,arg))
+/*
+ * body descriptors:
+ */
+#define UNW_DEC_EPILOGUE(fmt,t,c,arg) desc_epilogue(t,c,arg)
+#define UNW_DEC_COPY_STATE(fmt,l,arg) desc_copy_state(l,arg)
+#define UNW_DEC_LABEL_STATE(fmt,l,arg) desc_label_state(l,arg)
+/*
+ * general unwind descriptors:
+ */
+/* the variants without _P pass qp == 0, i.e. no qualifying predicate */
+#define UNW_DEC_SPILL_REG_P(f,p,t,a,x,y,arg) desc_spill_reg_p(p,t,a,x,y,arg)
+#define UNW_DEC_SPILL_REG(f,t,a,x,y,arg) desc_spill_reg_p(0,t,a,x,y,arg)
+#define UNW_DEC_SPILL_PSPREL_P(f,p,t,a,o,arg) desc_spill_psprel_p(p,t,a,o,arg)
+#define UNW_DEC_SPILL_PSPREL(f,t,a,o,arg) desc_spill_psprel_p(0,t,a,o,arg)
+#define UNW_DEC_SPILL_SPREL_P(f,p,t,a,o,arg) desc_spill_sprel_p(p,t,a,o,arg)
+#define UNW_DEC_SPILL_SPREL(f,t,a,o,arg) desc_spill_sprel_p(0,t,a,o,arg)
+#define UNW_DEC_RESTORE_P(f,p,t,a,arg) desc_restore_p(p,t,a,arg)
+#define UNW_DEC_RESTORE(f,t,a,arg) desc_restore_p(0,t,a,arg)
+
+#include "unwind_decoder.c"
+
+
+/* Unwind scripts. */
+
+/*
+ * Hash instruction pointer IP into an index for unw.hash[]: the low 4 bits
+ * of IP are dropped, the rest is multiplied by a 64-bit constant and the top
+ * UNW_LOG_HASH_SIZE bits of the product are kept.
+ */
+static inline unw_hash_index_t
+hash (unsigned long ip)
+{
+	/* based on (sqrt(5)/2-1)*2^64 */
+	const unsigned long multiplier = 0x9e3779b97f4a7c16UL;
+
+	return (ip >> 4)*multiplier >> (64 - UNW_LOG_HASH_SIZE);
+}
+
+/*
+ * Check whether SCRIPT was built for instruction pointer IP and agrees with
+ * PR on every predicate bit in the script's pr_mask.
+ *
+ * Returns 1 on a match, in which case the read lock on script->lock is
+ * still held and must be released by the caller; returns 0 otherwise, with
+ * the lock already dropped.
+ */
+static inline long
+cache_match (struct unw_script *script, unsigned long ip, unsigned long pr)
+{
+	long matched;
+
+	read_lock(&script->lock);
+	matched = (ip == script->ip && ((pr ^ script->pr_val) & script->pr_mask) == 0);
+	if (!matched) {
+		read_unlock(&script->lock);
+		return 0;
+	}
+	/* keep the read lock... */
+	return 1;
+}
+
+/*
+ * Look up a cached unwind script matching info->ip and info->pr.
+ *
+ * The script suggested by info->hint is tried first, then the hash bucket
+ * for the ip and its collision chain.  On success the script is returned
+ * with its read lock held (see cache_match()) and, for a non-hinted hit, the
+ * hint of the previously used script is updated.  Returns NULL if nothing
+ * matches; in debug mode the cache is bypassed and NULL is always returned.
+ */
+static inline struct unw_script *
+script_lookup (struct unw_frame_info *info)
+{
+	struct unw_script *script = unw.cache + info->hint;
+	unsigned short index;
+	unsigned long ip, pr;
+
+	if (UNW_DEBUG_ON(0))
+		return NULL;	/* Always regenerate scripts in debug mode */
+
+	STAT(++unw.stat.cache.lookups);
+
+	ip = info->ip;
+	pr = info->pr;
+
+	if (cache_match(script, ip, pr)) {
+		STAT(++unw.stat.cache.hinted_hits);
+		return script;
+	}
+
+	index = unw.hash[hash(ip)];
+	if (index >= UNW_CACHE_SIZE)
+		return NULL;
+
+	script = unw.cache + index;
+	while (1) {
+		if (cache_match(script, ip, pr)) {
+			/* update hint; no locking required as single-word writes are atomic */
+			STAT(++unw.stat.cache.normal_hits);
+			unw.cache[info->prev_script].hint = script - unw.cache;
+			return script;
+		}
+		/*
+		 * coll_chain indexes unw.cache[], so the end-of-chain test
+		 * must use the cache size (as script_new() and the bucket
+		 * check above do), not the hash table size.
+		 */
+		if (script->coll_chain >= UNW_CACHE_SIZE)
+			return NULL;
+		script = unw.cache + script->coll_chain;
+		STAT(++unw.stat.cache.collision_chain_traversals);
+	}
+}
+
+/*
+ * Recycle the least recently used cache entry for a new script describing
+ * instruction pointer IP.
+ *
+ * The entry is moved to the tail of the LRU chain, removed from the hash
+ * chain of its old ip (if any), entered into the hash chain for IP and
+ * reset (flags, hint and count cleared).
+ *
+ * On returning, a write lock for the SCRIPT is still being held.  Returns
+ * NULL if the write lock could not be taken without blocking.
+ *
+ * The LRU and hash updates are not protected here; the caller visible in
+ * this file (find_save_locs() via build_script()) holds unw.lock.
+ */
+static inline struct unw_script *
+script_new (unsigned long ip)
+{
+	struct unw_script *script, *prev, *tmp;
+	unw_hash_index_t index;
+	unsigned short head;
+
+	STAT(++unw.stat.script.news);
+
+	/*
+	 * Can't (easily) use cmpxchg() here because of ABA problem
+	 * that is intrinsic in cmpxchg()...
+	 */
+	head = unw.lru_head;
+	script = unw.cache + head;
+	unw.lru_head = script->lru_chain;
+
+	/*
+	 * Re-insert script at the tail of the LRU chain.  This is done
+	 * before trying the lock: if the entry were only unlinked from the
+	 * head and we then bailed out below, it would drop out of the LRU
+	 * chain for good and never be recycled again.
+	 */
+	unw.cache[unw.lru_tail].lru_chain = head;
+	unw.lru_tail = head;
+
+	/*
+	 * We'd deadlock here if we interrupted a thread that is holding a read lock on
+	 * script->lock.  Thus, if the write_trylock() fails, we simply bail out.  The
+	 * alternative would be to disable interrupts whenever we hold a read-lock, but
+	 * that seems silly.
+	 */
+	if (!write_trylock(&script->lock))
+		return NULL;
+
+	/* remove the old script from the hash table (if it's there): */
+	if (script->ip) {
+		index = hash(script->ip);
+		tmp = unw.cache + unw.hash[index];
+		prev = NULL;
+		while (1) {
+			if (tmp == script) {
+				if (prev)
+					prev->coll_chain = tmp->coll_chain;
+				else
+					unw.hash[index] = tmp->coll_chain;
+				break;
+			} else
+				prev = tmp;
+			if (tmp->coll_chain >= UNW_CACHE_SIZE)
+			/* old script wasn't in the hash-table */
+				break;
+			tmp = unw.cache + tmp->coll_chain;
+		}
+	}
+
+	/* enter new script in the hash table */
+	index = hash(ip);
+	script->coll_chain = unw.hash[index];
+	unw.hash[index] = script - unw.cache;
+
+	script->ip = ip;	/* set new IP while we're holding the locks */
+
+	STAT(if (script->coll_chain < UNW_CACHE_SIZE) ++unw.stat.script.collisions);
+
+	script->flags = 0;
+	script->hint = 0;
+	script->count = 0;
+	return script;
+}
+
+/*
+ * Store in SCRIPT the predicate values and mask that SR accumulated while
+ * the script was built; cache_match() compares against them.
+ */
+static void
+script_finalize (struct unw_script *script, struct unw_state_record *sr)
+{
+	script->pr_val = sr->pr_val;
+	script->pr_mask = sr->pr_mask;
+	/*
+	 * The write-lock on script->lock is deliberately kept: the rwlock
+	 * API doesn't offer atomic lock downgrading, so the lock is only
+	 * released later, once we're done using the script.
+	 */
+}
+
+/*
+ * Append INSN to SCRIPT.  If the script already holds UNW_MAX_SCRIPT_LEN
+ * instructions the instruction is dropped and the overflow is logged.
+ */
+static inline void
+script_emit (struct unw_script *script, struct unw_insn insn)
+{
+	if (script->count < UNW_MAX_SCRIPT_LEN) {
+		script->insn[script->count++] = insn;
+		return;
+	}
+	UNW_DPRINT(0, "unwind.%s: script exceeds maximum size of %u instructions!\n",
+		   __FUNCTION__, UNW_MAX_SCRIPT_LEN);
+}
+
+/*
+ * Emit the script instruction that sets up the NaT information for register
+ * index I of SR, depending on where the register was saved:
+ *   stacked GR (val >= 32)   SETNAT_TYPE with UNW_NAT_REGSTK
+ *   other GR, psp/sp-rel.    SETNAT_MEMSTK
+ *   FR                       SETNAT_TYPE with UNW_NAT_VAL
+ *   BR                       SETNAT_TYPE with UNW_NAT_NONE
+ * Any other location is logged and nothing is emitted.
+ */
+static inline void
+emit_nat_info (struct unw_state_record *sr, int i, struct unw_script *script)
+{
+	struct unw_reg_info *r = sr->curr.reg + i;
+	/* defaults cover the memory-stack cases; the switch overrides the rest */
+	enum unw_insn_opcode opc = UNW_INSN_SETNAT_MEMSTK;
+	unsigned long val = 0;
+	struct unw_insn insn;
+
+	switch (r->where) {
+	      case UNW_WHERE_GR:
+		if (r->val >= 32) {
+			/* register got spilled to a stacked register */
+			opc = UNW_INSN_SETNAT_TYPE;
+			val = UNW_NAT_REGSTK;
+		}
+		/* else: register got spilled to a scratch register */
+		break;
+
+	      case UNW_WHERE_FR:
+		opc = UNW_INSN_SETNAT_TYPE;
+		val = UNW_NAT_VAL;
+		break;
+
+	      case UNW_WHERE_BR:
+		opc = UNW_INSN_SETNAT_TYPE;
+		val = UNW_NAT_NONE;
+		break;
+
+	      case UNW_WHERE_PSPREL:
+	      case UNW_WHERE_SPREL:
+		break;
+
+	      default:
+		UNW_DPRINT(0, "unwind.%s: don't know how to emit nat info for where = %u\n",
+			   __FUNCTION__, r->where);
+		return;
+	}
+
+	insn.opc = opc;
+	insn.dst = unw.preg_index[i];
+	insn.val = val;
+	script_emit(script, insn);
+}
+
+/*
+ * Translate the save information for register index I of SR into script
+ * instructions appended to SCRIPT.
+ *
+ * Nothing is emitted if the register is not saved anywhere or has not been
+ * saved yet at the target time.  For r4-r7 an additional instruction
+ * describing the NaT bit is emitted unless a MOVE2 already carries it.  For
+ * PSP an extra LOAD is emitted so that info->psp ends up holding the value
+ * rather than its save location.
+ */
+static void
+compile_reg (struct unw_state_record *sr, int i, struct unw_script *script)
+{
+	struct unw_reg_info *r = sr->curr.reg + i;
+	enum unw_insn_opcode opc;
+	unsigned long val, rval;
+	struct unw_insn insn;
+	long need_nat_info;
+
+	if (r->where == UNW_WHERE_NONE || r->when >= sr->when_target)
+		return;
+
+	opc = UNW_INSN_MOVE;
+	val = rval = r->val;
+	need_nat_info = (i >= UNW_REG_R4 && i <= UNW_REG_R7);
+
+	switch (r->where) {
+	      case UNW_WHERE_GR:
+		if (rval >= 32) {
+			opc = UNW_INSN_MOVE_STACKED;
+			val = rval - 32;
+		} else if (rval >= 4 && rval <= 7) {
+			if (need_nat_info) {
+				/* MOVE2 copies the NaT word along with the location */
+				opc = UNW_INSN_MOVE2;
+				need_nat_info = 0;
+			}
+			val = unw.preg_index[UNW_REG_R4 + (rval - 4)];
+		} else if (rval == 0) {
+			opc = UNW_INSN_MOVE_CONST;
+			val = 0;
+		} else {
+			/* register got spilled to a scratch register */
+			opc = UNW_INSN_MOVE_SCRATCH;
+			val = pt_regs_off(rval);
+		}
+		break;
+
+	      case UNW_WHERE_FR:
+		/* NOTE(review): rval 0 and 1 also take the first branch -- presumably never generated */
+		if (rval <= 5)
+			val = unw.preg_index[UNW_REG_F2 + (rval - 2)];
+		else if (rval >= 16 && rval <= 31)
+			val = unw.preg_index[UNW_REG_F16 + (rval - 16)];
+		else {
+			opc = UNW_INSN_MOVE_SCRATCH;
+			if (rval <= 11)
+				val = offsetof(struct pt_regs, f6) + 16*(rval - 6);
+			else
+				UNW_DPRINT(0, "unwind.%s: kernel may not touch f%lu\n",
+					   __FUNCTION__, rval);
+		}
+		break;
+
+	      case UNW_WHERE_BR:
+		if (rval >= 1 && rval <= 5)
+			val = unw.preg_index[UNW_REG_B1 + (rval - 1)];
+		else {
+			opc = UNW_INSN_MOVE_SCRATCH;
+			if (rval == 0)
+				val = offsetof(struct pt_regs, b0);
+			else if (rval == 6)
+				val = offsetof(struct pt_regs, b6);
+			else
+				val = offsetof(struct pt_regs, b7);
+		}
+		break;
+
+	      case UNW_WHERE_SPREL:
+		opc = UNW_INSN_ADD_SP;
+		break;
+
+	      case UNW_WHERE_PSPREL:
+		opc = UNW_INSN_ADD_PSP;
+		break;
+
+	      default:
+		/* message prefix uses "unwind.<func>" like every other diagnostic here */
+		UNW_DPRINT(0, "unwind.%s: register %u has unexpected `where' value of %u\n",
+			   __FUNCTION__, i, r->where);
+		break;
+	}
+	insn.opc = opc;
+	insn.dst = unw.preg_index[i];
+	insn.val = val;
+	script_emit(script, insn);
+	if (need_nat_info)
+		emit_nat_info(sr, i, script);
+
+	if (i == UNW_REG_PSP) {
+		/*
+		 * info->psp must contain the _value_ of the previous
+		 * sp, not its save location.  We get this by
+		 * dereferencing the value we just stored in
+		 * info->psp:
+		 */
+		insn.opc = UNW_INSN_LOAD;
+		insn.dst = insn.val = unw.preg_index[UNW_REG_PSP];
+		script_emit(script, insn);
+	}
+}
+
+/*
+ * Binary-search TABLE for the entry whose [start_offset, end_offset) range
+ * contains REL_IP.  Returns the entry, or NULL if no entry covers REL_IP
+ * (including the case of an empty table, which previously dereferenced a
+ * NULL entry pointer).
+ */
+static inline const struct unw_table_entry *
+lookup (struct unw_table *table, unsigned long rel_ip)
+{
+	const struct unw_table_entry *e;
+	unsigned long lo, hi, mid;
+
+	/* do a binary search for right entry: */
+	for (lo = 0, hi = table->length; lo < hi; ) {
+		/* overflow-safe midpoint */
+		mid = lo + (hi - lo) / 2;
+		e = &table->array[mid];
+		if (rel_ip < e->start_offset)
+			hi = mid;
+		else if (rel_ip >= e->end_offset)
+			lo = mid + 1;
+		else
+			return e;
+	}
+	return NULL;
+}
+
+/*
+ * Build an unwind script for the frame described by INFO, i.e. a script
+ * that unwinds from the state at info->ip to the entrypoint of the function
+ * that called it.
+ *
+ * A cache entry is obtained from script_new(), the unwind descriptors
+ * covering info->ip are decoded into a state record, and that record is
+ * compiled into script instructions.  If no unwind table covers the ip, a
+ * default script that only recovers the return pointer from b0 is produced.
+ *
+ * Returns the script with the write lock taken by script_new() still held,
+ * or NULL if no script could be allocated.
+ */
+static inline struct unw_script *
+build_script (struct unw_frame_info *info)
+{
+ const struct unw_table_entry *e = NULL;
+ struct unw_script *script = NULL;
+ struct unw_labeled_state *ls, *next;
+ unsigned long ip = info->ip;
+ struct unw_state_record sr;
+ struct unw_table *table;
+ struct unw_reg_info *r;
+ struct unw_insn insn;
+ u8 *dp, *desc_end;
+ u64 hdr;
+ int i;
+ STAT(unsigned long start, parse_start;)
+
+ STAT(++unw.stat.script.builds; start = ia64_get_itc());
+
+ /* build state record */
+ memset(&sr, 0, sizeof(sr));
+ for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r)
+ r->when = UNW_WHEN_NEVER;
+ sr.pr_val = info->pr;
+
+ UNW_DPRINT(3, "unwind.%s: ip 0x%lx\n", __FUNCTION__, ip);
+ script = script_new(ip);
+ if (!script) {
+ UNW_DPRINT(0, "unwind.%s: failed to create unwind script\n", __FUNCTION__);
+ STAT(unw.stat.script.build_time += ia64_get_itc() - start);
+ return NULL;
+ }
+ /* let the previously used script point at the new one for hinted lookups */
+ unw.cache[info->prev_script].hint = script - unw.cache;
+
+ /* search the kernels and the modules' unwind tables for IP: */
+
+ STAT(parse_start = ia64_get_itc());
+
+ for (table = unw.tables; table; table = table->next) {
+ if (ip >= table->start && ip < table->end) {
+ e = lookup(table, ip - table->segment_base);
+ break;
+ }
+ }
+ if (!e) {
+ /* no info, return default unwinder (leaf proc, no mem stack, no saved regs) */
+ UNW_DPRINT(1, "unwind.%s: no unwind info for ip=0x%lx (prev ip=0x%lx)\n",
+ __FUNCTION__, ip, unw.cache[info->prev_script].ip);
+ sr.curr.reg[UNW_REG_RP].where = UNW_WHERE_BR;
+ sr.curr.reg[UNW_REG_RP].when = -1;
+ sr.curr.reg[UNW_REG_RP].val = 0;
+ compile_reg(&sr, UNW_REG_RP, script);
+ script_finalize(script, &sr);
+ STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start);
+ STAT(unw.stat.script.build_time += ia64_get_itc() - start);
+ return script;
+ }
+
+ /* target time: 3 per 16 bytes from the function start, plus the low 4 bits of ip */
+ sr.when_target = (3*((ip & ~0xfUL) - (table->segment_base + e->start_offset))/16
+ + (ip & 0xfUL));
+ /* the unwind info block is a 64-bit header followed by the descriptor bytes */
+ hdr = *(u64 *) (table->segment_base + e->info_offset);
+ dp = (u8 *) (table->segment_base + e->info_offset + 8);
+ desc_end = dp + 8*UNW_LENGTH(hdr);
+
+ while (!sr.done && dp < desc_end)
+ dp = unw_decode(dp, sr.in_body, &sr);
+
+ if (sr.when_target > sr.epilogue_start) {
+ /*
+ * sp has been restored and all values on the memory stack below
+ * psp also have been restored.
+ */
+ sr.curr.reg[UNW_REG_PSP].val = 0;
+ sr.curr.reg[UNW_REG_PSP].where = UNW_WHERE_NONE;
+ sr.curr.reg[UNW_REG_PSP].when = UNW_WHEN_NEVER;
+ for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r)
+ if ((r->where == UNW_WHERE_PSPREL && r->val <= 0x10)
+ || r->where == UNW_WHERE_SPREL)
+ {
+ r->val = 0;
+ r->where = UNW_WHERE_NONE;
+ r->when = UNW_WHEN_NEVER;
+ }
+ }
+
+ script->flags = sr.flags;
+
+ /*
+ * If RP didn't get saved, generate entry for the return link
+ * register.
+ */
+ if (sr.curr.reg[UNW_REG_RP].when >= sr.when_target) {
+ sr.curr.reg[UNW_REG_RP].where = UNW_WHERE_BR;
+ sr.curr.reg[UNW_REG_RP].when = -1;
+ sr.curr.reg[UNW_REG_RP].val = sr.return_link_reg;
+ UNW_DPRINT(1, "unwind.%s: using default for rp at ip=0x%lx where=%d val=0x%lx\n",
+ __FUNCTION__, ip, sr.curr.reg[UNW_REG_RP].where,
+ sr.curr.reg[UNW_REG_RP].val);
+ }
+
+#ifdef UNW_DEBUG
+ UNW_DPRINT(1, "unwind.%s: state record for func 0x%lx, t=%u:\n",
+ __FUNCTION__, table->segment_base + e->start_offset, sr.when_target);
+ for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r) {
+ if (r->where != UNW_WHERE_NONE || r->when != UNW_WHEN_NEVER) {
+ UNW_DPRINT(1, " %s <- ", unw.preg_name[r - sr.curr.reg]);
+ switch (r->where) {
+ case UNW_WHERE_GR: UNW_DPRINT(1, "r%lu", r->val); break;
+ case UNW_WHERE_FR: UNW_DPRINT(1, "f%lu", r->val); break;
+ case UNW_WHERE_BR: UNW_DPRINT(1, "b%lu", r->val); break;
+ case UNW_WHERE_SPREL: UNW_DPRINT(1, "[sp+0x%lx]", r->val); break;
+ case UNW_WHERE_PSPREL: UNW_DPRINT(1, "[psp+0x%lx]", r->val); break;
+ case UNW_WHERE_NONE:
+ UNW_DPRINT(1, "%s+0x%lx", unw.preg_name[r - sr.curr.reg], r->val);
+ break;
+
+ default:
+ UNW_DPRINT(1, "BADWHERE(%d)", r->where);
+ break;
+ }
+ UNW_DPRINT(1, "\t\t%d\n", r->when);
+ }
+ }
+#endif
+
+ STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start);
+
+ /* translate state record into unwinder instructions: */
+
+ /*
+ * First, set psp if we're dealing with a fixed-size frame;
+ * subsequent instructions may depend on this value.
+ */
+ if (sr.when_target > sr.curr.reg[UNW_REG_PSP].when
+ && (sr.curr.reg[UNW_REG_PSP].where == UNW_WHERE_NONE)
+ && sr.curr.reg[UNW_REG_PSP].val != 0) {
+ /* new psp is sp plus frame size */
+ insn.opc = UNW_INSN_ADD;
+ insn.dst = offsetof(struct unw_frame_info, psp)/8;
+ insn.val = sr.curr.reg[UNW_REG_PSP].val; /* frame size */
+ script_emit(script, insn);
+ }
+
+ /* determine where the primary UNaT is: */
+ if (sr.when_target < sr.curr.reg[UNW_REG_PRI_UNAT_GR].when)
+ i = UNW_REG_PRI_UNAT_MEM;
+ else if (sr.when_target < sr.curr.reg[UNW_REG_PRI_UNAT_MEM].when)
+ i = UNW_REG_PRI_UNAT_GR;
+ else if (sr.curr.reg[UNW_REG_PRI_UNAT_MEM].when > sr.curr.reg[UNW_REG_PRI_UNAT_GR].when)
+ i = UNW_REG_PRI_UNAT_MEM;
+ else
+ i = UNW_REG_PRI_UNAT_GR;
+
+ compile_reg(&sr, i, script);
+
+ /* compile every remaining register, from UNW_REG_BSP to the end of the register array */
+ for (i = UNW_REG_BSP; i < UNW_NUM_REGS; ++i)
+ compile_reg(&sr, i, script);
+
+ /* free labeled register states & stack: */
+
+ STAT(parse_start = ia64_get_itc());
+ for (ls = sr.labeled_states; ls; ls = next) {
+ next = ls->next;
+ free_state_stack(&ls->saved_state);
+ free_labeled_state(ls);
+ }
+ free_state_stack(&sr.curr);
+ STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start);
+
+ script_finalize(script, &sr);
+ STAT(unw.stat.script.build_time += ia64_get_itc() - start);
+ return script;
+}
+
+/*
+ * Apply the unwinding actions recorded in SCRIPT to STATE so that STATE
+ * reflects the situation that existed upon entry to the function that this
+ * script describes.
+ *
+ * STATE is treated as an array of 64-bit words (s[]); the dst and val
+ * fields of most instructions are word indices into it.  A MOVE/MOVE2 whose
+ * source word is still zero first initializes that word to the
+ * corresponding switch_stack location (lazy_init) and is then re-executed.
+ */
+static inline void
+run_script (struct unw_script *script, struct unw_frame_info *state)
+{
+ struct unw_insn *ip, *limit, next_insn;
+ unsigned long opc, dst, val, off;
+ unsigned long *s = (unsigned long *) state;
+ STAT(unsigned long start;)
+
+ STAT(++unw.stat.script.runs; start = ia64_get_itc());
+ state->flags = script->flags;
+ ip = script->insn;
+ limit = script->insn + script->count;
+ next_insn = *ip;
+
+ /*
+ * The instruction after the current one is fetched before the current
+ * one is executed.  NOTE(review): on the last iteration this reads
+ * script->insn[script->count], which is one element past the array
+ * when the script is full -- confirm this is harmless.
+ */
+ while (ip++ < limit) {
+ opc = next_insn.opc;
+ dst = next_insn.dst;
+ val = next_insn.val;
+ next_insn = *ip;
+
+ redo:
+ switch (opc) {
+ case UNW_INSN_ADD:
+ s[dst] += val;
+ break;
+
+ case UNW_INSN_MOVE2:
+ if (!s[val])
+ goto lazy_init;
+ /* copy the word following the location, too (NaT info, see lazy_init) */
+ s[dst+1] = s[val+1];
+ s[dst] = s[val];
+ break;
+
+ case UNW_INSN_MOVE:
+ if (!s[val])
+ goto lazy_init;
+ s[dst] = s[val];
+ break;
+
+ case UNW_INSN_MOVE_SCRATCH:
+ /* val is a byte offset into the scratch register area */
+ if (state->pt) {
+ s[dst] = (unsigned long) get_scratch_regs(state) + val;
+ } else {
+ s[dst] = 0;
+ UNW_DPRINT(0, "unwind.%s: no state->pt, dst=%ld, val=%ld\n",
+ __FUNCTION__, dst, val);
+ }
+ break;
+
+ case UNW_INSN_MOVE_CONST:
+ if (val == 0)
+ s[dst] = (unsigned long) &unw.r0;
+ else {
+ s[dst] = 0;
+ UNW_DPRINT(0, "unwind.%s: UNW_INSN_MOVE_CONST bad val=%ld\n",
+ __FUNCTION__, val);
+ }
+ break;
+
+
+ case UNW_INSN_MOVE_STACKED:
+ /* val is a register count relative to state->bsp */
+ s[dst] = (unsigned long) ia64_rse_skip_regs((unsigned long *)state->bsp,
+ val);
+ break;
+
+ case UNW_INSN_ADD_PSP:
+ s[dst] = state->psp + val;
+ break;
+
+ case UNW_INSN_ADD_SP:
+ s[dst] = state->sp + val;
+ break;
+
+ case UNW_INSN_SETNAT_MEMSTK:
+ if (!state->pri_unat_loc)
+ state->pri_unat_loc = &state->sw->ar_unat;
+ /* register off. is a multiple of 8, so the least 3 bits (type) are 0 */
+ s[dst+1] = ((unsigned long) state->pri_unat_loc - s[dst]) | UNW_NAT_MEMSTK;
+ break;
+
+ case UNW_INSN_SETNAT_TYPE:
+ s[dst+1] = val;
+ break;
+
+ case UNW_INSN_LOAD:
+#ifdef UNW_DEBUG
+ if ((s[val] & (local_cpu_data->unimpl_va_mask | 0x7)) != 0
+ || s[val] < TASK_SIZE)
+ {
+ UNW_DPRINT(0, "unwind.%s: rejecting bad psp=0x%lx\n",
+ __FUNCTION__, s[val]);
+ break;
+ }
+#endif
+ /* replace the location stored in s[val] by the value found there */
+ s[dst] = *(unsigned long *) s[val];
+ break;
+ }
+ }
+ STAT(unw.stat.script.run_time += ia64_get_itc() - start);
+ return;
+
+ /* reached only from MOVE/MOVE2 when the source location is not yet set */
+ lazy_init:
+ off = unw.sw_off[val];
+ s[val] = (unsigned long) state->sw + off;
+ if (off >= offsetof(struct switch_stack, r4) && off <= offsetof(struct switch_stack, r7))
+ /*
+ * We're initializing a general register: init NaT info, too. Note that
+ * the offset is a multiple of 8 which gives us the 3 bits needed for
+ * the type field.
+ */
+ s[val+1] = (offsetof(struct switch_stack, ar_unat) - off) | UNW_NAT_MEMSTK;
+ goto redo;
+}
+
+/*
+ * Find (or build) the unwind script for info->ip and run it so that the
+ * save locations in INFO describe the current frame. Returns 0 on
+ * success and -1 if the ip is rejected or no script could be
+ * located/built.
+ */
+static int
+find_save_locs (struct unw_frame_info *info)
+{
+ int have_write_lock = 0;
+ struct unw_script *scr;
+ unsigned long flags = 0;
+
+ if ((info->ip & (local_cpu_data->unimpl_va_mask | 0xf)) || info->ip < TASK_SIZE) {
+ /* don't let obviously bad addresses pollute the cache */
+ /* FIXME: should really be level 0 but it occurs too often. KAO */
+ UNW_DPRINT(1, "unwind.%s: rejecting bad ip=0x%lx\n", __FUNCTION__, info->ip);
+ /* clearing rp_loc makes a subsequent unw_unwind() fail cleanly */
+ info->rp_loc = NULL;
+ return -1;
+ }
+
+ /*
+ * NOTE(review): the read_unlock() at the end implies that
+ * script_lookup() returns with scr->lock read-held -- confirm.
+ */
+ scr = script_lookup(info);
+ if (!scr) {
+ /*
+ * Cache miss: build a new script under unw.lock.
+ * NOTE(review): the write_unlock() at the end implies that
+ * build_script() returns with scr->lock write-held -- confirm.
+ */
+ spin_lock_irqsave(&unw.lock, flags);
+ scr = build_script(info);
+ if (!scr) {
+ spin_unlock_irqrestore(&unw.lock, flags);
+ UNW_DPRINT(0,
+ "unwind.%s: failed to locate/build unwind script for ip %lx\n",
+ __FUNCTION__, info->ip);
+ return -1;
+ }
+ have_write_lock = 1;
+ }
+ info->hint = scr->hint;
+ /* remember the script's index in unw.cache */
+ info->prev_script = scr - unw.cache;
+
+ run_script(scr, info);
+
+ /* release whichever lock(s) this call ended up holding */
+ if (have_write_lock) {
+ write_unlock(&scr->lock);
+ spin_unlock_irqrestore(&unw.lock, flags);
+ } else
+ read_unlock(&scr->lock);
+ return 0;
+}
+
+/*
+ * Unwind INFO by one frame: restore ip, cfm, bsp, sp and the predicates
+ * of the caller and then locate the caller's save locations.
+ *
+ * Returns the result of find_save_locs() on success and -1 when the
+ * unwind cannot proceed (no return link, ip below GATE_ADDR, ar.pfs
+ * location missing, bsp or sp out of range, or no progress was made).
+ * INFO may already have been partially updated when -1 is returned.
+ */
+int
+unw_unwind (struct unw_frame_info *info)
+{
+ unsigned long prev_ip, prev_sp, prev_bsp;
+ unsigned long ip, pr, num_regs;
+ STAT(unsigned long start, flags;)
+ int retval;
+
+ /* when statistics are enabled, every exit path below must undo this irq save */
+ STAT(local_irq_save(flags); ++unw.stat.api.unwinds; start = ia64_get_itc());
+
+ /* remembered so that an unwind step that changes nothing can be detected */
+ prev_ip = info->ip;
+ prev_sp = info->sp;
+ prev_bsp = info->bsp;
+
+ /* restore the ip */
+ if (!info->rp_loc) {
+ /* FIXME: should really be level 0 but it occurs too often. KAO */
+ UNW_DPRINT(1, "unwind.%s: failed to locate return link (ip=0x%lx)!\n",
+ __FUNCTION__, info->ip);
+ STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
+ return -1;
+ }
+ ip = info->ip = *info->rp_loc;
+ if (ip < GATE_ADDR) {
+ UNW_DPRINT(2, "unwind.%s: reached user-space (ip=0x%lx)\n", __FUNCTION__, ip);
+ STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
+ return -1;
+ }
+
+ /* restore the cfm: */
+ if (!info->pfs_loc) {
+ UNW_DPRINT(0, "unwind.%s: failed to locate ar.pfs!\n", __FUNCTION__);
+ STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
+ return -1;
+ }
+ info->cfm_loc = info->pfs_loc;
+
+ /* restore the bsp: */
+ pr = info->pr;
+ num_regs = 0;
+ if ((info->flags & UNW_FLAG_INTERRUPT_FRAME)) {
+ /* interruption frame: pt_regs is taken to start 16 bytes above sp */
+ info->pt = info->sp + 16;
+ if ((pr & (1UL << PRED_NON_SYSCALL)) != 0)
+ num_regs = *info->cfm_loc & 0x7f; /* size of frame */
+ info->pfs_loc =
+ (unsigned long *) (info->pt + offsetof(struct pt_regs, ar_pfs));
+ UNW_DPRINT(3, "unwind.%s: interrupt_frame pt 0x%lx\n", __FUNCTION__, info->pt);
+ } else
+ num_regs = (*info->cfm_loc >> 7) & 0x7f; /* size of locals */
+ info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) info->bsp, -num_regs);
+ if (info->bsp < info->regstk.limit || info->bsp > info->regstk.top) {
+ UNW_DPRINT(0, "unwind.%s: bsp (0x%lx) out of range [0x%lx-0x%lx]\n",
+ __FUNCTION__, info->bsp, info->regstk.limit, info->regstk.top);
+ STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
+ return -1;
+ }
+
+ /* restore the sp: */
+ info->sp = info->psp;
+ /* for the memory stack, "top" is the low bound and "limit" the high bound */
+ if (info->sp < info->memstk.top || info->sp > info->memstk.limit) {
+ UNW_DPRINT(0, "unwind.%s: sp (0x%lx) out of range [0x%lx-0x%lx]\n",
+ __FUNCTION__, info->sp, info->memstk.top, info->memstk.limit);
+ STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
+ return -1;
+ }
+
+ /* no progress at all would make callers loop forever: treat it as an error */
+ if (info->ip == prev_ip && info->sp == prev_sp && info->bsp == prev_bsp) {
+ UNW_DPRINT(0, "unwind.%s: ip, sp, bsp unchanged; stopping here (ip=0x%lx)\n",
+ __FUNCTION__, ip);
+ STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
+ return -1;
+ }
+
+ /* as we unwind, the saved ar.unat becomes the primary unat: */
+ info->pri_unat_loc = info->unat_loc;
+
+ /* finally, restore the predicates: */
+ unw_get_pr(info, &info->pr);
+
+ retval = find_save_locs(info);
+ STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
+ return retval;
+}
+EXPORT_SYMBOL(unw_unwind);
+
+/*
+ * Keep unwinding INFO until a frame whose return pointer lies below
+ * FIXADDR_USER_END is found.  Returns 0 in that case and -1 if the return
+ * pointer cannot be read, the unwind fails, or sp moves to or beyond
+ * info->task + IA64_STK_OFFSET.
+ */
+int
+unw_unwind_to_user (struct unw_frame_info *info)
+{
+	unsigned long ip, sp;
+
+	for (;;) {
+		if (unw_unwind(info) < 0)
+			break;
+
+		if (unw_get_rp(info, &ip) < 0) {
+			unw_get_ip(info, &ip);
+			UNW_DPRINT(0, "unwind.%s: failed to read return pointer (ip=0x%lx)\n",
+				   __FUNCTION__, ip);
+			return -1;
+		}
+
+		unw_get_sp(info, &sp);
+		/* sp at or past the end of the task's stack area: give up */
+		if (sp >= (unsigned long)info->task + IA64_STK_OFFSET)
+			break;
+
+		if (ip < FIXADDR_USER_END)
+			return 0;
+	}
+
+	unw_get_ip(info, &ip);
+	UNW_DPRINT(0, "unwind.%s: failed to unwind to user-level (ip=0x%lx)\n", __FUNCTION__, ip);
+	return -1;
+}
+EXPORT_SYMBOL(unw_unwind_to_user);
+
+/*
+ * Zero INFO and fill in the fields common to the unw_init_* entry points:
+ * the register-backing-store and memory-stack bounds for task T, the
+ * switch_stack SW, the initial sp/psp (STKTOP) and the predicates saved
+ * in SW.
+ */
+static void
+init_frame_info (struct unw_frame_info *info, struct task_struct *t,
+ struct switch_stack *sw, unsigned long stktop)
+{
+ unsigned long rbslimit, rbstop, stklimit;
+ STAT(unsigned long start, flags;)
+
+ STAT(local_irq_save(flags); ++unw.stat.api.inits; start = ia64_get_itc());
+
+ /*
+ * Subtle stuff here: we _could_ unwind through the switch_stack frame but we
+ * don't want to do that because it would be slow as each preserved register would
+ * have to be processed. Instead, what we do here is zero out the frame info and
+ * start the unwind process at the function that created the switch_stack frame.
+ * When a preserved value in switch_stack needs to be accessed, run_script() will
+ * initialize the appropriate pointer on demand.
+ */
+ memset(info, 0, sizeof(*info));
+
+ rbslimit = (unsigned long) t + IA64_RBS_OFFSET;
+ rbstop = sw->ar_bspstore;
+ /*
+ * ar_bspstore outside [t, t + IA64_STK_OFFSET): fall back to an empty
+ * register-stack range (top == limit).
+ */
+ if (rbstop - (unsigned long) t >= IA64_STK_OFFSET)
+ rbstop = rbslimit;
+
+ stklimit = (unsigned long) t + IA64_STK_OFFSET;
+ /* never let the memory stack start at or below the top of the register stack */
+ if (stktop <= rbstop)
+ stktop = rbstop;
+
+ info->regstk.limit = rbslimit;
+ info->regstk.top = rbstop;
+ info->memstk.limit = stklimit;
+ info->memstk.top = stktop;
+ info->task = t;
+ info->sw = sw;
+ info->sp = info->psp = stktop;
+ info->pr = sw->pr;
+ UNW_DPRINT(3, "unwind.%s:\n"
+ " task 0x%lx\n"
+ " rbs = [0x%lx-0x%lx)\n"
+ " stk = [0x%lx-0x%lx)\n"
+ " pr 0x%lx\n"
+ " sw 0x%lx\n"
+ " sp 0x%lx\n",
+ __FUNCTION__, (unsigned long) t, rbslimit, rbstop, stktop, stklimit,
+ info->pr, (unsigned long) info->sw, info->sp);
+ STAT(unw.stat.api.init_time += ia64_get_itc() - start; local_irq_restore(flags));
+}
+
+/*
+ * Initialize INFO so that unwinding starts at the point described by the
+ * interruption frame PT of task T; SW is the accompanying switch_stack.
+ * The cfm, unat and pfs locations point into PT, bsp is the register-stack
+ * top backed up by the frame size (sof) taken from cr.ifs, and ip is
+ * cr.iip plus the slot number from psr.ri.
+ *
+ * The result of find_save_locs() is not reported to the caller.
+ */
+void
+unw_init_from_interruption (struct unw_frame_info *info, struct task_struct *t,
+ struct pt_regs *pt, struct switch_stack *sw)
+{
+ unsigned long sof;
+
+ /* r12 in pt_regs is used as the initial memory stack pointer */
+ init_frame_info(info, t, sw, pt->r12);
+ info->cfm_loc = &pt->cr_ifs;
+ info->unat_loc = &pt->ar_unat;
+ info->pfs_loc = &pt->ar_pfs;
+ sof = *info->cfm_loc & 0x7f;
+ info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) info->regstk.top, -sof);
+ info->ip = pt->cr_iip + ia64_psr(pt)->ri;
+ info->pt = (unsigned long) pt;
+ UNW_DPRINT(3, "unwind.%s:\n"
+ " bsp 0x%lx\n"
+ " sof 0x%lx\n"
+ " ip 0x%lx\n",
+ __FUNCTION__, info->bsp, sof, info->ip);
+ find_save_locs(info);
+}
+
+/*
+ * Initialize INFO so that unwinding of task T starts at the function that
+ * created the switch_stack frame SW: cfm comes from the ar.pfs saved in
+ * SW, bsp is the register-stack top backed up by the size of locals (sol)
+ * from that value, and ip is the b0 saved in SW.
+ *
+ * The result of find_save_locs() is not reported to the caller.
+ */
+void
+unw_init_frame_info (struct unw_frame_info *info, struct task_struct *t, struct switch_stack *sw)
+{
+ unsigned long sol;
+
+ /* the initial sp is placed 16 bytes below the end of the switch_stack structure */
+ init_frame_info(info, t, sw, (unsigned long) (sw + 1) - 16);
+ info->cfm_loc = &sw->ar_pfs;
+ sol = (*info->cfm_loc >> 7) & 0x7f;
+ info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) info->regstk.top, -sol);
+ info->ip = sw->b0;
+ UNW_DPRINT(3, "unwind.%s:\n"
+ " bsp 0x%lx\n"
+ " sol 0x%lx\n"
+ " ip 0x%lx\n",
+ __FUNCTION__, info->bsp, sol, info->ip);
+ find_save_locs(info);
+}
+
+EXPORT_SYMBOL(unw_init_frame_info);
+
+/*
+ * Start an unwind of task T from the switch_stack found 16 bytes above
+ * the kernel stack pointer saved in t->thread.ksp.
+ */
+void
+unw_init_from_blocked_task (struct unw_frame_info *info, struct task_struct *t)
+{
+	struct switch_stack *saved_sw;
+
+	saved_sw = (struct switch_stack *) (t->thread.ksp + 16);
+	UNW_DPRINT(1, "unwind.%s\n", __FUNCTION__);
+	unw_init_frame_info(info, t, saved_sw);
+}
+EXPORT_SYMBOL(unw_init_from_blocked_task);
+
+/*
+ * Fill in TABLE for the unwind entries in [TABLE_START, TABLE_END).  The
+ * covered address range runs from the first entry's start offset to the
+ * last entry's end offset, both relative to SEGMENT_BASE.  NAME is stored
+ * by pointer, not copied; the range must contain at least one entry.
+ */
+static void
+init_unwind_table (struct unw_table *table, const char *name, unsigned long segment_base,
+		   unsigned long gp, const void *table_start, const void *table_end)
+{
+	const struct unw_table_entry *first = table_start;
+	const struct unw_table_entry *limit = table_end;
+
+	table->name = name;
+	table->gp = gp;
+	table->segment_base = segment_base;
+	table->array = first;
+	table->length = limit - first;
+	table->start = segment_base + first->start_offset;
+	table->end = segment_base + (limit - 1)->end_offset;
+}
+
+/*
+ * Register the unwind table [TABLE_START, TABLE_END) for the load module
+ * NAME. Returns an opaque handle for unw_remove_unwind_table(), or NULL
+ * if the table is empty or no memory is available. NAME is stored by
+ * pointer (see init_unwind_table()), so it must outlive the table.
+ */
+void *
+unw_add_unwind_table (const char *name, unsigned long segment_base, unsigned long gp,
+ const void *table_start, const void *table_end)
+{
+ const struct unw_table_entry *start = table_start, *end = table_end;
+ struct unw_table *table;
+ unsigned long flags;
+
+ if (end - start <= 0) {
+ UNW_DPRINT(0, "unwind.%s: ignoring attempt to insert empty unwind table\n",
+ __FUNCTION__);
+ return NULL;
+ }
+
+ table = kmalloc(sizeof(*table), GFP_USER);
+ if (!table)
+ return NULL;
+
+ init_unwind_table(table, name, segment_base, gp, table_start, table_end);
+
+ spin_lock_irqsave(&unw.lock, flags);
+ {
+ /* keep kernel unwind table at the front (it's searched most commonly): */
+ /* i.e. link the new table in right behind the first list element */
+ table->next = unw.tables->next;
+ unw.tables->next = table;
+ }
+ spin_unlock_irqrestore(&unw.lock, flags);
+
+ return table;
+}
+
+/*
+ * Unregister and free the unwind table identified by HANDLE (as returned
+ * by unw_add_unwind_table()).  A NULL handle, the kernel's own table and
+ * tables that are not on the list are rejected with a debug message.
+ * Cached scripts at the head of a hash bucket whose ip falls inside the
+ * removed table are unhooked from the hash table.
+ */
+void
+unw_remove_unwind_table (void *handle)
+{
+	struct unw_table *table, *prev;
+	struct unw_script *tmp;
+	unsigned long flags;
+	long index;
+
+	if (!handle) {
+		UNW_DPRINT(0, "unwind.%s: ignoring attempt to remove non-existent unwind table\n",
+			   __FUNCTION__);
+		return;
+	}
+
+	table = handle;
+	if (table == &unw.kernel_table) {
+		UNW_DPRINT(0, "unwind.%s: sorry, freeing the kernel's unwind table is a "
+			   "no-can-do!\n", __FUNCTION__);
+		return;
+	}
+
+	spin_lock_irqsave(&unw.lock, flags);
+	{
+		/* first, delete the table: */
+
+		/*
+		 * Treating &unw.tables as a table works because "next" is the
+		 * first member of struct unw_table.
+		 */
+		for (prev = (struct unw_table *) &unw.tables; prev; prev = prev->next)
+			if (prev->next == table)
+				break;
+		if (!prev) {
+			UNW_DPRINT(0, "unwind.%s: failed to find unwind table %p\n",
+				   __FUNCTION__, (void *) table);
+			spin_unlock_irqrestore(&unw.lock, flags);
+			return;
+		}
+		prev->next = table->next;
+	}
+	spin_unlock_irqrestore(&unw.lock, flags);
+
+	/* next, remove hash table entries for this table */
+
+	/*
+	 * Valid bucket indices are 0 .. UNW_HASH_SIZE-1; stop before
+	 * UNW_HASH_SIZE so that no slot past the hash table is touched.
+	 */
+	for (index = 0; index < UNW_HASH_SIZE; ++index) {
+		tmp = unw.cache + unw.hash[index];
+		/* skip empty buckets and scripts that belong to other tables */
+		if (unw.hash[index] >= UNW_CACHE_SIZE
+		    || tmp->ip < table->start || tmp->ip >= table->end)
+			continue;
+
+		write_lock(&tmp->lock);
+		{
+			/* re-check under the lock: the script may have been recycled */
+			if (tmp->ip >= table->start && tmp->ip < table->end) {
+				unw.hash[index] = tmp->coll_chain;
+				tmp->ip = 0;
+			}
+		}
+		write_unlock(&tmp->lock);
+	}
+
+	kfree(table);
+}
+
+/*
+ * Register the gate DSO's unwind table with the unwinder and build
+ * unw.gate_table, the flat copy that sys_getunwind() hands to user level.
+ *
+ * Layout of unw.gate_table: an array of {start, end, info} triples grows
+ * from the front, terminated by a zero word; the unwind info blocks are
+ * packed from the back, and each "info" value is the block's offset from
+ * the beginning of the buffer. Always returns 0.
+ */
+static int __init
+create_gate_table (void)
+{
+ const struct unw_table_entry *entry, *start, *end;
+ unsigned long *lp, segbase = GATE_ADDR;
+ size_t info_size, size;
+ char *info;
+ Elf64_Phdr *punw = NULL, *phdr = (Elf64_Phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
+ int i;
+
+ /* locate the PT_IA_64_UNWIND program header of the gate DSO */
+ for (i = 0; i < GATE_EHDR->e_phnum; ++i, ++phdr)
+ if (phdr->p_type == PT_IA_64_UNWIND) {
+ punw = phdr;
+ break;
+ }
+
+ if (!punw) {
+ printk("%s: failed to find gate DSO's unwind table!\n", __FUNCTION__);
+ return 0;
+ }
+
+ start = (const struct unw_table_entry *) punw->p_vaddr;
+ end = (struct unw_table_entry *) ((char *) start + punw->p_memsz);
+ size = 0;
+
+ /* the returned handle is not kept */
+ unw_add_unwind_table("linux-gate.so", segbase, 0, start, end);
+
+ /* per entry: one 3-word triple plus the info block (header word + UNW_LENGTH words) */
+ for (entry = start; entry < end; ++entry)
+ size += 3*8 + 8 + 8*UNW_LENGTH(*(u64 *) (segbase + entry->info_offset));
+ size += 8; /* reserve space for "end of table" marker */
+
+ unw.gate_table = kmalloc(size, GFP_KERNEL);
+ if (!unw.gate_table) {
+ unw.gate_table_size = 0;
+ printk(KERN_ERR "%s: unable to create unwind data for gate page!\n", __FUNCTION__);
+ return 0;
+ }
+ unw.gate_table_size = size;
+
+ /* lp walks forward over the triples, info walks backward from the end */
+ lp = unw.gate_table;
+ info = (char *) unw.gate_table + size;
+
+ for (entry = start; entry < end; ++entry, lp += 3) {
+ info_size = 8 + 8*UNW_LENGTH(*(u64 *) (segbase + entry->info_offset));
+ info -= info_size;
+ memcpy(info, (char *) segbase + entry->info_offset, info_size);
+
+ lp[0] = segbase + entry->start_offset; /* start */
+ lp[1] = segbase + entry->end_offset; /* end */
+ lp[2] = info - (char *) unw.gate_table; /* info */
+ }
+ *lp = 0; /* end-of-table marker */
+ return 0;
+}
+
+__initcall(create_gate_table);
+
+/*
+ * One-time initialization of the unwinder: record, for every preserved
+ * register, the switch_stack offset that run_script() uses for lazy
+ * initialization, set up the script cache and its LRU chain, and
+ * describe the kernel's own unwind table.
+ */
+void __init
+unw_init (void)
+{
+	extern char __gp[];
+	extern void unw_hash_index_t_is_too_narrow (void);
+	long i, off;
+
+	/*
+	 * NOTE(review): unw_hash_index_t_is_too_narrow() is only declared
+	 * here; presumably it is left undefined on purpose so that a
+	 * too-narrow index type fails at link time -- confirm.
+	 */
+	if (8*sizeof(unw_hash_index_t) < UNW_LOG_HASH_SIZE)
+		unw_hash_index_t_is_too_narrow();
+
+	unw.sw_off[unw.preg_index[UNW_REG_PRI_UNAT_GR]] = SW(AR_UNAT);
+	unw.sw_off[unw.preg_index[UNW_REG_BSPSTORE]] = SW(AR_BSPSTORE);
+	/* ar.pfs must resolve to its own switch_stack slot, not to ar.unat's */
+	unw.sw_off[unw.preg_index[UNW_REG_PFS]] = SW(AR_PFS);
+	unw.sw_off[unw.preg_index[UNW_REG_RP]] = SW(B0);
+	unw.sw_off[unw.preg_index[UNW_REG_UNAT]] = SW(AR_UNAT);
+	unw.sw_off[unw.preg_index[UNW_REG_PR]] = SW(PR);
+	unw.sw_off[unw.preg_index[UNW_REG_LC]] = SW(AR_LC);
+	unw.sw_off[unw.preg_index[UNW_REG_FPSR]] = SW(AR_FPSR);
+	/* general and branch registers occupy 8 bytes each, fp registers 16 */
+	for (i = UNW_REG_R4, off = SW(R4); i <= UNW_REG_R7; ++i, off += 8)
+		unw.sw_off[unw.preg_index[i]] = off;
+	for (i = UNW_REG_B1, off = SW(B1); i <= UNW_REG_B5; ++i, off += 8)
+		unw.sw_off[unw.preg_index[i]] = off;
+	for (i = UNW_REG_F2, off = SW(F2); i <= UNW_REG_F5; ++i, off += 16)
+		unw.sw_off[unw.preg_index[i]] = off;
+	for (i = UNW_REG_F16, off = SW(F16); i <= UNW_REG_F31; ++i, off += 16)
+		unw.sw_off[unw.preg_index[i]] = off;
+
+	/* chain all cache entries: entry i links to i-1, head is the last entry */
+	for (i = 0; i < UNW_CACHE_SIZE; ++i) {
+		if (i > 0)
+			unw.cache[i].lru_chain = (i - 1);
+		unw.cache[i].coll_chain = -1;
+		rwlock_init(&unw.cache[i].lock);
+	}
+	unw.lru_head = UNW_CACHE_SIZE - 1;
+	unw.lru_tail = 0;
+
+	init_unwind_table(&unw.kernel_table, "kernel", KERNEL_START, (unsigned long) __gp,
+			  __start_unwind, __end_unwind);
+}
+
+/*
+ * DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED
+ *
+ * This system call has been deprecated. The new and improved way to get
+ * at the kernel's unwind info is via the gate DSO. The address of the
+ * ELF header for this DSO is passed to user-level via AT_SYSINFO_EHDR.
+ *
+ * DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED
+ *
+ * This system call copies the unwind data into the buffer pointed to by BUF and returns
+ * the size of the unwind data. If BUF_SIZE is smaller than the size of the unwind data
+ * or if BUF is NULL, nothing is copied, but the system call still returns the size of the
+ * unwind data.
+ *
+ * The first portion of the unwind data contains an unwind table and rest contains the
+ * associated unwind info (in no particular order). The unwind table consists of a table
+ * of entries of the form:
+ *
+ * u64 start; (64-bit address of start of function)
+ * u64 end; (64-bit address of end of function)
+ * u64 info; (BUF-relative offset to unwind info)
+ *
+ * The end of the unwind table is indicated by an entry with a START address of zero.
+ *
+ * Please see the IA-64 Software Conventions and Runtime Architecture manual for details
+ * on the format of the unwind info.
+ *
+ * ERRORS
+ * EFAULT BUF points outside your accessible address space.
+ */
+asmlinkage long
+sys_getunwind (void __user *buf, size_t buf_size)
+{
+	/* copy only when a buffer was supplied and it can hold the whole table */
+	if (buf && buf_size >= unw.gate_table_size
+	    && copy_to_user(buf, unw.gate_table, unw.gate_table_size) != 0)
+		return -EFAULT;
+
+	/* the size is reported whether or not anything was copied */
+	return unw.gate_table_size;
+}
diff --git a/arch/ia64/kernel/unwind_decoder.c b/arch/ia64/kernel/unwind_decoder.c
new file mode 100644
index 00000000000..50ac2d82f9b
--- /dev/null
+++ b/arch/ia64/kernel/unwind_decoder.c
@@ -0,0 +1,459 @@
+/*
+ * Copyright (C) 2000 Hewlett-Packard Co
+ * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Generic IA-64 unwind info decoder.
+ *
+ * This file is used both by the Linux kernel and objdump. Please keep
+ * the two copies of this file in sync.
+ *
+ * You need to customize the decoder by defining the following
+ * macros/constants before including this file:
+ *
+ * Types:
+ * unw_word Unsigned integer type with at least 64 bits
+ *
+ * Register names:
+ * UNW_REG_BSP
+ * UNW_REG_BSPSTORE
+ * UNW_REG_FPSR
+ * UNW_REG_LC
+ * UNW_REG_PFS
+ * UNW_REG_PR
+ * UNW_REG_RNAT
+ * UNW_REG_PSP
+ * UNW_REG_RP
+ * UNW_REG_UNAT
+ *
+ * Decoder action macros:
+ * UNW_DEC_BAD_CODE(code)
+ * UNW_DEC_ABI(fmt,abi,context,arg)
+ * UNW_DEC_BR_GR(fmt,brmask,gr,arg)
+ * UNW_DEC_BR_MEM(fmt,brmask,arg)
+ * UNW_DEC_COPY_STATE(fmt,label,arg)
+ * UNW_DEC_EPILOGUE(fmt,t,ecount,arg)
+ * UNW_DEC_FRGR_MEM(fmt,grmask,frmask,arg)
+ * UNW_DEC_FR_MEM(fmt,frmask,arg)
+ * UNW_DEC_GR_GR(fmt,grmask,gr,arg)
+ * UNW_DEC_GR_MEM(fmt,grmask,arg)
+ * UNW_DEC_LABEL_STATE(fmt,label,arg)
+ * UNW_DEC_MEM_STACK_F(fmt,t,size,arg)
+ * UNW_DEC_MEM_STACK_V(fmt,t,arg)
+ * UNW_DEC_PRIUNAT_GR(fmt,r,arg)
+ * UNW_DEC_PRIUNAT_WHEN_GR(fmt,t,arg)
+ * UNW_DEC_PRIUNAT_WHEN_MEM(fmt,t,arg)
+ * UNW_DEC_PRIUNAT_PSPREL(fmt,pspoff,arg)
+ * UNW_DEC_PRIUNAT_SPREL(fmt,spoff,arg)
+ * UNW_DEC_PROLOGUE(fmt,body,rlen,arg)
+ * UNW_DEC_PROLOGUE_GR(fmt,rlen,mask,grsave,arg)
+ * UNW_DEC_REG_PSPREL(fmt,reg,pspoff,arg)
+ * UNW_DEC_REG_REG(fmt,src,dst,arg)
+ * UNW_DEC_REG_SPREL(fmt,reg,spoff,arg)
+ * UNW_DEC_REG_WHEN(fmt,reg,t,arg)
+ * UNW_DEC_RESTORE(fmt,t,abreg,arg)
+ * UNW_DEC_RESTORE_P(fmt,qp,t,abreg,arg)
+ * UNW_DEC_SPILL_BASE(fmt,pspoff,arg)
+ * UNW_DEC_SPILL_MASK(fmt,imaskp,arg)
+ * UNW_DEC_SPILL_PSPREL(fmt,t,abreg,pspoff,arg)
+ * UNW_DEC_SPILL_PSPREL_P(fmt,qp,t,abreg,pspoff,arg)
+ * UNW_DEC_SPILL_REG(fmt,t,abreg,x,ytreg,arg)
+ * UNW_DEC_SPILL_REG_P(fmt,qp,t,abreg,x,ytreg,arg)
+ * UNW_DEC_SPILL_SPREL(fmt,t,abreg,spoff,arg)
+ * UNW_DEC_SPILL_SPREL_P(fmt,qp,t,abreg,spoff,arg)
+ */
+
+/*
+ * Decode one unsigned LEB128 value starting at *DPP: each byte supplies
+ * seven payload bits, least-significant group first, and a set top bit
+ * means another byte follows.  *DPP is advanced past the encoded value.
+ */
+static unw_word
+unw_decode_uleb128 (unsigned char **dpp)
+{
+  unsigned char *cursor = *dpp;
+  unw_word value = 0, chunk;
+  unsigned bit_pos = 0;
+
+  do
+    {
+      chunk = *cursor++;
+      value |= (chunk & 0x7f) << bit_pos;
+      bit_pos += 7;
+    }
+  while ((chunk & 0x80) != 0);
+
+  *dpp = cursor;
+  return value;
+}
+
+/*
+ * Decode an X1 descriptor: one byte (top bit selects sp-relative over
+ * psp-relative, low seven bits are abreg) followed by two ULEB128 values,
+ * the time and the offset.  Returns the address of the next descriptor.
+ */
+static unsigned char *
+unw_decode_x1 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char head, abreg;
+  unw_word when, offset;
+
+  head = *dp++;
+  when = unw_decode_uleb128 (&dp);
+  offset = unw_decode_uleb128 (&dp);
+  abreg = head & 0x7f;
+
+  if ((head & 0x80) != 0)
+    UNW_DEC_SPILL_SPREL(X1, when, abreg, offset, arg);
+  else
+    UNW_DEC_SPILL_PSPREL(X1, when, abreg, offset, arg);
+  return dp;
+}
+
+/*
+ * Decode an X2 descriptor: two bytes (x flag plus abreg, then ytreg)
+ * followed by a ULEB128 time.  With x clear and ytreg zero the record is
+ * a restore, otherwise a spill to a register.  Returns the address of the
+ * next descriptor.
+ */
+static unsigned char *
+unw_decode_x2 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char first, abreg, x, ytreg;
+  unw_word t;
+
+  first = *dp++;
+  ytreg = *dp++;
+  t = unw_decode_uleb128 (&dp);
+  abreg = first & 0x7f;
+  x = (first >> 7) & 1;
+
+  if (x == 0 && ytreg == 0)
+    UNW_DEC_RESTORE(X2, t, abreg, arg);
+  else
+    UNW_DEC_SPILL_REG(X2, t, abreg, x, ytreg, arg);
+  return dp;
+}
+
+/*
+ * Decode an X3 descriptor (predicated form of X1): the first byte holds
+ * the sp-relative flag (top bit) and the qualifying predicate (low six
+ * bits), the second byte holds abreg, then the time and the offset follow
+ * as ULEB128 values.  Returns the address of the next descriptor.
+ */
+static unsigned char *
+unw_decode_x3 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char first, second, qp, abreg;
+  unw_word when, offset;
+
+  first = *dp++;
+  second = *dp++;
+  when = unw_decode_uleb128 (&dp);
+  offset = unw_decode_uleb128 (&dp);
+
+  abreg = second & 0x7f;
+  qp = first & 0x3f;
+
+  if ((first & 0x80) != 0)
+    UNW_DEC_SPILL_SPREL_P(X3, qp, when, abreg, offset, arg);
+  else
+    UNW_DEC_SPILL_PSPREL_P(X3, qp, when, abreg, offset, arg);
+  return dp;
+}
+
+/*
+ * Decode an X4 descriptor (predicated form of X2): three bytes (qualifying
+ * predicate; x flag plus abreg; ytreg) followed by a ULEB128 time.  With x
+ * clear and ytreg zero the record is a restore, otherwise a spill to a
+ * register.  Returns the address of the next descriptor.
+ */
+static unsigned char *
+unw_decode_x4 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char first, second, qp, abreg, x, ytreg;
+  unw_word t;
+
+  first = *dp++;
+  second = *dp++;
+  ytreg = *dp++;
+  t = unw_decode_uleb128 (&dp);
+
+  qp = first & 0x3f;
+  abreg = second & 0x7f;
+  x = (second >> 7) & 1;
+
+  if (x == 0 && ytreg == 0)
+    UNW_DEC_RESTORE_P(X4, qp, t, abreg, arg);
+  else
+    UNW_DEC_SPILL_REG_P(X4, qp, t, abreg, x, ytreg, arg);
+  return dp;
+}
+
+/*
+ * Decode an R1 region header: bit 5 of CODE selects body over prologue
+ * and the low five bits give the region length.  No further bytes are
+ * consumed.
+ */
+static unsigned char *
+unw_decode_r1 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unw_word region_len = (code & 0x1f);
+  int is_body = (code & 0x20) ? 1 : 0;
+
+  UNW_DEC_PROLOGUE(R1, is_body, region_len, arg);
+  return dp;
+}
+
+/*
+ * Decode an R2 region header: the 4-bit mask is split between the low
+ * three bits of CODE and the top bit of the next byte, whose low seven
+ * bits are grsave; the region length follows as a ULEB128 value.
+ */
+static unsigned char *
+unw_decode_r2 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char next, mask, grsave;
+  unw_word rlen;
+
+  next = *dp++;
+  grsave = (next & 0x7f);
+  mask = ((code & 0x7) << 1) | ((next >> 7) & 1);
+  rlen = unw_decode_uleb128 (&dp);
+
+  UNW_DEC_PROLOGUE_GR(R2, rlen, mask, grsave, arg);
+  return dp;
+}
+
+/*
+ * Decode an R3 region header: the region length follows as a ULEB128
+ * value; the region is a body when the low two bits of CODE equal 1.
+ */
+static unsigned char *
+unw_decode_r3 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unw_word rlen = unw_decode_uleb128 (&dp);
+
+  UNW_DEC_PROLOGUE(R3, ((code & 0x3) == 1), rlen, arg);
+  return dp;
+}
+
+/*
+ * Decode a P1 descriptor: the low five bits of CODE are the branch
+ * register mask.  No further bytes are consumed.
+ */
+static unsigned char *
+unw_decode_p1 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char br_bits;
+
+  br_bits = (code & 0x1f);
+  UNW_DEC_BR_MEM(P1, br_bits, arg);
+  return dp;
+}
+
+/*
+ * Decode a prologue descriptor of format P2, P3, P4 or P5; the format is
+ * selected by bits 4 and 3 of CODE and, for P4/P5, by its low three bits.
+ * Returns the address of the next descriptor.
+ */
+static unsigned char *
+unw_decode_p2_p5 (unsigned char *dp, unsigned char code, void *arg)
+{
+ /* P2: 5-bit branch mask split across code and byte1, plus a general register */
+ if ((code & 0x10) == 0)
+ {
+ unsigned char byte1 = *dp++;
+
+ UNW_DEC_BR_GR(P2, ((code & 0xf) << 1) | ((byte1 >> 7) & 1),
+ (byte1 & 0x7f), arg);
+ }
+ /* P3: 4-bit record type r split across code and byte1, plus a register number */
+ else if ((code & 0x08) == 0)
+ {
+ unsigned char byte1 = *dp++, r, dst;
+
+ r = ((code & 0x7) << 1) | ((byte1 >> 7) & 1);
+ dst = (byte1 & 0x7f);
+ switch (r)
+ {
+ case 0: UNW_DEC_REG_GR(P3, UNW_REG_PSP, dst, arg); break;
+ case 1: UNW_DEC_REG_GR(P3, UNW_REG_RP, dst, arg); break;
+ case 2: UNW_DEC_REG_GR(P3, UNW_REG_PFS, dst, arg); break;
+ case 3: UNW_DEC_REG_GR(P3, UNW_REG_PR, dst, arg); break;
+ case 4: UNW_DEC_REG_GR(P3, UNW_REG_UNAT, dst, arg); break;
+ case 5: UNW_DEC_REG_GR(P3, UNW_REG_LC, dst, arg); break;
+ case 6: UNW_DEC_RP_BR(P3, dst, arg); break;
+ case 7: UNW_DEC_REG_GR(P3, UNW_REG_RNAT, dst, arg); break;
+ case 8: UNW_DEC_REG_GR(P3, UNW_REG_BSP, dst, arg); break;
+ case 9: UNW_DEC_REG_GR(P3, UNW_REG_BSPSTORE, dst, arg); break;
+ case 10: UNW_DEC_REG_GR(P3, UNW_REG_FPSR, dst, arg); break;
+ case 11: UNW_DEC_PRIUNAT_GR(P3, dst, arg); break;
+ default: UNW_DEC_BAD_CODE(r); break;
+ }
+ }
+ /* P4: the spill mask follows; dp is handed to the action macro itself */
+ else if ((code & 0x7) == 0)
+ UNW_DEC_SPILL_MASK(P4, dp, arg);
+ /* P5: three bytes holding a 4-bit gr mask and a 20-bit fr mask */
+ else if ((code & 0x7) == 1)
+ {
+ unw_word grmask, frmask, byte1, byte2, byte3;
+
+ byte1 = *dp++; byte2 = *dp++; byte3 = *dp++;
+ grmask = ((byte1 >> 4) & 0xf);
+ frmask = ((byte1 & 0xf) << 16) | (byte2 << 8) | byte3;
+ UNW_DEC_FRGR_MEM(P5, grmask, frmask, arg);
+ }
+ else
+ UNW_DEC_BAD_CODE(code);
+ return dp;
+}
+
+/*
+ * Decode a P6 descriptor: the low four bits of CODE are a register mask
+ * and bit 4 selects general registers over floating-point registers.  No
+ * further bytes are consumed.
+ */
+static unsigned char *
+unw_decode_p6 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char reg_bits = (code & 0x0f);
+
+  if ((code & 0x10) != 0)
+    UNW_DEC_GR_MEM(P6, reg_bits, arg);
+  else
+    UNW_DEC_FR_MEM(P6, reg_bits, arg);
+  return dp;
+}
+
+/*
+ * Decode a prologue descriptor of format P7, P8, P9 or P10, or hand off
+ * to one of the X1-X4 decoders. Bit 4 of CODE clear selects P7; otherwise
+ * the low four bits of CODE select the format. Returns the address of the
+ * next descriptor.
+ */
+static unsigned char *
+unw_decode_p7_p10 (unsigned char *dp, unsigned char code, void *arg)
+{
+ unsigned char r, byte1, byte2;
+ unw_word t, size;
+
+ /* P7: record type r in the low four bits of code, one ULEB128 operand */
+ if ((code & 0x10) == 0)
+ {
+ r = (code & 0xf);
+ t = unw_decode_uleb128 (&dp);
+ switch (r)
+ {
+ case 0:
+ /* the only P7 record with a second ULEB128 operand */
+ size = unw_decode_uleb128 (&dp);
+ UNW_DEC_MEM_STACK_F(P7, t, size, arg);
+ break;
+
+ case 1: UNW_DEC_MEM_STACK_V(P7, t, arg); break;
+ case 2: UNW_DEC_SPILL_BASE(P7, t, arg); break;
+ case 3: UNW_DEC_REG_SPREL(P7, UNW_REG_PSP, t, arg); break;
+ case 4: UNW_DEC_REG_WHEN(P7, UNW_REG_RP, t, arg); break;
+ case 5: UNW_DEC_REG_PSPREL(P7, UNW_REG_RP, t, arg); break;
+ case 6: UNW_DEC_REG_WHEN(P7, UNW_REG_PFS, t, arg); break;
+ case 7: UNW_DEC_REG_PSPREL(P7, UNW_REG_PFS, t, arg); break;
+ case 8: UNW_DEC_REG_WHEN(P7, UNW_REG_PR, t, arg); break;
+ case 9: UNW_DEC_REG_PSPREL(P7, UNW_REG_PR, t, arg); break;
+ case 10: UNW_DEC_REG_WHEN(P7, UNW_REG_LC, t, arg); break;
+ case 11: UNW_DEC_REG_PSPREL(P7, UNW_REG_LC, t, arg); break;
+ case 12: UNW_DEC_REG_WHEN(P7, UNW_REG_UNAT, t, arg); break;
+ case 13: UNW_DEC_REG_PSPREL(P7, UNW_REG_UNAT, t, arg); break;
+ case 14: UNW_DEC_REG_WHEN(P7, UNW_REG_FPSR, t, arg); break;
+ case 15: UNW_DEC_REG_PSPREL(P7, UNW_REG_FPSR, t, arg); break;
+ default: UNW_DEC_BAD_CODE(r); break;
+ }
+ }
+ else
+ {
+ switch (code & 0xf)
+ {
+ case 0x0: /* p8 */
+ {
+ /* record type comes from the next byte, followed by one ULEB128 operand */
+ r = *dp++;
+ t = unw_decode_uleb128 (&dp);
+ switch (r)
+ {
+ case 1: UNW_DEC_REG_SPREL(P8, UNW_REG_RP, t, arg); break;
+ case 2: UNW_DEC_REG_SPREL(P8, UNW_REG_PFS, t, arg); break;
+ case 3: UNW_DEC_REG_SPREL(P8, UNW_REG_PR, t, arg); break;
+ case 4: UNW_DEC_REG_SPREL(P8, UNW_REG_LC, t, arg); break;
+ case 5: UNW_DEC_REG_SPREL(P8, UNW_REG_UNAT, t, arg); break;
+ case 6: UNW_DEC_REG_SPREL(P8, UNW_REG_FPSR, t, arg); break;
+ case 7: UNW_DEC_REG_WHEN(P8, UNW_REG_BSP, t, arg); break;
+ case 8: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSP, t, arg); break;
+ case 9: UNW_DEC_REG_SPREL(P8, UNW_REG_BSP, t, arg); break;
+ case 10: UNW_DEC_REG_WHEN(P8, UNW_REG_BSPSTORE, t, arg); break;
+ case 11: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSPSTORE, t, arg); break;
+ case 12: UNW_DEC_REG_SPREL(P8, UNW_REG_BSPSTORE, t, arg); break;
+ case 13: UNW_DEC_REG_WHEN(P8, UNW_REG_RNAT, t, arg); break;
+ case 14: UNW_DEC_REG_PSPREL(P8, UNW_REG_RNAT, t, arg); break;
+ case 15: UNW_DEC_REG_SPREL(P8, UNW_REG_RNAT, t, arg); break;
+ case 16: UNW_DEC_PRIUNAT_WHEN_GR(P8, t, arg); break;
+ case 17: UNW_DEC_PRIUNAT_PSPREL(P8, t, arg); break;
+ case 18: UNW_DEC_PRIUNAT_SPREL(P8, t, arg); break;
+ case 19: UNW_DEC_PRIUNAT_WHEN_MEM(P8, t, arg); break;
+ default: UNW_DEC_BAD_CODE(r); break;
+ }
+ }
+ break;
+
+ /* p9: two bytes, a 4-bit gr mask and a 7-bit general register number */
+ case 0x1:
+ byte1 = *dp++; byte2 = *dp++;
+ UNW_DEC_GR_GR(P9, (byte1 & 0xf), (byte2 & 0x7f), arg);
+ break;
+
+ case 0xf: /* p10 */
+ byte1 = *dp++; byte2 = *dp++;
+ UNW_DEC_ABI(P10, byte1, byte2, arg);
+ break;
+
+ /* codes 0x9-0xc: X1-X4 descriptors, decoded by their own helpers */
+ case 0x9:
+ return unw_decode_x1 (dp, code, arg);
+
+ case 0xa:
+ return unw_decode_x2 (dp, code, arg);
+
+ case 0xb:
+ return unw_decode_x3 (dp, code, arg);
+
+ case 0xc:
+ return unw_decode_x4 (dp, code, arg);
+
+ default:
+ UNW_DEC_BAD_CODE(code);
+ break;
+ }
+ }
+ return dp;
+}
+
+/*
+ * Decode a B1 body descriptor: the low five bits of CODE are the label
+ * and bit 5 selects copy_state over label_state.  No further bytes are
+ * consumed.
+ */
+static unsigned char *
+unw_decode_b1 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unw_word state_label = (code & 0x1f);
+
+  if ((code & 0x20) == 0)
+    UNW_DEC_LABEL_STATE(B1, state_label, arg);
+  else
+    UNW_DEC_COPY_STATE(B1, state_label, arg);
+  return dp;
+}
+
+/*
+ * Decode a B2 epilogue descriptor: the epilogue count is the low five
+ * bits of CODE and the time follows as a ULEB128 value.
+ */
+static unsigned char *
+unw_decode_b2 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unw_word when = unw_decode_uleb128 (&dp);
+
+  UNW_DEC_EPILOGUE(B2, when, (code & 0x1f), arg);
+  return dp;
+}
+
+/*
+ * Decode a body descriptor of format B3 or B4, or hand off to one of the
+ * X1-X4 decoders, depending on bit 4 and the low three bits of CODE.
+ * Returns the address of the next descriptor.
+ */
+static unsigned char *
+unw_decode_b3_x4 (unsigned char *dp, unsigned char code, void *arg)
+{
+ unw_word t, ecount, label;
+
+ /* B3: epilogue with time and count both encoded as ULEB128 */
+ if ((code & 0x10) == 0)
+ {
+ t = unw_decode_uleb128 (&dp);
+ ecount = unw_decode_uleb128 (&dp);
+ UNW_DEC_EPILOGUE(B3, t, ecount, arg);
+ }
+ /* B4: ULEB128 label; bit 3 selects copy_state over label_state */
+ else if ((code & 0x07) == 0)
+ {
+ label = unw_decode_uleb128 (&dp);
+ if ((code & 0x08) != 0)
+ UNW_DEC_COPY_STATE(B4, label, arg);
+ else
+ UNW_DEC_LABEL_STATE(B4, label, arg);
+ }
+ else
+ /* low three bits 1-4 select the X1-X4 formats */
+ switch (code & 0x7)
+ {
+ case 1: return unw_decode_x1 (dp, code, arg);
+ case 2: return unw_decode_x2 (dp, code, arg);
+ case 3: return unw_decode_x3 (dp, code, arg);
+ case 4: return unw_decode_x4 (dp, code, arg);
+ default: UNW_DEC_BAD_CODE(code); break;
+ }
+ return dp;
+}
+
+/*
+ * A descriptor decoder takes the address just past the descriptor's first
+ * byte, that first byte itself, and the caller's opaque argument, and
+ * returns the address of the next descriptor.
+ */
+typedef unsigned char *(*unw_decoder) (unsigned char *, unsigned char, void *);
+
+/*
+ * Dispatch table used by unw_decode(): the first index is 0 inside a
+ * prologue and 1 inside a body, the second index is the top three bits of
+ * the descriptor's first byte. Entries 0-3 (region headers) are the same
+ * in both rows.
+ */
+static unw_decoder unw_decode_table[2][8] =
+{
+ /* prologue table: */
+ {
+ unw_decode_r1, /* 0 */
+ unw_decode_r1,
+ unw_decode_r2,
+ unw_decode_r3,
+ unw_decode_p1, /* 4 */
+ unw_decode_p2_p5,
+ unw_decode_p6,
+ unw_decode_p7_p10
+ },
+ /* body table: */
+ {
+ unw_decode_r1, /* 0 */
+ unw_decode_r1,
+ unw_decode_r2,
+ unw_decode_r3,
+ unw_decode_b1, /* 4 */
+ unw_decode_b1,
+ unw_decode_b2,
+ unw_decode_b3_x4
+ }
+};
+
+/*
+ * Decode the descriptor at DP and return the address of the descriptor
+ * that follows it.  INSIDE_BODY (0 or 1) picks the prologue or the body
+ * row of the dispatch table; the top three bits of the first byte pick the
+ * decoder within that row.
+ */
+static inline unsigned char *
+unw_decode (unsigned char *dp, int inside_body, void *arg)
+{
+  unsigned char code = *dp++;
+
+  return (*unw_decode_table[inside_body][code >> 5]) (dp, code, arg);
+}
diff --git a/arch/ia64/kernel/unwind_i.h b/arch/ia64/kernel/unwind_i.h
new file mode 100644
index 00000000000..96693a6ae37
--- /dev/null
+++ b/arch/ia64/kernel/unwind_i.h
@@ -0,0 +1,164 @@
+/*
+ * Copyright (C) 2000, 2002-2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Kernel unwind support.
+ */
+
+/*
+ * Accessors for a 64-bit header word: version in bits 48 and up, flags in
+ * bits 32-47 (EHANDLER is bit 32, UHANDLER is bit 33) and a length in the
+ * low 32 bits.
+ * NOTE(review): presumably the header of struct unw_info_block -- confirm.
+ */
+#define UNW_VER(x) ((x) >> 48)
+#define UNW_FLAG_MASK 0x0000ffff00000000
+#define UNW_FLAG_OSMASK 0x0000f00000000000
+#define UNW_FLAG_EHANDLER(x) ((x) & 0x0000000100000000L)
+#define UNW_FLAG_UHANDLER(x) ((x) & 0x0000000200000000L)
+#define UNW_LENGTH(x) ((x) & 0x00000000ffffffffL)
+
+/*
+ * Indices of the registers whose save locations the unwinder tracks;
+ * UNW_NUM_REGS is the number of such registers and sizes the per-state
+ * register array.
+ */
+enum unw_register_index {
+ /* primary unat: */
+ UNW_REG_PRI_UNAT_GR,
+ UNW_REG_PRI_UNAT_MEM,
+
+ /* register stack */
+ UNW_REG_BSP, /* register stack pointer */
+ UNW_REG_BSPSTORE,
+ UNW_REG_PFS, /* previous function state */
+ UNW_REG_RNAT,
+ /* memory stack */
+ UNW_REG_PSP, /* previous memory stack pointer */
+ /* return pointer: */
+ UNW_REG_RP,
+
+ /* preserved registers: */
+ UNW_REG_R4, UNW_REG_R5, UNW_REG_R6, UNW_REG_R7,
+ UNW_REG_UNAT, UNW_REG_PR, UNW_REG_LC, UNW_REG_FPSR,
+ UNW_REG_B1, UNW_REG_B2, UNW_REG_B3, UNW_REG_B4, UNW_REG_B5,
+ UNW_REG_F2, UNW_REG_F3, UNW_REG_F4, UNW_REG_F5,
+ UNW_REG_F16, UNW_REG_F17, UNW_REG_F18, UNW_REG_F19,
+ UNW_REG_F20, UNW_REG_F21, UNW_REG_F22, UNW_REG_F23,
+ UNW_REG_F24, UNW_REG_F25, UNW_REG_F26, UNW_REG_F27,
+ UNW_REG_F28, UNW_REG_F29, UNW_REG_F30, UNW_REG_F31,
+ UNW_NUM_REGS
+};
+
+/* An unwind info block: one header word followed by the unwind descriptors. */
+struct unw_info_block {
+ u64 header;
+ u64 desc[0]; /* unwind descriptors */
+ /* personality routine and language-specific data follow behind descriptors */
+};
+
+/*
+ * Describes the unwind table of one load module. Tables are chained
+ * through "next", which has to stay the first member because list-walking
+ * code treats a pointer to the list head as a struct unw_table pointer.
+ */
+struct unw_table {
+ struct unw_table *next; /* must be first member! */
+ const char *name;
+ unsigned long gp; /* global pointer for this load-module */
+ unsigned long segment_base; /* base for offsets in the unwind table entries */
+ unsigned long start; /* segment_base + start offset of the first entry */
+ unsigned long end; /* segment_base + end offset of the last entry */
+ const struct unw_table_entry *array; /* the table entries themselves */
+ unsigned long length; /* number of entries in array */
+};
+
+/* The kinds of place in which a register's previous value can be saved. */
+enum unw_where {
+ UNW_WHERE_NONE, /* register isn't saved at all */
+ UNW_WHERE_GR, /* register is saved in a general register */
+ UNW_WHERE_FR, /* register is saved in a floating-point register */
+ UNW_WHERE_BR, /* register is saved in a branch register */
+ UNW_WHERE_SPREL, /* register is saved on memstack (sp-relative) */
+ UNW_WHERE_PSPREL, /* register is saved on memstack (psp-relative) */
+ /*
+ * At the end of each prologue these locations get resolved to
+ * UNW_WHERE_PSPREL and UNW_WHERE_GR, respectively:
+ */
+ UNW_WHERE_SPILL_HOME, /* register is saved in its spill home */
+ UNW_WHERE_GR_SAVE /* register is saved in next general register */
+};
+
+/* "when" value for a register that never gets saved (largest positive int) */
+#define UNW_WHEN_NEVER 0x7fffffff
+
+/* Save information for a single register: where, at which location, and when. */
+struct unw_reg_info {
+ unsigned long val; /* save location: register number or offset */
+ enum unw_where where; /* where the register gets saved */
+ int when; /* when the register gets saved */
+};
+
+/*
+ * Save information for all tracked registers at one point in a function;
+ * states can be stacked through "next".
+ */
+struct unw_reg_state {
+ struct unw_reg_state *next; /* next (outer) element on state stack */
+ struct unw_reg_info reg[UNW_NUM_REGS]; /* register save locations */
+};
+
+/* A register state saved under a label, kept on a singly-linked list. */
+struct unw_labeled_state {
+ struct unw_labeled_state *next; /* next labeled state (or NULL) */
+ unsigned long label; /* label for this state */
+ struct unw_reg_state saved_state;
+};
+
+/*
+ * Working state kept while the unwind descriptors of one function are
+ * scanned: progress flags, region/epilogue bookkeeping, the list of
+ * labeled states and the current register state.
+ */
+struct unw_state_record {
+ unsigned int first_region : 1; /* is this the first region? */
+ unsigned int done : 1; /* are we done scanning descriptors? */
+ unsigned int any_spills : 1; /* got any register spills? */
+ unsigned int in_body : 1; /* are we inside a body (as opposed to a prologue)? */
+ unsigned long flags; /* see UNW_FLAG_* in unwind.h */
+
+ u8 *imask; /* imask of spill_mask record or NULL */
+ unsigned long pr_val; /* predicate values */
+ unsigned long pr_mask; /* predicate mask */
+ long spill_offset; /* psp-relative offset for spill base */
+ int region_start;
+ int region_len;
+ int epilogue_start;
+ int epilogue_count;
+ int when_target;
+
+ u8 gr_save_loc; /* next general register to use for saving a register */
+ u8 return_link_reg; /* branch register in which the return link is passed */
+
+ struct unw_labeled_state *labeled_states; /* list of all labeled states */
+ struct unw_reg_state curr; /* current state */
+};
+
+/*
+ * How the NaT bit of a saved general register is represented. There are
+ * four values, so the type fits into the low bits of a NaT-info word.
+ */
+enum unw_nat_type {
+ UNW_NAT_NONE, /* NaT not represented */
+ UNW_NAT_VAL, /* NaT represented by NaT value (fp reg) */
+ UNW_NAT_MEMSTK, /* NaT value is in unat word at offset OFF */
+ UNW_NAT_REGSTK /* NaT is in rnat */
+};
+
+/*
+ * Opcodes of the unwind script instructions. In the descriptions, "s" is
+ * the frame info viewed as an array of words, and "dst"/"val" are the
+ * operand fields of the instruction.
+ */
+enum unw_insn_opcode {
+ UNW_INSN_ADD, /* s[dst] += val */
+ UNW_INSN_ADD_PSP, /* s[dst] = (s.psp + val) */
+ UNW_INSN_ADD_SP, /* s[dst] = (s.sp + val) */
+ UNW_INSN_MOVE, /* s[dst] = s[val] */
+ UNW_INSN_MOVE2, /* s[dst] = s[val]; s[dst+1] = s[val+1] */
+ UNW_INSN_MOVE_STACKED, /* s[dst] = ia64_rse_skip(*s.bsp, val) */
+ UNW_INSN_SETNAT_MEMSTK, /* s[dst+1].nat.type = MEMSTK;
+ s[dst+1].nat.off = *s.pri_unat - s[dst] */
+ UNW_INSN_SETNAT_TYPE, /* s[dst+1].nat.type = val */
+ UNW_INSN_LOAD, /* s[dst] = *s[val] */
+ UNW_INSN_MOVE_SCRATCH, /* s[dst] = scratch reg "val" */
+ UNW_INSN_MOVE_CONST, /* s[dst] = constant reg "val" */
+};
+
+/* One script instruction; the three bit-fields add up to 32 bits. */
+struct unw_insn {
+ unsigned int opc : 4; /* opcode, one of enum unw_insn_opcode */
+ unsigned int dst : 9; /* destination operand */
+ signed int val : 19; /* signed value/source operand */
+};
+
+/*
+ * Preserved general static registers (r4-r7) give rise to two script
+ * instructions; everything else yields at most one instruction; at
+ * the end of the script, the psp gets popped, accounting for one more
+ * instruction.
+ */
+#define UNW_MAX_SCRIPT_LEN (UNW_NUM_REGS + 5)
+
+/*
+ * A cached unwind script: the instruction sequence for one ip (under the
+ * predicate values it was built for), together with its lock and the
+ * links used for cache management.
+ */
+struct unw_script {
+ unsigned long ip; /* ip this script is for */
+ unsigned long pr_mask; /* mask of predicates script depends on */
+ unsigned long pr_val; /* predicate values this script is for */
+ rwlock_t lock;
+ unsigned int flags; /* see UNW_FLAG_* in unwind.h */
+ unsigned short lru_chain; /* used for least-recently-used chain */
+ unsigned short coll_chain; /* used for hash collisions */
+ unsigned short hint; /* hint for next script to try (or -1) */
+ unsigned short count; /* number of instructions in script */
+ struct unw_insn insn[UNW_MAX_SCRIPT_LEN];
+};
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
new file mode 100644
index 00000000000..b9f0db4c1b0
--- /dev/null
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -0,0 +1,251 @@
+#include <linux/config.h>
+
+#include <asm/cache.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/pgtable.h>
+
+#define LOAD_OFFSET (KERNEL_START - KERNEL_TR_PAGE_SIZE)
+#include <asm-generic/vmlinux.lds.h>
+
+OUTPUT_FORMAT("elf64-ia64-little")
+OUTPUT_ARCH(ia64)
+ENTRY(phys_start)
+jiffies = jiffies_64;
+PHDRS {
+ code PT_LOAD;
+ percpu PT_LOAD;
+ data PT_LOAD;
+}
+SECTIONS
+{
+ /* Sections to be discarded */
+ /DISCARD/ : {
+ *(.exit.text)
+ *(.exit.data)
+ *(.exitcall.exit)
+ *(.IA_64.unwind.exit.text)
+ *(.IA_64.unwind_info.exit.text)
+ }
+
+ v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */
+ phys_start = _start - LOAD_OFFSET;
+
+ code : { } :code
+ . = KERNEL_START;
+
+ _text = .;
+ _stext = .;
+
+ .text : AT(ADDR(.text) - LOAD_OFFSET)
+ {
+ *(.text.ivt)
+ *(.text)
+ SCHED_TEXT
+ LOCK_TEXT
+ *(.gnu.linkonce.t*)
+ }
+ .text2 : AT(ADDR(.text2) - LOAD_OFFSET)
+ { *(.text2) }
+#ifdef CONFIG_SMP
+ .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET)
+ { *(.text.lock) }
+#endif
+ _etext = .;
+
+ /* Read-only data */
+
+ /* Exception table */
+ . = ALIGN(16);
+ __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET)
+ {
+ __start___ex_table = .;
+ *(__ex_table)
+ __stop___ex_table = .;
+ }
+
+ .data.patch.vtop : AT(ADDR(.data.patch.vtop) - LOAD_OFFSET)
+ {
+ __start___vtop_patchlist = .;
+ *(.data.patch.vtop)
+ __end___vtop_patchlist = .;
+ }
+
+ .data.patch.mckinley_e9 : AT(ADDR(.data.patch.mckinley_e9) - LOAD_OFFSET)
+ {
+ __start___mckinley_e9_bundles = .;
+ *(.data.patch.mckinley_e9)
+ __end___mckinley_e9_bundles = .;
+ }
+
+ /* Global data */
+ _data = .;
+
+#if defined(CONFIG_IA64_GENERIC)
+ /* Machine Vector */
+ . = ALIGN(16);
+ .machvec : AT(ADDR(.machvec) - LOAD_OFFSET)
+ {
+ machvec_start = .;
+ *(.machvec)
+ machvec_end = .;
+ }
+#endif
+
+ /* Unwind info & table: */
+ . = ALIGN(8);
+ .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET)
+ { *(.IA_64.unwind_info*) }
+ .IA_64.unwind : AT(ADDR(.IA_64.unwind) - LOAD_OFFSET)
+ {
+ __start_unwind = .;
+ *(.IA_64.unwind*)
+ __end_unwind = .;
+ }
+
+ RODATA
+
+ .opd : AT(ADDR(.opd) - LOAD_OFFSET)
+ { *(.opd) }
+
+ /* Initialization code and data: */
+
+ . = ALIGN(PAGE_SIZE);
+ __init_begin = .;
+ .init.text : AT(ADDR(.init.text) - LOAD_OFFSET)
+ {
+ _sinittext = .;
+ *(.init.text)
+ _einittext = .;
+ }
+
+ .init.data : AT(ADDR(.init.data) - LOAD_OFFSET)
+ { *(.init.data) }
+
+ .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET)
+ {
+ __initramfs_start = .;
+ *(.init.ramfs)
+ __initramfs_end = .;
+ }
+
+ . = ALIGN(16);
+ .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET)
+ {
+ __setup_start = .;
+ *(.init.setup)
+ __setup_end = .;
+ }
+ .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET)
+ {
+ __initcall_start = .;
+ *(.initcall1.init)
+ *(.initcall2.init)
+ *(.initcall3.init)
+ *(.initcall4.init)
+ *(.initcall5.init)
+ *(.initcall6.init)
+ *(.initcall7.init)
+ __initcall_end = .;
+ }
+ __con_initcall_start = .;
+ .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET)
+ { *(.con_initcall.init) }
+ __con_initcall_end = .;
+ __security_initcall_start = .;
+ .security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET)
+ { *(.security_initcall.init) }
+ __security_initcall_end = .;
+ . = ALIGN(PAGE_SIZE);
+ __init_end = .;
+
+ /* The initial task and kernel stack */
+ .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET)
+ { *(.data.init_task) }
+
+ .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET)
+ { *(__special_page_section)
+ __start_gate_section = .;
+ *(.data.gate)
+ __stop_gate_section = .;
+ }
+ . = ALIGN(PAGE_SIZE); /* make sure the gate page doesn't expose kernel data */
+
+ .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET)
+ { *(.data.cacheline_aligned) }
+
+ /* Per-cpu data: */
+ percpu : { } :percpu
+ . = ALIGN(PERCPU_PAGE_SIZE);
+ __phys_per_cpu_start = .;
+ .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET)
+ {
+ __per_cpu_start = .;
+ *(.data.percpu)
+ __per_cpu_end = .;
+ }
+ . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits into percpu page size */
+
+ data : { } :data
+ .data : AT(ADDR(.data) - LOAD_OFFSET)
+ { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS }
+
+ . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */
+ .got : AT(ADDR(.got) - LOAD_OFFSET)
+ { *(.got.plt) *(.got) }
+ __gp = ADDR(.got) + 0x200000;
+ /* We want the small data sections together, so single-instruction offsets
+ can access them all, and initialized data all before uninitialized, so
+ we can shorten the on-disk segment size. */
+ .sdata : AT(ADDR(.sdata) - LOAD_OFFSET)
+ { *(.sdata) *(.sdata1) *(.srdata) }
+ _edata = .;
+ _bss = .;
+ .sbss : AT(ADDR(.sbss) - LOAD_OFFSET)
+ { *(.sbss) *(.scommon) }
+ .bss : AT(ADDR(.bss) - LOAD_OFFSET)
+ { *(.bss) *(COMMON) }
+
+ _end = .;
+
+ code : { } :code
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* These must appear regardless of . */
+ /* Discard them for now since Intel SoftSDV cannot handle them.
+ .comment 0 : { *(.comment) }
+ .note 0 : { *(.note) }
+ */
+ /DISCARD/ : { *(.comment) }
+ /DISCARD/ : { *(.note) }
+}
diff --git a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile
new file mode 100644
index 00000000000..1902c3c2ef9
--- /dev/null
+++ b/arch/ia64/lib/Makefile
@@ -0,0 +1,52 @@
+#
+# Makefile for ia64-specific library routines..
+#
+
+obj-y := io.o
+
+lib-y := __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \
+ __divdi3.o __udivdi3.o __moddi3.o __umoddi3.o \
+ bitop.o checksum.o clear_page.o csum_partial_copy.o copy_page.o \
+ clear_user.o strncpy_from_user.o strlen_user.o strnlen_user.o \
+ flush.o ip_fast_csum.o do_csum.o \
+ memset.o strlen.o swiotlb.o
+
+lib-$(CONFIG_ITANIUM) += copy_page.o copy_user.o memcpy.o
+lib-$(CONFIG_MCKINLEY) += copy_page_mck.o memcpy_mck.o
+lib-$(CONFIG_PERFMON) += carta_random.o
+lib-$(CONFIG_MD_RAID5) += xor.o
+lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
+
+AFLAGS___divdi3.o =
+AFLAGS___udivdi3.o = -DUNSIGNED
+AFLAGS___moddi3.o = -DMODULO
+AFLAGS___umoddi3.o = -DUNSIGNED -DMODULO
+
+AFLAGS___divsi3.o =
+AFLAGS___udivsi3.o = -DUNSIGNED
+AFLAGS___modsi3.o = -DMODULO
+AFLAGS___umodsi3.o = -DUNSIGNED -DMODULO
+
+$(obj)/__divdi3.o: $(src)/idiv64.S FORCE
+ $(call if_changed_dep,as_o_S)
+
+$(obj)/__udivdi3.o: $(src)/idiv64.S FORCE
+ $(call if_changed_dep,as_o_S)
+
+$(obj)/__moddi3.o: $(src)/idiv64.S FORCE
+ $(call if_changed_dep,as_o_S)
+
+$(obj)/__umoddi3.o: $(src)/idiv64.S FORCE
+ $(call if_changed_dep,as_o_S)
+
+$(obj)/__divsi3.o: $(src)/idiv32.S FORCE
+ $(call if_changed_dep,as_o_S)
+
+$(obj)/__udivsi3.o: $(src)/idiv32.S FORCE
+ $(call if_changed_dep,as_o_S)
+
+$(obj)/__modsi3.o: $(src)/idiv32.S FORCE
+ $(call if_changed_dep,as_o_S)
+
+$(obj)/__umodsi3.o: $(src)/idiv32.S FORCE
+ $(call if_changed_dep,as_o_S)
diff --git a/arch/ia64/lib/bitop.c b/arch/ia64/lib/bitop.c
new file mode 100644
index 00000000000..82e299c8464
--- /dev/null
+++ b/arch/ia64/lib/bitop.c
@@ -0,0 +1,88 @@
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/intrinsics.h>
+#include <linux/module.h>
+#include <linux/bitops.h>
+
+/*
+ * Find the first zero bit at or after bit OFFSET in the bitmap at ADDR, which
+ * is SIZE bits long and is scanned one 64-bit word at a time.
+ *
+ * Returns the index of that bit, or SIZE when bits OFFSET..SIZE-1 are all set
+ * (including when OFFSET >= SIZE).
+ */
+int __find_next_zero_bit (const void *addr, unsigned long size, unsigned long offset)
+{
+	const unsigned long *word = (const unsigned long *) addr + (offset >> 6);
+	unsigned long base = offset & ~63UL;	/* bit index of bit 0 of *word */
+	unsigned long left, bit, mask, val;
+
+	if (offset >= size)
+		return size;
+	left = size - base;	/* bits left to examine, counted from BASE */
+	bit = offset & 63UL;
+
+	/* In the first word, bits below OFFSET are forced to 1 so they never match. */
+	mask = bit ? ~0UL >> (64 - bit) : 0UL;
+
+	/* Words that lie entirely below SIZE. */
+	while (left >= 64) {
+		val = *word++ | mask;
+		if (val != ~0UL)
+			return base + ffz(val);
+		mask = 0UL;	/* only the first word is masked */
+		base += 64;
+		left -= 64;
+	}
+
+	if (!left)
+		return base;	/* nothing left: BASE equals SIZE here */
+
+	/* Trailing partial word: bits at or beyond SIZE are forced to 1 as well. */
+	val = *word | mask | (~0UL << left);
+	if (val == ~0UL)	/* no zero bit below SIZE */
+		return base + left;
+	return base + ffz(val);
+}
+EXPORT_SYMBOL(__find_next_zero_bit);
+
+/*
+ * Find the first set bit at or after bit OFFSET in the SIZE-bit bitmap at ADDR.
+ * Returns its index, or SIZE when no bit in OFFSET..SIZE-1 is set.
+ */
+int __find_next_bit(const void *addr, unsigned long size, unsigned long offset)
+{
+	const unsigned long *word = (const unsigned long *) addr + (offset >> 6);
+	unsigned long base = offset & ~63UL;	/* bit index of bit 0 of *word */
+	unsigned long left, keep, val;
+
+	if (offset >= size)
+		return size;
+	left = size - base;	/* bits left to examine, counted from BASE */
+
+	/*
+	 * In the first word only bits at or above OFFSET may match; when OFFSET
+	 * is word-aligned the shift count is 0 and every bit is kept.
+	 */
+	keep = ~0UL << (offset & 63UL);
+
+	/* Words that lie entirely below SIZE. */
+	while (left >= 64) {
+		val = *word++ & keep;
+		if (val)
+			return base + __ffs(val);
+		keep = ~0UL;	/* only the first word is masked */
+		base += 64;
+		left -= 64;
+	}
+
+	if (!left)
+		return base;	/* nothing left: BASE equals SIZE here */
+
+	/* Trailing partial word: ignore the bits at or beyond SIZE. */
+	val = *word & keep & (~0UL >> (64 - left));
+	if (val == 0UL)		/* no bit set below SIZE */
+		return base + left;
+	return base + __ffs(val);
+}
+EXPORT_SYMBOL(__find_next_bit);
diff --git a/arch/ia64/lib/carta_random.S b/arch/ia64/lib/carta_random.S
new file mode 100644
index 00000000000..d0674c36036
--- /dev/null
+++ b/arch/ia64/lib/carta_random.S
@@ -0,0 +1,54 @@
+/*
+ * Fast, simple, yet decent quality random number generator based on
+ * a paper by David G. Carta ("Two Fast Implementations of the
+ * `Minimal Standard' Random Number Generator," Communications of the
+ * ACM, January, 1990).
+ *
+ * Copyright (C) 2002 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <asm/asmmacro.h>
+
+#define a r2
+#define m r3
+#define lo r8
+#define hi r9
+#define t0 r16
+#define t1 r17
+#define seed r32
+
+GLOBAL_ENTRY(carta_random32)
+ movl a = (16807 << 16) | 16807 // 16807 in each of the two low 16-bit fields
+ ;;
+ pmpyshr2.u t0 = a, seed, 0
+ pmpyshr2.u t1 = a, seed, 16
+ ;;
+ unpack2.l t0 = t1, t0
+ dep m = -1, r0, 0, 31 // m = 0x7fffffff (31 one-bits deposited into zero)
+ ;;
+ zxt4 lo = t0 // lo = low 32 bits of t0
+ shr.u hi = t0, 32 // hi = high 32 bits of t0
+ ;;
+ dep t0 = 0, hi, 15, 49 // t0 = (hi & 0x7fff)
+ ;;
+ shl t0 = t0, 16 // t0 = (hi & 0x7fff) << 16
+ shr t1 = hi, 15 // t1 = (hi >> 15)
+ ;;
+ add lo = lo, t0
+ ;;
+ cmp.gtu p6, p0 = lo, m // p6 = (lo > m), unsigned compare
+ ;;
+(p6) and lo = lo, m // if so, keep only the low 31 bits ...
+ ;;
+(p6) add lo = 1, lo // ... and add 1
+ ;;
+ add lo = lo, t1
+ ;;
+ cmp.gtu p6, p0 = lo, m // same reduction again after adding t1
+ ;;
+(p6) and lo = lo, m
+ ;;
+(p6) add lo = 1, lo
+ br.ret.sptk.many rp // result is left in lo (r8)
+END(carta_random32)
diff --git a/arch/ia64/lib/checksum.c b/arch/ia64/lib/checksum.c
new file mode 100644
index 00000000000..beb11721d9f
--- /dev/null
+++ b/arch/ia64/lib/checksum.c
@@ -0,0 +1,102 @@
+/*
+ * Network checksum routines
+ *
+ * Copyright (C) 1999, 2003 Hewlett-Packard Co
+ * Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * Most of the code coming from arch/alpha/lib/checksum.c
+ *
+ * This file contains network checksum routines that are better done
+ * in an architecture-specific manner due to speed..
+ */
+
+#include <linux/module.h>
+#include <linux/string.h>
+
+#include <asm/byteorder.h>
+
+static inline unsigned short
+from64to16 (unsigned long x)
+{
+	int fold;
+
+	/* 64 bits -> at most 33 bits: add the two 32-bit halves */
+	x = (x >> 32) + (x & 0xffffffff);
+	/* three 16-bit folds; each one absorbs the carry left by the previous step */
+	for (fold = 0; fold < 3; fold++)
+		x = (x >> 16) + (x & 0xffff);
+	/* the sum now fits in 16 bits, so the narrowing return loses nothing */
+	return x;
+}
+
+/* Checksum of the TCP/UDP pseudo-header, folded to 16 bits and complemented. */
+unsigned short int
+csum_tcpudp_magic (unsigned long saddr, unsigned long daddr, unsigned short len,
+		   unsigned short proto, unsigned int sum)
+{
+	unsigned long pseudo = saddr + daddr + sum;
+
+	pseudo += (unsigned long) ntohs(len) << 16;
+	pseudo += (unsigned long) proto << 8;
+	return ~from64to16(pseudo);
+}
+
+EXPORT_SYMBOL(csum_tcpudp_magic);
+
+unsigned int
+csum_tcpudp_nofold (unsigned long saddr, unsigned long daddr, unsigned short len,
+		    unsigned short proto, unsigned int sum)
+{
+	unsigned long acc = saddr + daddr + sum;
+	int fold;
+
+	acc += (unsigned long) ntohs(len) << 16;
+	acc += (unsigned long) proto << 8;
+	/*
+	 * Fold down to 32 bits so we don't lose in the typedef-less network
+	 * stack: the first pass takes 64 bits to 33, the second 33 to 32.
+	 */
+	for (fold = 0; fold < 2; fold++)
+		acc = (acc >> 32) + (acc & 0xffffffff);
+	return acc;
+}
+
+extern unsigned long do_csum (const unsigned char *, long);
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+unsigned int
+csum_partial (const unsigned char * buff, int len, unsigned int sum)
+{
+	/* do_csum()'s result for the buffer plus the caller's running sum */
+	unsigned long total = do_csum(buff, len) + (unsigned long) sum;
+
+	/*
+	 * 32+c bits -> 32 bits: add the carry above bit 31 back into the low word.
+	 */
+	return (total >> 32) + (total & 0xffffffff);
+}
+
+EXPORT_SYMBOL(csum_partial);
+
+/* Used for miscellaneous IP-like checksums, mainly in icmp.c. */
+unsigned short
+ip_compute_csum (unsigned char * buff, int len)
+{
+	/* do_csum()'s result; complemented, then truncated to 16 bits on return */
+	unsigned long folded = do_csum(buff, len);
+
+	return ~folded;
+}
+
+EXPORT_SYMBOL(ip_compute_csum);
diff --git a/arch/ia64/lib/clear_page.S b/arch/ia64/lib/clear_page.S
new file mode 100644
index 00000000000..d4987061dda
--- /dev/null
+++ b/arch/ia64/lib/clear_page.S
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 1999-2002 Hewlett-Packard Co
+ * Stephane Eranian <eranian@hpl.hp.com>
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
+ *
+ * 1/06/01 davidm Tuned for Itanium.
+ * 2/12/02 kchen Tuned for both Itanium and McKinley
+ * 3/08/02 davidm Some more tweaking
+ */
+#include <linux/config.h>
+
+#include <asm/asmmacro.h>
+#include <asm/page.h>
+
+#ifdef CONFIG_ITANIUM
+# define L3_LINE_SIZE 64 // Itanium L3 line size
+# define PREFETCH_LINES 9 // magic number
+#else
+# define L3_LINE_SIZE 128 // McKinley L3 line size
+# define PREFETCH_LINES 12 // magic number
+#endif
+
+#define saved_lc r2
+#define dst_fetch r3
+#define dst1 r8
+#define dst2 r9
+#define dst3 r10
+#define dst4 r11
+
+#define dst_last r31
+
+GLOBAL_ENTRY(clear_page)
+ .prologue
+ .regstk 1,0,0,0
+ mov r16 = PAGE_SIZE/L3_LINE_SIZE-1 // main loop count, -1=repeat/until
+ .save ar.lc, saved_lc
+ mov saved_lc = ar.lc
+
+ .body
+ mov ar.lc = (PREFETCH_LINES - 1) // .fetch loop count, -1=repeat/until
+ mov dst_fetch = in0
+ adds dst1 = 16, in0
+ adds dst2 = 32, in0
+ ;;
+.fetch: stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE // store f0 at the line start, then step one L3 line
+ adds dst3 = 48, in0 // executing this multiple times is harmless
+ br.cloop.sptk.few .fetch
+ ;;
+ addl dst_last = (PAGE_SIZE - PREFETCH_LINES*L3_LINE_SIZE), dst_fetch // = in0 + PAGE_SIZE, since .fetch advanced dst_fetch by PREFETCH_LINES lines
+ mov ar.lc = r16 // one L3 line per iteration
+ adds dst4 = 64, in0
+ ;;
+#ifdef CONFIG_ITANIUM
+ // Optimized for Itanium
+1: stf.spill.nta [dst1] = f0, 64
+ stf.spill.nta [dst2] = f0, 64
+ cmp.lt p8,p0=dst_fetch, dst_last // p8: the run-ahead pointer is still inside the page
+ ;;
+#else
+ // Optimized for McKinley
+1: stf.spill.nta [dst1] = f0, 64
+ stf.spill.nta [dst2] = f0, 64
+ stf.spill.nta [dst3] = f0, 64
+ stf.spill.nta [dst4] = f0, 128
+ cmp.lt p8,p0=dst_fetch, dst_last // p8: the run-ahead pointer is still inside the page
+ ;;
+ stf.spill.nta [dst1] = f0, 64
+ stf.spill.nta [dst2] = f0, 64
+#endif
+ stf.spill.nta [dst3] = f0, 64
+(p8) stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE // keep running ahead only while p8 holds
+ br.cloop.sptk.few 1b
+ ;;
+ mov ar.lc = saved_lc // restore lc
+ br.ret.sptk.many rp
+END(clear_page)
diff --git a/arch/ia64/lib/clear_user.S b/arch/ia64/lib/clear_user.S
new file mode 100644
index 00000000000..eecd8577b20
--- /dev/null
+++ b/arch/ia64/lib/clear_user.S
@@ -0,0 +1,209 @@
+/*
+ * This routine clears to zero a linear memory buffer in user space.
+ *
+ * Inputs:
+ * in0: address of buffer
+ * in1: length of buffer in bytes
+ * Outputs:
+ * r8: number of bytes that didn't get cleared due to a fault
+ *
+ * Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co
+ * Stephane Eranian <eranian@hpl.hp.com>
+ */
+
+#include <asm/asmmacro.h>
+
+//
+// arguments
+//
+#define buf r32
+#define len r33
+
+//
+// local registers
+//
+#define cnt r16
+#define buf2 r17
+#define saved_lc r18
+#define saved_pfs r19
+#define tmp r20
+#define len2 r21
+#define len3 r22
+
+//
+// Theory of operations:
+// - we check whether or not the buffer is small, i.e., less than 17
+// in which case we do the byte by byte loop.
+//
+// - Otherwise we go progressively from 1 byte store to 8byte store in
+// the head part, the body is a 16byte store loop and we finish with the
+// tail for the last 15 bytes.
+// The good point about this breakdown is that the long buffer handling
+// contains only 2 branches.
+//
+// The reason for not using shifting & masking for both the head and the
+// tail is to stay semantically correct. This routine is not supposed
+// to write bytes outside of the buffer. While most of the time this would
+// be ok, we can't tolerate a mistake. A classical example is the case
+// of multithreaded code where the extra bytes touched are actually owned
+// by another thread which runs concurrently to ours. Another, less likely,
+// example is with device drivers where reading an I/O mapped location may
+// have side effects (same thing for writing).
+//
+
+GLOBAL_ENTRY(__do_clear_user)
+ .prologue
+ .save ar.pfs, saved_pfs
+ alloc saved_pfs=ar.pfs,2,0,0,0
+ cmp.eq p6,p0=r0,len // check for zero length
+ .save ar.lc, saved_lc
+ mov saved_lc=ar.lc // preserve ar.lc (slow)
+ .body
+ ;; // avoid WAW on CFM
+ adds tmp=-1,len // br.cloop is repeat/until
+ mov ret0=len // return value is length at this point
+(p6) br.ret.spnt.many rp
+ ;;
+ cmp.lt p6,p0=16,len // if len > 16 then long memset
+ mov ar.lc=tmp // initialize lc for small count
+(p6) br.cond.dptk .long_do_clear
+ ;; // WAR on ar.lc
+ //
+ // worst case 16 iterations, avg 8 iterations
+ //
+ // We could have played with the predicates to use the extra
+ // M slot for 2 stores/iteration but the cost of initializing
+ // the various counters compared to how long the loop is supposed
+ // to last on average does not make this solution viable.
+ //
+1:
+ EX( .Lexit1, st1 [buf]=r0,1 )
+ adds len=-1,len // countdown length using len
+ br.cloop.dptk 1b
+ ;; // avoid RAW on ar.lc
+ //
+ // .Lexit1: comes from byte by byte loop
+ // len contains bytes left
+.Lexit1:
+ mov ret0=len // faster than using ar.lc
+ mov ar.lc=saved_lc
+ br.ret.sptk.many rp // end of short clear_user
+
+
+ //
+ // At this point we know we have more than 16 bytes to clear
+ // so we focus on alignment (no branches required)
+ //
+ // The use of len/len2 for countdown of the number of bytes left
+ // instead of ret0 is due to the fact that the exception code
+ // changes the values of r8.
+ //
+.long_do_clear:
+ tbit.nz p6,p0=buf,0 // odd alignment (for long_do_clear)
+ ;;
+ EX( .Lexit3, (p6) st1 [buf]=r0,1 ) // 1-byte aligned
+(p6) adds len=-1,len;; // sync because buf is modified
+ tbit.nz p6,p0=buf,1
+ ;;
+ EX( .Lexit3, (p6) st2 [buf]=r0,2 ) // 2-byte aligned
+(p6) adds len=-2,len;;
+ tbit.nz p6,p0=buf,2
+ ;;
+ EX( .Lexit3, (p6) st4 [buf]=r0,4 ) // 4-byte aligned
+(p6) adds len=-4,len;;
+ tbit.nz p6,p0=buf,3
+ ;;
+ EX( .Lexit3, (p6) st8 [buf]=r0,8 ) // 8-byte aligned
+(p6) adds len=-8,len;;
+ shr.u cnt=len,4 // number of 128-bit (2x64bit) words
+ ;;
+ cmp.eq p6,p0=r0,cnt
+ adds tmp=-1,cnt
+(p6) br.cond.dpnt .dotail // we have less than 16 bytes left
+ ;;
+ adds buf2=8,buf // setup second base pointer
+ mov ar.lc=tmp
+ ;;
+
+ //
+ // 16bytes/iteration core loop
+ //
+ // The second store can never generate a fault because
+ // we come into the loop only when we are 16-byte aligned.
+ // This means that if we cross a page then it will always be
+ // in the first store and never in the second.
+ //
+ //
+ // We need to keep track of the remaining length. A possible (optimistic)
+ // way would be to use ar.lc and derive how many bytes were left by
+ // doing : left= 16*ar.lc + 16. This would avoid the addition at
+ // every iteration.
+ // However we need to keep the synchronization point. A template
+ // M;;MB does not exist and thus we can keep the addition at no
+ // extra cycle cost (use a nop slot anyway). It also simplifies the
+ // (unlikely) error recovery code
+ //
+
+2: EX(.Lexit3, st8 [buf]=r0,16 )
+ ;; // needed to get len correct when error
+ st8 [buf2]=r0,16
+ adds len=-16,len
+ br.cloop.dptk 2b
+ ;;
+ mov ar.lc=saved_lc
+ //
+ // tail correction based on len only
+ //
+ // We alternate the use of len3,len2 to allow parallelism and correct
+ // error handling. We also reuse p6/p7 to return correct value.
+ // The addition of len2/len3 does not cost anything more compared to
+ // the regular memset as we had empty slots.
+ //
+.dotail:
+ mov len2=len // for parallelization of error handling
+ mov len3=len
+ tbit.nz p6,p0=len,3
+ ;;
+ EX( .Lexit2, (p6) st8 [buf]=r0,8 ) // at least 8 bytes
+(p6) adds len3=-8,len2
+ tbit.nz p7,p6=len,2
+ ;;
+ EX( .Lexit2, (p7) st4 [buf]=r0,4 ) // at least 4 bytes
+(p7) adds len2=-4,len3
+ tbit.nz p6,p7=len,1
+ ;;
+ EX( .Lexit2, (p6) st2 [buf]=r0,2 ) // at least 2 bytes
+(p6) adds len3=-2,len2
+ tbit.nz p7,p6=len,0
+ ;;
+ EX( .Lexit2, (p7) st1 [buf]=r0 ) // only 1 byte left
+ mov ret0=r0 // success
+ br.ret.sptk.many rp // end of most likely path; NOTE(review): when .dotail is entered by the branch that skips the core loop, ar.lc still holds the count written before .long_do_clear and is not restored on this path -- confirm
+
+ //
+ // Outlined error handling code
+ //
+
+ //
+ // .Lexit3: comes from core loop, need restore pr/lc
+ // len contains bytes left
+ //
+ //
+ // .Lexit2:
+ // if p6 -> coming from st8 or st2 : len2 contains what's left
+ // if p7 -> coming from st4 or st1 : len3 contains what's left
+ // We must restore lc/pr even though they might not have been used.
+.Lexit2:
+ .pred.rel "mutex", p6, p7
+(p6) mov len=len2
+(p7) mov len=len3
+ ;;
+ //
+ // .Lexit3: also comes from head (ar.lc is restored here in either case)
+ // len contains bytes left
+ //
+.Lexit3:
+ mov ret0=len
+ mov ar.lc=saved_lc
+ br.ret.sptk.many rp
+END(__do_clear_user)
diff --git a/arch/ia64/lib/copy_page.S b/arch/ia64/lib/copy_page.S
new file mode 100644
index 00000000000..127d1d050d7
--- /dev/null
+++ b/arch/ia64/lib/copy_page.S
@@ -0,0 +1,98 @@
+/*
+ *
+ * Optimized version of the standard copy_page() function
+ *
+ * Inputs:
+ * in0: address of target page
+ * in1: address of source page
+ * Output:
+ * no return value
+ *
+ * Copyright (C) 1999, 2001 Hewlett-Packard Co
+ * Stephane Eranian <eranian@hpl.hp.com>
+ * David Mosberger <davidm@hpl.hp.com>
+ *
+ * 4/06/01 davidm Tuned to make it perform well both for cached and uncached copies.
+ */
+#include <asm/asmmacro.h>
+#include <asm/page.h>
+
+#define PIPE_DEPTH 3
+#define EPI p[PIPE_DEPTH-1]
+
+#define lcount r16
+#define saved_pr r17
+#define saved_lc r18
+#define saved_pfs r19
+#define src1 r20
+#define src2 r21
+#define tgt1 r22
+#define tgt2 r23
+#define srcf r24
+#define tgtf r25
+#define tgt_last r26
+
+#define Nrot ((8*PIPE_DEPTH+7)&~7)
+
+GLOBAL_ENTRY(copy_page)
+ .prologue
+ .save ar.pfs, saved_pfs
+ alloc saved_pfs=ar.pfs,3,Nrot-3,0,Nrot
+
+ .rotr t1[PIPE_DEPTH], t2[PIPE_DEPTH], t3[PIPE_DEPTH], t4[PIPE_DEPTH], \
+ t5[PIPE_DEPTH], t6[PIPE_DEPTH], t7[PIPE_DEPTH], t8[PIPE_DEPTH]
+ .rotp p[PIPE_DEPTH]
+
+ .save ar.lc, saved_lc
+ mov saved_lc=ar.lc
+ mov ar.ec=PIPE_DEPTH // epilogue count = number of pipeline stages
+
+ mov lcount=PAGE_SIZE/64-1 // 64 bytes (eight ld8/st8 pairs) per iteration, -1=repeat/until
+ .save pr, saved_pr
+ mov saved_pr=pr
+ mov pr.rot=1<<16 // of the rotating predicates only p16 starts out set
+
+ .body
+
+ mov src1=in1
+ adds src2=8,in1
+ mov tgt_last = PAGE_SIZE
+ ;;
+ adds tgt2=8,in0
+ add srcf=512,in1 // source prefetch pointer starts 512 bytes ahead
+ mov ar.lc=lcount
+ mov tgt1=in0
+ add tgtf=512,in0 // target prefetch pointer starts 512 bytes ahead
+ add tgt_last = tgt_last, in0 // tgt_last = in0 + PAGE_SIZE
+ ;;
+1:
+(p[0]) ld8 t1[0]=[src1],16
+(EPI) st8 [tgt1]=t1[PIPE_DEPTH-1],16
+(p[0]) ld8 t2[0]=[src2],16
+(EPI) st8 [tgt2]=t2[PIPE_DEPTH-1],16
+ cmp.ltu p6,p0 = tgtf, tgt_last // p6: target prefetch pointer is still below the end of the page
+ ;;
+(p[0]) ld8 t3[0]=[src1],16
+(EPI) st8 [tgt1]=t3[PIPE_DEPTH-1],16
+(p[0]) ld8 t4[0]=[src2],16
+(EPI) st8 [tgt2]=t4[PIPE_DEPTH-1],16
+ ;;
+(p[0]) ld8 t5[0]=[src1],16
+(EPI) st8 [tgt1]=t5[PIPE_DEPTH-1],16
+(p[0]) ld8 t6[0]=[src2],16
+(EPI) st8 [tgt2]=t6[PIPE_DEPTH-1],16
+ ;;
+(p[0]) ld8 t7[0]=[src1],16
+(EPI) st8 [tgt1]=t7[PIPE_DEPTH-1],16
+(p[0]) ld8 t8[0]=[src2],16
+(EPI) st8 [tgt2]=t8[PIPE_DEPTH-1],16
+
+(p6) lfetch [srcf], 64 // prefetch ahead in the source, stepping 64 bytes
+(p6) lfetch [tgtf], 64 // prefetch ahead in the target, stepping 64 bytes
+ br.ctop.sptk.few 1b
+ ;;
+ mov pr=saved_pr,0xffffffffffff0000 // restore predicates
+ mov ar.pfs=saved_pfs
+ mov ar.lc=saved_lc
+ br.ret.sptk.many rp
+END(copy_page)
diff --git a/arch/ia64/lib/copy_page_mck.S b/arch/ia64/lib/copy_page_mck.S
new file mode 100644
index 00000000000..3c45d60a81b
--- /dev/null
+++ b/arch/ia64/lib/copy_page_mck.S
@@ -0,0 +1,185 @@
+/*
+ * McKinley-optimized version of copy_page().
+ *
+ * Copyright (C) 2002 Hewlett-Packard Co
+ * David Mosberger <davidm@hpl.hp.com>
+ *
+ * Inputs:
+ * in0: address of target page
+ * in1: address of source page
+ * Output:
+ * no return value
+ *
+ * General idea:
+ * - use regular loads and stores to prefetch data to avoid consuming M-slot just for
+ * lfetches => good for in-cache performance
+ * - avoid l2 bank-conflicts by not storing into the same 16-byte bank within a single
+ * cycle
+ *
+ * Principle of operation:
+ * First, note that L1 has a line-size of 64 bytes and L2 a line-size of 128 bytes.
+ * To avoid secondary misses in L2, we prefetch both source and destination with a line-size
+ * of 128 bytes. When both of these lines are in the L2 and the first half of the
+ * source line is in L1, we start copying the remaining words. The second half of the
+ * source line is prefetched in an earlier iteration, so that by the time we start
+ * accessing it, it's also present in the L1.
+ *
+ * We use a software-pipelined loop to control the overall operation. The pipeline
+ * has 2*PREFETCH_DIST+K stages. The first PREFETCH_DIST stages are used for prefetching
+ * source cache-lines. The second PREFETCH_DIST stages are used for prefetching destination
+ * cache-lines, the last K stages are used to copy the cache-line words not copied by
+ * the prefetches. The four relevant points in the pipeline are called A, B, C, D:
+ * p[A] is TRUE if a source-line should be prefetched, p[B] is TRUE if a destination-line
+ * should be prefetched, p[C] is TRUE if the second half of an L2 line should be brought
+ * into L1D and p[D] is TRUE if a cacheline needs to be copied.
+ *
+ * This all sounds very complicated, but thanks to the modulo-scheduled loop support,
+ * the resulting code is very regular and quite easy to follow (once you get the idea).
+ *
+ * As a secondary optimization, the first 2*PREFETCH_DIST iterations are implemented
+ * as the separate .prefetch_loop. Logically, this loop performs exactly like the
+ * main-loop (.line_copy), but has all known-to-be-predicated-off instructions removed,
+ * so that each loop iteration is faster (again, good for cached case).
+ *
+ * When reading the code, it helps to keep the following picture in mind:
+ *
+ * word 0 word 1
+ * +------+------+---
+ * | v[x] | t1 | ^
+ * | t2 | t3 | |
+ * | t4 | t5 | |
+ * | t6 | t7 | | 128 bytes
+ * | n[y] | t9 | | (L2 cache line)
+ * | t10 | t11 | |
+ * | t12 | t13 | |
+ * | t14 | t15 | v
+ * +------+------+---
+ *
+ * Here, v[x] is copied by the (memory) prefetch. n[y] is loaded at p[C]
+ * to fetch the second-half of the L2 cache line into L1, and the tX words are copied in
+ * an order that avoids bank conflicts.
+ */
+#include <asm/asmmacro.h>
+#include <asm/page.h>
+
+#define PREFETCH_DIST 8 // McKinley sustains 16 outstanding L2 misses (8 ld, 8 st)
+
+#define src0 r2
+#define src1 r3
+#define dst0 r9
+#define dst1 r10
+#define src_pre_mem r11
+#define dst_pre_mem r14
+#define src_pre_l2 r15
+#define dst_pre_l2 r16
+#define t1 r17
+#define t2 r18
+#define t3 r19
+#define t4 r20
+#define t5 t1 // alias!
+#define t6 t2 // alias!
+#define t7 t3 // alias!
+#define t9 t5 // alias!
+#define t10 t4 // alias!
+#define t11 t7 // alias!
+#define t12 t6 // alias!
+#define t14 t10 // alias!
+#define t13 r21
+#define t15 r22
+
+#define saved_lc r23
+#define saved_pr r24
+
+#define A 0
+#define B (PREFETCH_DIST)
+#define C (B + PREFETCH_DIST)
+#define D (C + 3)
+#define N (D + 1)
+#define Nrot ((N + 7) & ~7)
+
+GLOBAL_ENTRY(copy_page)
+ .prologue
+ alloc r8 = ar.pfs, 2, Nrot-2, 0, Nrot
+
+ .rotr v[2*PREFETCH_DIST], n[D-C+1]
+ .rotp p[N]
+
+ .save ar.lc, saved_lc
+ mov saved_lc = ar.lc
+ .save pr, saved_pr
+ mov saved_pr = pr
+ .body
+
+ mov src_pre_mem = in1
+ mov pr.rot = 0x10000 // of the rotating predicates only p16 starts out set
+ mov ar.ec = 1 // special unrolled loop
+
+ mov dst_pre_mem = in0
+ mov ar.lc = 2*PREFETCH_DIST - 1 // .prefetch_loop count, -1=repeat/until
+
+ add src_pre_l2 = 8*8, in1
+ add dst_pre_l2 = 8*8, in0
+ add src0 = 8, in1 // first t1 src
+ add src1 = 3*8, in1 // first t3 src
+ add dst0 = 8, in0 // first t1 dst
+ add dst1 = 3*8, in0 // first t3 dst
+ mov t1 = (PAGE_SIZE/128) - (2*PREFETCH_DIST) - 1 // .line_copy loop count; moved into ar.lc after the prefetch loop
+ nop.m 0
+ nop.i 0
+ ;;
+ // same as .line_copy loop, but with all predicated-off instructions removed:
+.prefetch_loop:
+(p[A]) ld8 v[A] = [src_pre_mem], 128 // M0
+(p[B]) st8 [dst_pre_mem] = v[B], 128 // M2
+ br.ctop.sptk .prefetch_loop
+ ;;
+ cmp.eq p16, p0 = r0, r0 // reset p16 to 1 (br.ctop cleared it to zero)
+ mov ar.lc = t1 // with 64KB pages, t1 is too big to fit in 8 bits!
+ mov ar.ec = N // # of stages in pipeline
+ ;;
+.line_copy:
+(p[D]) ld8 t2 = [src0], 3*8 // M0
+(p[D]) ld8 t4 = [src1], 3*8 // M1
+(p[B]) st8 [dst_pre_mem] = v[B], 128 // M2 prefetch dst from memory
+(p[D]) st8 [dst_pre_l2] = n[D-C], 128 // M3 prefetch dst from L2
+ ;;
+(p[A]) ld8 v[A] = [src_pre_mem], 128 // M0 prefetch src from memory
+(p[C]) ld8 n[0] = [src_pre_l2], 128 // M1 prefetch src from L2
+(p[D]) st8 [dst0] = t1, 8 // M2
+(p[D]) st8 [dst1] = t3, 8 // M3
+ ;;
+(p[D]) ld8 t5 = [src0], 8
+(p[D]) ld8 t7 = [src1], 3*8
+(p[D]) st8 [dst0] = t2, 3*8
+(p[D]) st8 [dst1] = t4, 3*8
+ ;;
+(p[D]) ld8 t6 = [src0], 3*8
+(p[D]) ld8 t10 = [src1], 8
+(p[D]) st8 [dst0] = t5, 8
+(p[D]) st8 [dst1] = t7, 3*8
+ ;;
+(p[D]) ld8 t9 = [src0], 3*8
+(p[D]) ld8 t11 = [src1], 3*8
+(p[D]) st8 [dst0] = t6, 3*8
+(p[D]) st8 [dst1] = t10, 8
+ ;;
+(p[D]) ld8 t12 = [src0], 8
+(p[D]) ld8 t14 = [src1], 8
+(p[D]) st8 [dst0] = t9, 3*8
+(p[D]) st8 [dst1] = t11, 3*8
+ ;;
+(p[D]) ld8 t13 = [src0], 4*8
+(p[D]) ld8 t15 = [src1], 4*8
+(p[D]) st8 [dst0] = t12, 8
+(p[D]) st8 [dst1] = t14, 8
+ ;;
+(p[D-1])ld8 t1 = [src0], 8 // one stage early: t1 is stored by the next iteration's p[D] stage
+(p[D-1])ld8 t3 = [src1], 8 // one stage early: t3 is stored by the next iteration's p[D] stage
+(p[D]) st8 [dst0] = t13, 4*8
+(p[D]) st8 [dst1] = t15, 4*8
+ br.ctop.sptk .line_copy
+ ;;
+ mov ar.lc = saved_lc // restore the caller's loop count
+ mov pr = saved_pr, -1 // restore all predicates
+ br.ret.sptk.many rp
+END(copy_page)
diff --git a/arch/ia64/lib/copy_user.S b/arch/ia64/lib/copy_user.S
new file mode 100644
index 00000000000..c952bdc6a09
--- /dev/null
+++ b/arch/ia64/lib/copy_user.S
@@ -0,0 +1,610 @@
+/*
+ *
+ * Optimized version of the copy_user() routine.
+ * It is used to copy data across the kernel/user boundary.
+ *
+ * The source and destination are always on opposite side of
+ * the boundary. When reading from user space we must catch
+ * faults on loads. When writing to user space we must catch
+ * errors on stores. Note that because of the nature of the copy
+ * we don't need to worry about overlapping regions.
+ *
+ *
+ * Inputs:
+ * in0 address of destination buffer
+ * in1 address of source buffer
+ * in2 number of bytes to copy
+ *
+ * Outputs:
+ * ret0 0 in case of success. The number of bytes NOT copied in
+ * case of error.
+ *
+ * Copyright (C) 2000-2001 Hewlett-Packard Co
+ * Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * Fixme:
+ * - handle the case where we have more than 16 bytes and the alignment
+ * are different.
+ * - more benchmarking
+ * - fix extraneous stop bit introduced by the EX() macro.
+ */
+
+#include <asm/asmmacro.h>
+
+//
+// Tuneable parameters
+//
+#define COPY_BREAK 16 // lengths <= COPY_BREAK take the byte-by-byte loop (must be >=16)
+#define PIPE_DEPTH 21 // software pipeline depth (stages of the rotating val1[]/val2[]/p[] sets)
+
+#define EPI p[PIPE_DEPTH-1] // predicate of the last pipeline stage (guards the stores)
+
+//
+// arguments
+//
+#define dst in0 // destination buffer address
+#define src in1 // source buffer address
+#define len in2 // number of bytes to copy
+
+//
+// local registers
+//
+#define t1 r2 // rshift in bytes
+#define t2 r3 // lshift in bytes
+#define rshift r14 // right shift in bits
+#define lshift r15 // left shift in bits
+#define word1 r16
+#define word2 r17
+#define cnt r18 // loop count loaded into ar.lc
+#define len2 r19 // len-1, initial ar.lc for the short byte loop
+#define saved_lc r20 // caller's ar.lc
+#define saved_pr r21 // caller's predicate registers
+#define tmp r22
+#define val r23
+#define src1 r24 // working copy of src (advanced by the loads)
+#define dst1 r25 // working copy of dst (advanced by the stores)
+#define src2 r26
+#define dst2 r27
+#define len1 r28
+#define enddst r29 // dst+len: first byte after end of destination
+#define endsrc r30 // src+len: first byte after end of source
+#define saved_pfs r31 // caller's ar.pfs
+
+GLOBAL_ENTRY(__copy_user) // ret0 = 0 on success, else the number of bytes NOT copied
+ .prologue
+ .save ar.pfs, saved_pfs
+ alloc saved_pfs=ar.pfs,3,((2*PIPE_DEPTH+7)&~7),0,((2*PIPE_DEPTH+7)&~7)
+
+ .rotr val1[PIPE_DEPTH],val2[PIPE_DEPTH]
+ .rotp p[PIPE_DEPTH]
+
+ adds len2=-1,len // br.ctop is repeat/until
+ mov ret0=r0
+
+ ;; // RAW of cfm when len=0
+ cmp.eq p8,p0=r0,len // check for zero length
+ .save ar.lc, saved_lc
+ mov saved_lc=ar.lc // preserve ar.lc (slow)
+(p8) br.ret.spnt.many rp // empty memcpy()
+ ;;
+ add enddst=dst,len // first byte after end of destination
+ add endsrc=src,len // first byte after end of source
+ .save pr, saved_pr
+ mov saved_pr=pr // preserve predicates
+
+ .body
+
+ mov dst1=dst // copy because of rotation
+ mov ar.ec=PIPE_DEPTH
+ mov pr.rot=1<<16 // p16=true all others are false
+
+ mov src1=src // copy because of rotation
+ mov ar.lc=len2 // initialize lc for small count
+ cmp.lt p10,p7=COPY_BREAK,len // if len > COPY_BREAK then long copy
+
+ xor tmp=src,dst // same alignment test prepare
+(p10) br.cond.dptk .long_copy_user
+ ;; // RAW pr.rot/p16 ?
+ //
+ // Now we do the byte by byte loop with software pipeline
+ //
+ // p7 is necessarily false by now
+1:
+ EX(.failure_in_pipe1,(p16) ld1 val1[0]=[src1],1)
+ EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1)
+ br.ctop.dptk.few 1b
+ ;;
+ mov ar.lc=saved_lc
+ mov pr=saved_pr,0xffffffffffff0000
+ mov ar.pfs=saved_pfs // restore ar.ec
+ br.ret.sptk.many rp // end of short memcpy
+
+ //
+ // Not 8-byte aligned
+ //
+.diff_align_copy_user:
+ // At this point we know we have more than 16 bytes to copy
+ // and also that src and dest do _not_ have the same alignment.
+ and src2=0x7,src1 // src offset
+ and dst2=0x7,dst1 // dst offset
+ ;;
+ // The basic idea is that we copy byte-by-byte at the head so
+ // that we can reach 8-byte alignment for both src1 and dst1.
+ // Then copy the body using software pipelined 8-byte copy,
+ // shifting the two back-to-back words right and left, then copy
+ // the tail by copying byte-by-byte.
+ //
+ // Fault handling. If the byte-by-byte at the head fails on the
+ // load, then restart and finish the pipeline by copying zeros
+ // to the dst1. Then copy zeros for the rest of dst1.
+ // If 8-byte software pipeline fails on the load, do the same as
+ // failure_in3 does. If the byte-by-byte at the tail fails, it is
+ // handled simply by failure_in_pipe1.
+ //
+ // The case p14 represents the source has more bytes in the
+ // the first word (by the shifted part), whereas the p15 needs to
+ // copy some bytes from the 2nd word of the source that has the
+ // tail of the 1st of the destination.
+ //
+
+ //
+ // Optimization. If dst1 is 8-byte aligned (quite common), we don't need
+ // to copy the head to dst1, to start 8-byte copy software pipeline.
+ // We know src1 is not 8-byte aligned in this case.
+ //
+ cmp.eq p14,p15=r0,dst2
+(p15) br.cond.spnt 1f
+ ;;
+ sub t1=8,src2
+ mov t2=src2
+ ;;
+ shl rshift=t2,3
+ sub len1=len,t1 // set len1
+ ;;
+ sub lshift=64,rshift
+ ;;
+ br.cond.spnt .word_copy_user
+ ;;
+1:
+ cmp.leu p14,p15=src2,dst2
+ sub t1=dst2,src2
+ ;;
+ .pred.rel "mutex", p14, p15
+(p14) sub word1=8,src2 // (8 - src offset)
+(p15) sub t1=r0,t1 // absolute value
+(p15) sub word1=8,dst2 // (8 - dst offset)
+ ;;
+ // For the case p14, we don't need to copy the shifted part to
+ // the 1st word of destination.
+ sub t2=8,t1
+(p14) sub word1=word1,t1
+ ;;
+ sub len1=len,word1 // resulting len
+(p15) shl rshift=t1,3 // in bits
+(p14) shl rshift=t2,3
+ ;;
+(p14) sub len1=len1,t1
+ adds cnt=-1,word1
+ ;;
+ sub lshift=64,rshift
+ mov ar.ec=PIPE_DEPTH
+ mov pr.rot=1<<16 // p16=true all others are false
+ mov ar.lc=cnt
+ ;;
+2:
+ EX(.failure_in_pipe2,(p16) ld1 val1[0]=[src1],1)
+ EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1)
+ br.ctop.dptk.few 2b
+ ;;
+ clrrrb
+ ;;
+.word_copy_user:
+ cmp.gtu p9,p0=16,len1
+(p9) br.cond.spnt 4f // if (16 > len1) skip 8-byte copy
+ ;;
+ shr.u cnt=len1,3 // number of 64-bit words
+ ;;
+ adds cnt=-1,cnt
+ ;;
+ .pred.rel "mutex", p14, p15
+(p14) sub src1=src1,t2
+(p15) sub src1=src1,t1
+ //
+ // Now both src1 and dst1 point to an 8-byte aligned address. And
+ // we have more than 8 bytes to copy.
+ //
+ mov ar.lc=cnt
+ mov ar.ec=PIPE_DEPTH
+ mov pr.rot=1<<16 // p16=true all others are false
+ ;;
+3:
+ //
+ // The pipeline consists of 3 stages:
+ // 1 (p16): Load a word from src1
+ // 2 (EPI_1): Shift right pair, saving to tmp
+ // 3 (EPI): Store tmp to dst1
+ //
+ // To make it simple, use at least 2 (p16) loops to set up val1[n]
+ // because we need 2 back-to-back val1[] to get tmp.
+ // Note that this implies EPI_2 must be p18 or greater.
+ //
+
+#define EPI_1 p[PIPE_DEPTH-2]
+#define SWITCH(pred, shift) cmp.eq pred,p0=shift,rshift
+#define CASE(pred, shift) \
+ (pred) br.cond.spnt .copy_user_bit##shift
+#define BODY(rshift) \
+.copy_user_bit##rshift: \
+1: \
+ EX(.failure_out,(EPI) st8 [dst1]=tmp,8); \
+(EPI_1) shrp tmp=val1[PIPE_DEPTH-2],val1[PIPE_DEPTH-1],rshift; \
+ EX(3f,(p16) ld8 val1[1]=[src1],8); \
+(p16) mov val1[0]=r0; \
+ br.ctop.dptk 1b; \
+ ;; \
+ br.cond.sptk.many .diff_align_do_tail; \
+2: \
+(EPI) st8 [dst1]=tmp,8; \
+(EPI_1) shrp tmp=val1[PIPE_DEPTH-2],val1[PIPE_DEPTH-1],rshift; \
+3: \
+(p16) mov val1[1]=r0; \
+(p16) mov val1[0]=r0; \
+ br.ctop.dptk 2b; \
+ ;; \
+ br.cond.sptk.many .failure_in2
+
+ //
+ // Since the instruction 'shrp' requires a fixed 128-bit value
+ // specifying the bits to shift, we need to provide 7 cases
+ // below.
+ //
+ SWITCH(p6, 8)
+ SWITCH(p7, 16)
+ SWITCH(p8, 24)
+ SWITCH(p9, 32)
+ SWITCH(p10, 40)
+ SWITCH(p11, 48)
+ SWITCH(p12, 56)
+ ;;
+ CASE(p6, 8)
+ CASE(p7, 16)
+ CASE(p8, 24)
+ CASE(p9, 32)
+ CASE(p10, 40)
+ CASE(p11, 48)
+ CASE(p12, 56)
+ ;;
+ BODY(8)
+ BODY(16)
+ BODY(24)
+ BODY(32)
+ BODY(40)
+ BODY(48)
+ BODY(56)
+ ;;
+.diff_align_do_tail:
+ .pred.rel "mutex", p14, p15
+(p14) sub src1=src1,t1
+(p14) adds dst1=-8,dst1
+(p15) sub dst1=dst1,t1
+ ;;
+4:
+ // Tail correction.
+ //
+ // The problem with this pipelined loop is that the last word is not
+ // loaded and thus part of the last word written is not correct.
+ // To fix that, we simply copy the tail byte by byte.
+
+ sub len1=endsrc,src1,1
+ clrrrb
+ ;;
+ mov ar.ec=PIPE_DEPTH
+ mov pr.rot=1<<16 // p16=true all others are false
+ mov ar.lc=len1
+ ;;
+5:
+ EX(.failure_in_pipe1,(p16) ld1 val1[0]=[src1],1)
+ EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1)
+ br.ctop.dptk.few 5b
+ ;;
+ mov ar.lc=saved_lc
+ mov pr=saved_pr,0xffffffffffff0000
+ mov ar.pfs=saved_pfs
+ br.ret.sptk.many rp
+
+ //
+ // Beginning of long memcpy (i.e. > 16 bytes)
+ //
+.long_copy_user:
+ tbit.nz p6,p7=src1,0 // odd alignment
+ and tmp=7,tmp
+ ;;
+ cmp.eq p10,p8=r0,tmp
+ mov len1=len // copy because of rotation
+(p8) br.cond.dpnt .diff_align_copy_user
+ ;;
+ // At this point we know we have more than 16 bytes to copy
+ // and also that both src and dest have the same alignment
+ // which may not be the one we want. So for now we must move
+ // forward slowly until we reach 16byte alignment: no need to
+ // worry about reaching the end of buffer.
+ //
+ EX(.failure_in1,(p6) ld1 val1[0]=[src1],1) // 1-byte aligned
+(p6) adds len1=-1,len1;;
+ tbit.nz p7,p0=src1,1
+ ;;
+ EX(.failure_in1,(p7) ld2 val1[1]=[src1],2) // 2-byte aligned
+(p7) adds len1=-2,len1;;
+ tbit.nz p8,p0=src1,2
+ ;;
+ //
+ // Stop bit not required after ld4 because if we fail on ld4
+ // we have never executed the ld1, therefore st1 is not executed.
+ //
+ EX(.failure_in1,(p8) ld4 val2[0]=[src1],4) // 4-byte aligned
+ ;;
+ EX(.failure_out,(p6) st1 [dst1]=val1[0],1)
+ tbit.nz p9,p0=src1,3
+ ;;
+ //
+ // Stop bit not required after ld8 because if we fail on ld8
+ // we have never executed the ld2, therefore st2 is not executed.
+ //
+ EX(.failure_in1,(p9) ld8 val2[1]=[src1],8) // 8-byte aligned
+ EX(.failure_out,(p7) st2 [dst1]=val1[1],2)
+(p8) adds len1=-4,len1
+ ;;
+ EX(.failure_out, (p8) st4 [dst1]=val2[0],4)
+(p9) adds len1=-8,len1;;
+ shr.u cnt=len1,4 // number of 128-bit (2x64bit) words
+ ;;
+ EX(.failure_out, (p9) st8 [dst1]=val2[1],8)
+ tbit.nz p6,p0=len1,3
+ cmp.eq p7,p0=r0,cnt
+ adds tmp=-1,cnt // br.ctop is repeat/until
+(p7) br.cond.dpnt .dotail // we have less than 16 bytes left
+ ;;
+ adds src2=8,src1
+ adds dst2=8,dst1
+ mov ar.lc=tmp
+ ;;
+ //
+ // 16bytes/iteration
+ //
+2:
+ EX(.failure_in3,(p16) ld8 val1[0]=[src1],16)
+(p16) ld8 val2[0]=[src2],16
+
+ EX(.failure_out, (EPI) st8 [dst1]=val1[PIPE_DEPTH-1],16)
+(EPI) st8 [dst2]=val2[PIPE_DEPTH-1],16
+ br.ctop.dptk 2b
+ ;; // RAW on src1 when fall through from loop
+ //
+ // Tail correction based on len only
+ //
+ // No matter where we come from (loop or test) the src1 pointer
+ // is 16 byte aligned AND we have less than 16 bytes to copy.
+ //
+.dotail:
+ EX(.failure_in1,(p6) ld8 val1[0]=[src1],8) // at least 8 bytes
+ tbit.nz p7,p0=len1,2
+ ;;
+ EX(.failure_in1,(p7) ld4 val1[1]=[src1],4) // at least 4 bytes
+ tbit.nz p8,p0=len1,1
+ ;;
+ EX(.failure_in1,(p8) ld2 val2[0]=[src1],2) // at least 2 bytes
+ tbit.nz p9,p0=len1,0
+ ;;
+ EX(.failure_out, (p6) st8 [dst1]=val1[0],8)
+ ;;
+ EX(.failure_in1,(p9) ld1 val2[1]=[src1]) // only 1 byte left
+ mov ar.lc=saved_lc
+ ;;
+ EX(.failure_out,(p7) st4 [dst1]=val1[1],4)
+ mov pr=saved_pr,0xffffffffffff0000
+ ;;
+ EX(.failure_out, (p8) st2 [dst1]=val2[0],2)
+ mov ar.pfs=saved_pfs
+ ;;
+ EX(.failure_out, (p9) st1 [dst1]=val2[1])
+ br.ret.sptk.many rp
+
+
+ //
+ // Here we handle the case where the byte by byte copy fails
+ // on the load.
+ // Several factors make the zeroing of the rest of the buffer kind of
+ // tricky:
+ // - the pipeline: loads/stores are not in sync (pipeline)
+ //
+ // In the same loop iteration, the dst1 pointer does not directly
+ // reflect where the faulty load was.
+ //
+ // - pipeline effect
+ // When you get a fault on load, you may have valid data from
+ // previous loads not yet stored, still in transit. Such data must be
+ // stored normally before moving onto zeroing the rest.
+ //
+ // - single/multi dispersal independence.
+ //
+ // solution:
+ // - we don't disrupt the pipeline, i.e. data in transit in
+ // the software pipeline will eventually be moved to memory.
+ // We simply replace the load with a simple mov and keep the
+ // pipeline going. We can't really do this inline because
+ // p16 is always reset to 1 when lc > 0.
+ //
+.failure_in_pipe1:
+ sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied
+1:
+(p16) mov val1[0]=r0
+(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1
+ br.ctop.dptk 1b
+ ;;
+ mov pr=saved_pr,0xffffffffffff0000
+ mov ar.lc=saved_lc
+ mov ar.pfs=saved_pfs
+ br.ret.sptk.many rp
+
+ //
+ // This is the case where the byte by byte copy fails on the load
+ // when we copy the head. We need to finish the pipeline and copy
+ // zeros for the rest of the destination. Since this happens
+ // at the top we still need to fill the body and tail.
+.failure_in_pipe2:
+ sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied
+2:
+(p16) mov val1[0]=r0
+(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1
+ br.ctop.dptk 2b
+ ;;
+ sub len=enddst,dst1,1 // precompute len
+ br.cond.dptk.many .failure_in1bis
+ ;;
+
+ //
+ // Here we handle the head & tail part when we check for alignment.
+ // The following code handles only the load failures. The
+ // main difficulty comes from the fact that loads/stores are
+ // scheduled. So when you fail on a load, the stores corresponding
+ // to previous successful loads must be executed.
+ //
+ // However some simplifications are possible given the way
+ // things work.
+ //
+ // 1) HEAD
+ // Theory of operation:
+ //
+ // Page A | Page B
+ // ---------|-----
+ // 1|8 x
+ // 1 2|8 x
+ // 4|8 x
+ // 1 4|8 x
+ // 2 4|8 x
+ // 1 2 4|8 x
+ // |1
+ // |2 x
+ // |4 x
+ //
+ // page_size >= 4k (2^12). (x means 4, 2, 1)
+ // Here we suppose Page A exists and Page B does not.
+ //
+ // As we move towards eight byte alignment we may encounter faults.
+ // The numbers on each page show the size of the load (current alignment).
+ //
+ // Key point:
+ // - if you fail on 1, 2, 4 then you have never executed any smaller
+ // size loads, e.g. failing ld4 means no ld1 nor ld2 executed
+ // before.
+ //
+ // This allows us to simplify the cleanup code, because basically you
+ // only have to worry about "pending" stores in the case of a failing
+ // ld8(). Given the way the code is written today, this means only
+ // worry about st2, st4. There we can use the information encapsulated
+ // into the predicates.
+ //
+ // Other key point:
+ // - if you fail on the ld8 in the head, it means you went straight
+ // to it, i.e. 8byte alignment within an unexisting page.
+ // Again this comes from the fact that if you crossed just for the ld8 then
+ // you are 8byte aligned but also 16byte align, therefore you would
+ // either go for the 16byte copy loop OR the ld8 in the tail part.
+ // The combination ld1, ld2, ld4, ld8 where you fail on ld8 is impossible
+ // because it would mean you had 15bytes to copy in which case you
+ // would have defaulted to the byte by byte copy.
+ //
+ //
+ // 2) TAIL
+ // Here we know we have less than 16 bytes AND we are either 8 or 16 byte
+ // aligned.
+ //
+ // Key point:
+ // This means that we either:
+ // - are right on a page boundary
+ // OR
+ // - are at more than 16 bytes from a page boundary with
+ // at most 15 bytes to copy: no chance of crossing.
+ //
+ // This allows us to assume that if we fail on a load we haven't possibly
+ // executed any of the previous (tail) ones, so we don't need to do
+ // any stores. For instance, if we fail on ld2, this means we had
+ // 2 or 3 bytes left to copy and we did not execute the ld8 nor ld4.
+ //
+ // This means that we are in a situation similar to a fault in the
+ // head part. That's nice!
+ //
+.failure_in1:
+ sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied
+ sub len=endsrc,src1,1
+ //
+ // we know that ret0 can never be zero at this point
+ // because we failed while trying to do a load, i.e. there is still
+ // some work to do.
+ // The failure_in1bis and length problem is taken care of at the
+ // calling side.
+ //
+ ;;
+.failure_in1bis: // from (.failure_in3)
+ mov ar.lc=len // Continue with a stupid byte store.
+ ;;
+5:
+ st1 [dst1]=r0,1
+ br.cloop.dptk 5b
+ ;;
+ mov pr=saved_pr,0xffffffffffff0000
+ mov ar.lc=saved_lc
+ mov ar.pfs=saved_pfs
+ br.ret.sptk.many rp
+
+ //
+ // Here we simply restart the loop but instead
+ // of doing loads we fill the pipeline with zeroes
+ // We can't simply store r0 because we may have valid
+ // data in transit in the pipeline.
+ // ar.lc and ar.ec are setup correctly at this point
+ //
+ // we MUST use src1/endsrc here and not dst1/enddst because
+ // of the pipeline effect.
+ //
+.failure_in3:
+ sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied
+ ;;
+2:
+(p16) mov val1[0]=r0
+(p16) mov val2[0]=r0
+(EPI) st8 [dst1]=val1[PIPE_DEPTH-1],16
+(EPI) st8 [dst2]=val2[PIPE_DEPTH-1],16
+ br.ctop.dptk 2b
+ ;;
+ cmp.ne p6,p0=dst1,enddst // Do we need to finish the tail ?
+ sub len=enddst,dst1,1 // precompute len
+(p6) br.cond.dptk .failure_in1bis
+ ;;
+ mov pr=saved_pr,0xffffffffffff0000
+ mov ar.lc=saved_lc
+ mov ar.pfs=saved_pfs
+ br.ret.sptk.many rp
+
+.failure_in2:
+ sub ret0=endsrc,src1
+ cmp.ne p6,p0=dst1,enddst // Do we need to finish the tail ?
+ sub len=enddst,dst1,1 // precompute len
+(p6) br.cond.dptk .failure_in1bis
+ ;;
+ mov pr=saved_pr,0xffffffffffff0000
+ mov ar.lc=saved_lc
+ mov ar.pfs=saved_pfs
+ br.ret.sptk.many rp
+
+ //
+ // handling of failures on stores: that's the easy part
+ //
+.failure_out:
+ sub ret0=enddst,dst1
+ mov pr=saved_pr,0xffffffffffff0000
+ mov ar.lc=saved_lc
+
+ mov ar.pfs=saved_pfs
+ br.ret.sptk.many rp
+END(__copy_user)
diff --git a/arch/ia64/lib/csum_partial_copy.c b/arch/ia64/lib/csum_partial_copy.c
new file mode 100644
index 00000000000..36866e8a5d2
--- /dev/null
+++ b/arch/ia64/lib/csum_partial_copy.c
@@ -0,0 +1,151 @@
+/*
+ * Network Checksum & Copy routine
+ *
+ * Copyright (C) 1999, 2003-2004 Hewlett-Packard Co
+ * Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * Most of the code has been imported from Linux/Alpha
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+
+#include <asm/uaccess.h>
+
+/*
+ * XXX Fixme: from64to16() and do_csum_c() below are meant for debugging and will go away
+ */
+static inline unsigned
+short from64to16(unsigned long x) /* fold a 64-bit sum down to 16 bits, adding the carries back in */
+{
+ /* add up 32-bit words for 33 bits */
+ x = (x & 0xffffffff) + (x >> 32);
+ /* add up 16-bit and 17-bit words for 17+c bits */
+ x = (x & 0xffff) + (x >> 16);
+ /* add up 16-bit and 2-bit for 16+c bit */
+ x = (x & 0xffff) + (x >> 16);
+ /* add up carry.. */
+ x = (x & 0xffff) + (x >> 16);
+ return x;
+}
+
+static inline /* C checksum of buff[0..len), result folded to 16 bits via from64to16() */
+unsigned long do_csum_c(const unsigned char * buff, int len, unsigned int psum)
+{
+ int odd, count;
+ unsigned long result = (unsigned long)psum;
+
+ if (len <= 0)
+ goto out; /* nothing to sum: psum is returned as-is (not folded) */
+ odd = 1 & (unsigned long) buff;
+ if (odd) {
+ result = *buff << 8; /* NOTE(review): plain '=' discards the psum stored in result above -- confirm intent */
+ len--;
+ buff++;
+ }
+ count = len >> 1; /* nr of 16-bit words.. */
+ if (count) {
+ if (2 & (unsigned long) buff) {
+ result += *(unsigned short *) buff;
+ count--;
+ len -= 2;
+ buff += 2;
+ }
+ count >>= 1; /* nr of 32-bit words.. */
+ if (count) {
+ if (4 & (unsigned long) buff) {
+ result += *(unsigned int *) buff;
+ count--;
+ len -= 4;
+ buff += 4;
+ }
+ count >>= 1; /* nr of 64-bit words.. */
+ if (count) {
+ unsigned long carry = 0;
+ do {
+ unsigned long w = *(unsigned long *) buff;
+ count--;
+ buff += 8;
+ result += carry;
+ result += w;
+ carry = (w > result); /* unsigned wrap-around means the add carried out */
+ } while (count);
+ result += carry;
+ result = (result & 0xffffffff) + (result >> 32); /* fold the two 32-bit halves together */
+ }
+ if (len & 4) {
+ result += *(unsigned int *) buff;
+ buff += 4;
+ }
+ }
+ if (len & 2) {
+ result += *(unsigned short *) buff;
+ buff += 2;
+ }
+ }
+ if (len & 1)
+ result += *buff; /* trailing single byte */
+
+ result = from64to16(result);
+
+ if (odd) /* buffer started on an odd address: swap the two bytes of the folded sum */
+ result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
+
+out:
+ return result;
+}
+
+/*
+ * XXX Fixme
+ *
+ * This is very ugly but temporary. THIS NEEDS SERIOUS ENHANCEMENTS.
+ * But it's very tricky to get right even in C.
+ */
+extern unsigned long do_csum(const unsigned char *, long); /* (buffer, length) */
+
+static unsigned int /* copy len bytes from src to dst, then checksum dst and add psum */
+do_csum_partial_copy_from_user (const unsigned char __user *src, unsigned char *dst,
+ int len, unsigned int psum, int *errp)
+{
+ unsigned long result;
+
+ /* XXX Fixme
+ * for now we separate the copy from checksum for obvious
+ * alignment difficulties. Look at the Alpha code and you'll be
+ * scared.
+ */
+
+ if (__copy_from_user(dst, src, len) != 0 && errp) /* errp may be NULL (see csum_partial_copy_nocheck) */
+ *errp = -EFAULT;
+
+ result = do_csum(dst, len); /* dst is checksummed even when the copy reported a fault */
+
+ /* add in old sum, and carry.. */
+ result += psum;
+ /* 32+c bits -> 32 bits */
+ result = (result & 0xffffffff) + (result >> 32);
+ return result;
+}
+
+unsigned int /* access_ok()-checked wrapper around do_csum_partial_copy_from_user() */
+csum_partial_copy_from_user (const unsigned char __user *src, unsigned char *dst,
+ int len, unsigned int sum, int *errp)
+{
+ if (!access_ok(VERIFY_READ, src, len)) {
+ *errp = -EFAULT; /* NOTE(review): errp is dereferenced without the NULL check the helper applies -- confirm callers never pass NULL */
+ memset(dst, 0, len); /* hand back a zeroed destination ... */
+ return sum; /* ... and the incoming sum unchanged */
+ }
+
+ return do_csum_partial_copy_from_user(src, dst, len, sum, errp);
+}
+
+unsigned int /* same copy+checksum helper, but without access_ok() and without error reporting */
+csum_partial_copy_nocheck(const unsigned char __user *src, unsigned char *dst,
+ int len, unsigned int sum)
+{
+ return do_csum_partial_copy_from_user(src, dst, len, sum, NULL); /* NULL errp: a copy fault is not reported */
+}
+
+EXPORT_SYMBOL(csum_partial_copy_nocheck); /* NOTE(review): only this symbol is exported in this file; csum_partial_copy_from_user is not -- confirm intent */
diff --git a/arch/ia64/lib/dec_and_lock.c b/arch/ia64/lib/dec_and_lock.c
new file mode 100644
index 00000000000..c7ce92f968f
--- /dev/null
+++ b/arch/ia64/lib/dec_and_lock.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2003 Jerome Marchand, Bull S.A.
+ * Cleaned up by David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * This file is released under the GPLv2, or at your option any later version.
+ *
+ * ia64 version of "atomic_dec_and_lock()" using the atomic "cmpxchg" instruction. This
+ * code is an adaptation of the x86 version of "atomic_dec_and_lock()".
+ */
+
+#include <linux/compiler.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <asm/atomic.h>
+
+/*
+ * Decrement REFCOUNT and if the count reaches zero, acquire the spinlock. Both of these
+ * operations have to be done atomically, so that the count doesn't drop to zero without
+ * acquiring the spinlock first. Returns 1 with LOCK held if the count reached zero, else 0.
+ */
+int
+_atomic_dec_and_lock (atomic_t *refcount, spinlock_t *lock)
+{
+ int old, new;
+
+ do {
+ old = atomic_read(refcount);
+ new = old - 1;
+
+ if (unlikely (old == 1)) {
+ /* oops, we may be decrementing to zero, do it the slow way... */
+ spin_lock(lock);
+ if (atomic_dec_and_test(refcount))
+ return 1; /* count hit zero: return with the lock still held */
+ spin_unlock(lock); /* the count was no longer 1 once we held the lock */
+ return 0;
+ }
+ } while (cmpxchg(&refcount->counter, old, new) != old); /* retry if the counter changed since it was read */
+ return 0; /* decremented without reaching zero; lock never taken */
+}
+
+EXPORT_SYMBOL(_atomic_dec_and_lock);
diff --git a/arch/ia64/lib/do_csum.S b/arch/ia64/lib/do_csum.S
new file mode 100644
index 00000000000..6bec2fc9f5b
--- /dev/null
+++ b/arch/ia64/lib/do_csum.S
@@ -0,0 +1,323 @@
+/*
+ *
+ * Optimized version of the standard do_csum() function
+ *
+ * Return: a 64bit quantity containing the 16bit Internet checksum
+ *
+ * Inputs:
+ * in0: address of buffer to checksum (char *)
+ * in1: length of the buffer (int)
+ *
+ * Copyright (C) 1999, 2001-2002 Hewlett-Packard Co
+ * Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * 02/04/22 Ken Chen <kenneth.w.chen@intel.com>
+ * Data locality study on the checksum buffer.
+ * More optimization cleanup - remove excessive stop bits.
+ * 02/04/08 David Mosberger <davidm@hpl.hp.com>
+ * More cleanup and tuning.
+ * 01/04/18 Jun Nakajima <jun.nakajima@intel.com>
+ * Clean up and optimize and the software pipeline, loading two
+ * back-to-back 8-byte words per loop. Clean up the initialization
+ * for the loop. Support the cases where load latency = 1 or 2.
+ * Set CONFIG_IA64_LOAD_LATENCY to 1 or 2 (default).
+ */
+
+#include <asm/asmmacro.h>
+
+//
+// Theory of operations:
+// The goal is to go as quickly as possible to the point where
+// we can checksum 16 bytes/loop. Before reaching that point we must
+// take care of incorrect alignment of first byte.
+//
+// The code hereafter also takes care of the "tail" part of the buffer
+// before entering the core loop, if any. The checksum is a sum so it
+// allows us to commute operations. So we do the "head" and "tail"
+// first to finish at full speed in the body. Once we get the head and
+// tail values, we feed them into the pipeline, very handy initialization.
+//
+// Of course we deal with the special case where the whole buffer fits
+// into one 8 byte word. In this case we have only one entry in the pipeline.
+//
+// We use a (LOAD_LATENCY+2)-stage pipeline in the loop to account for
+// possible load latency and also to accommodate for head and tail.
+//
+// The end of the function deals with folding the checksum from 64bits
+// down to 16bits taking care of the carry.
+//
+// This version avoids synchronization in the core loop by also using a
+// pipeline for the accumulation of the checksum in resultx[] (x=1,2).
+//
+// wordx[] (x=1,2)
+// |---|
+// | | 0 : new value loaded in pipeline
+// |---|
+// | | - : in transit data
+// |---|
+// | | LOAD_LATENCY : current value to add to checksum
+// |---|
+// | | LOAD_LATENCY+1 : previous value added to checksum
+// |---| (previous iteration)
+//
+// resultx[] (x=1,2)
+// |---|
+// | | 0 : initial value
+// |---|
+// | | LOAD_LATENCY-1 : new checksum
+// |---|
+// | | LOAD_LATENCY : previous value of checksum
+// |---|
+// | | LOAD_LATENCY+1 : final checksum when out of the loop
+// |---|
+//
+//
+// See RFC1071 "Computing the Internet Checksum" for various techniques for
+// calculating the Internet checksum.
+//
+// NOT YET DONE:
+// - Maybe another algorithm which would take care of the folding at the
+// end in a different manner
+// - Work with people more knowledgeable than me on the network stack
+// to figure out if we could not split the function depending on the
+// type of packet or alignment we get. Like the ip_fast_csum() routine
+// where we know we have at least 20bytes worth of data to checksum.
+// - Do a better job of handling small packets.
+// - Note on prefetching: it was found that under various load, i.e. ftp read/write,
+// nfs read/write, the L1 cache hit rate is at 60% and L2 cache hit rate is at 99.8%
+// on the data that buffer points to (partly because the checksum is often preceded by
+// a copy_from_user()). This finding indicates that lfetch will not be beneficial since
+// the data is already in the cache.
+//
+
+#define saved_pfs r11 // caller's ar.pfs
+#define hmask r16 // head mask
+#define tmask r17 // tail mask
+#define first1 r18 // 8-byte aligned address of the first word, then load pointer
+#define firstval r19 // contents of the first word
+#define firstoff r20 // byte offset of buf within the first word
+#define last r21 // address of the word containing the last byte
+#define lastval r22 // contents of the last word
+#define lastoff r23 // (buf+len) & 7
+#define saved_lc r24 // caller's ar.lc
+#define saved_pr r25 // caller's predicates
+#define tmp1 r26
+#define tmp2 r27
+#define tmp3 r28
+#define carry1 r29 // carries accumulated for result1[]
+#define carry2 r30 // carries accumulated for result2[]
+#define first2 r31 // first1+8: load pointer for the second word of each pair
+
+#define buf in0
+#define len in1
+
+#define LOAD_LATENCY 2 // XXX fix me
+
+#if (LOAD_LATENCY != 1) && (LOAD_LATENCY != 2)
+# error "Only 1 or 2 is supported/tested for LOAD_LATENCY."
+#endif
+
+#define PIPE_DEPTH (LOAD_LATENCY+2)
+#define ELD p[LOAD_LATENCY] // end of load
+#define ELD_1 p[LOAD_LATENCY+1] // and next stage
+
+// unsigned long do_csum(unsigned char *buf,long len)
+
+GLOBAL_ENTRY(do_csum) // ret0 = checksum folded to 16 bits (bytes swapped back if buf was odd)
+ .prologue
+ .save ar.pfs, saved_pfs
+ alloc saved_pfs=ar.pfs,2,16,0,16
+ .rotr word1[4], word2[4],result1[LOAD_LATENCY+2],result2[LOAD_LATENCY+2]
+ .rotp p[PIPE_DEPTH], pC1[2], pC2[2]
+ mov ret0=r0 // in case we have zero length
+ cmp.lt p0,p6=r0,len // check for zero length or negative (32bit len)
+ ;;
+ add tmp1=buf,len // last byte's address
+ .save pr, saved_pr
+ mov saved_pr=pr // preserve predicates (rotation)
+(p6) br.ret.spnt.many rp // return if zero or negative length
+
+ mov hmask=-1 // initialize head mask
+ tbit.nz p15,p0=buf,0 // is buf an odd address?
+ and first1=-8,buf // 8-byte align down address of first1 element
+
+ and firstoff=7,buf // how many bytes off for first1 element
+ mov tmask=-1 // initialize tail mask
+
+ ;;
+ adds tmp2=-1,tmp1 // last-1
+ and lastoff=7,tmp1 // how many bytes off for last element
+ ;;
+ sub tmp1=8,lastoff // complement to lastoff
+ and last=-8,tmp2 // address of word containing last byte
+ ;;
+ sub tmp3=last,first1 // tmp3=distance from first1 to last
+ .save ar.lc, saved_lc
+ mov saved_lc=ar.lc // save lc
+ cmp.eq p8,p9=last,first1 // everything fits in one word ?
+
+ ld8 firstval=[first1],8 // load, ahead of time, "first1" word
+ and tmp1=7, tmp1 // make sure that if tmp1==8 -> tmp1=0
+ shl tmp2=firstoff,3 // number of bits
+ ;;
+(p9) ld8 lastval=[last] // load, ahead of time, "last" word, if needed
+ shl tmp1=tmp1,3 // number of bits
+(p9) adds tmp3=-8,tmp3 // effectively loaded
+ ;;
+(p8) mov lastval=r0 // we don't need lastval if first1==last
+ shl hmask=hmask,tmp2 // build head mask, mask off [0,firstoff[
+ shr.u tmask=tmask,tmp1 // build tail mask, mask off ]8,lastoff]
+ ;;
+ .body
+#define count tmp3
+
+(p8) and hmask=hmask,tmask // apply tail mask to head mask if 1 word only
+(p9) and word2[0]=lastval,tmask // mask last it as appropriate
+ shr.u count=count,3 // how many 8-byte?
+ ;;
+ // If count is odd, finish this 8-byte word so that we can
+ // load two back-to-back 8-byte words per loop thereafter.
+ and word1[0]=firstval,hmask // and mask it as appropriate
+ tbit.nz p10,p11=count,0 // if (count is odd)
+ ;;
+(p8) mov result1[0]=word1[0]
+(p9) add result1[0]=word1[0],word2[0]
+ ;;
+ cmp.ltu p6,p0=result1[0],word1[0] // check the carry
+ cmp.eq.or.andcm p8,p0=0,count // exit if zero 8-byte
+ ;;
+(p6) adds result1[0]=1,result1[0]
+(p8) br.cond.dptk .do_csum_exit // if (within an 8-byte word)
+(p11) br.cond.dptk .do_csum16 // if (count is even)
+
+ // Here count is odd.
+ ld8 word1[1]=[first1],8 // load an 8-byte word
+ cmp.eq p9,p10=1,count // if (count == 1)
+ adds count=-1,count // loaded an 8-byte word
+ ;;
+ add result1[0]=result1[0],word1[1]
+ ;;
+ cmp.ltu p6,p0=result1[0],word1[1]
+ ;;
+(p6) adds result1[0]=1,result1[0]
+(p9) br.cond.sptk .do_csum_exit // if (count == 1) exit
+ // Fall through to calculate the checksum, feeding result1[0] as
+ // the initial value in result1[0].
+ //
+ // Calculate the checksum loading two 8-byte words per loop.
+ //
+.do_csum16:
+ add first2=8,first1
+ shr.u count=count,1 // we do 16 bytes per loop
+ ;;
+ adds count=-1,count
+ mov carry1=r0
+ mov carry2=r0
+ brp.loop.imp 1f,2f
+ ;;
+ mov ar.ec=PIPE_DEPTH
+ mov ar.lc=count // set lc
+ mov pr.rot=1<<16
+ // result1[0] must be initialized in advance.
+ mov result2[0]=r0
+ ;;
+ .align 32
+1:
+(ELD_1) cmp.ltu pC1[0],p0=result1[LOAD_LATENCY],word1[LOAD_LATENCY+1]
+(pC1[1])adds carry1=1,carry1
+(ELD_1) cmp.ltu pC2[0],p0=result2[LOAD_LATENCY],word2[LOAD_LATENCY+1]
+(pC2[1])adds carry2=1,carry2
+(ELD) add result1[LOAD_LATENCY-1]=result1[LOAD_LATENCY],word1[LOAD_LATENCY]
+(ELD) add result2[LOAD_LATENCY-1]=result2[LOAD_LATENCY],word2[LOAD_LATENCY]
+2:
+(p[0]) ld8 word1[0]=[first1],16
+(p[0]) ld8 word2[0]=[first2],16
+ br.ctop.sptk 1b
+ ;;
+ // Since len is a 32-bit value, carry cannot be larger than a 64-bit value.
+(pC1[1])adds carry1=1,carry1 // since we miss the last one
+(pC2[1])adds carry2=1,carry2
+ ;;
+ add result1[LOAD_LATENCY+1]=result1[LOAD_LATENCY+1],carry1
+ add result2[LOAD_LATENCY+1]=result2[LOAD_LATENCY+1],carry2
+ ;;
+ cmp.ltu p6,p0=result1[LOAD_LATENCY+1],carry1
+ cmp.ltu p7,p0=result2[LOAD_LATENCY+1],carry2
+ ;;
+(p6) adds result1[LOAD_LATENCY+1]=1,result1[LOAD_LATENCY+1]
+(p7) adds result2[LOAD_LATENCY+1]=1,result2[LOAD_LATENCY+1]
+ ;;
+ add result1[0]=result1[LOAD_LATENCY+1],result2[LOAD_LATENCY+1]
+ ;;
+ cmp.ltu p6,p0=result1[0],result2[LOAD_LATENCY+1]
+ ;;
+(p6) adds result1[0]=1,result1[0]
+ ;;
+.do_csum_exit:
+ //
+ // now fold 64 into 16 bits taking care of carry
+ // that's not very good because it has lots of sequentiality
+ //
+ mov tmp3=0xffff
+ zxt4 tmp1=result1[0]
+ shr.u tmp2=result1[0],32
+ ;;
+ add result1[0]=tmp1,tmp2
+ ;;
+ and tmp1=result1[0],tmp3
+ shr.u tmp2=result1[0],16
+ ;;
+ add result1[0]=tmp1,tmp2
+ ;;
+ and tmp1=result1[0],tmp3
+ shr.u tmp2=result1[0],16
+ ;;
+ add result1[0]=tmp1,tmp2
+ ;;
+ and tmp1=result1[0],tmp3
+ shr.u tmp2=result1[0],16
+ ;;
+ add ret0=tmp1,tmp2
+ mov pr=saved_pr,0xffffffffffff0000
+ ;;
+ // if buf was odd then swap bytes
+ mov ar.pfs=saved_pfs // restore ar.ec
+(p15) mux1 ret0=ret0,@rev // reverse word
+ ;;
+ mov ar.lc=saved_lc
+(p15) shr.u ret0=ret0,64-16 // + shift back to position = swap bytes
+ br.ret.sptk.many rp
+
+// I (Jun Nakajima) wrote an equivalent code (see below), but it was
+// not much better than the original. So keep the original there so that
+// someone else can challenge.
+//
+// shr.u word1[0]=result1[0],32
+// zxt4 result1[0]=result1[0]
+// ;;
+// add result1[0]=result1[0],word1[0]
+// ;;
+// zxt2 result2[0]=result1[0]
+// extr.u word1[0]=result1[0],16,16
+// shr.u carry1=result1[0],32
+// ;;
+// add result2[0]=result2[0],word1[0]
+// ;;
+// add result2[0]=result2[0],carry1
+// ;;
+// extr.u ret0=result2[0],16,16
+// ;;
+// add ret0=ret0,result2[0]
+// ;;
+// zxt2 ret0=ret0
+// mov ar.pfs=saved_pfs // restore ar.ec
+// mov pr=saved_pr,0xffffffffffff0000
+// ;;
+// // if buf was odd then swap bytes
+// mov ar.lc=saved_lc
+//(p15) mux1 ret0=ret0,@rev // reverse word
+// ;;
+//(p15) shr.u ret0=ret0,64-16 // + shift back to position = swap bytes
+// br.ret.sptk.many rp
+
+END(do_csum)
diff --git a/arch/ia64/lib/flush.S b/arch/ia64/lib/flush.S
new file mode 100644
index 00000000000..29c802b1966
--- /dev/null
+++ b/arch/ia64/lib/flush.S
@@ -0,0 +1,39 @@
+/*
+ * Cache flushing routines.
+ *
+ * Copyright (C) 1999-2001 Hewlett-Packard Co
+ * Copyright (C) 1999-2001 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <asm/asmmacro.h>
+#include <asm/page.h>
+
+ /*
+ * flush_icache_range(start,end)
+ * Must flush range from start to end-1 but nothing else (need to
+ * be careful not to touch addresses that may be unmapped).
+ *
+ * in0 = start (inclusive), in1 = end (exclusive); requires end > start.
+ * start and end may have arbitrary alignment: the loop visits every
+ * 32-byte line from the one holding start to the one holding end-1,
+ * so a start that is not 32-byte aligned no longer causes the line
+ * holding end-1 to be skipped.
+ * Uses r2, r3, r8 and r9 as scratch; ar.lc is saved in r3 and restored.
+ */
+GLOBAL_ENTRY(flush_icache_range)
+ .prologue
+ alloc r2=ar.pfs,2,0,0,0
+ adds r8=-1,in1 // r8 = address of last byte to flush
+ shr.u r9=in0,5 // r9 = index of first 32-byte line
+ ;;
+ shr.u r8=r8,5 // r8 = index of last 32-byte line
+ and in0=-32,in0 // round start down to a 32-byte boundary
+ .save ar.lc, r3
+ mov r3=ar.lc // save ar.lc
+ ;;
+ sub r8=r8,r9 // r8 = number of lines - 1 (br.cloop runs ar.lc+1 times)
+ ;;
+
+ .body
+
+ mov ar.lc=r8
+ ;;
+.Loop: fc in0 // issuable on M0 only
+ add in0=32,in0 // we flush 32 bytes per iteration
+ br.cloop.sptk.few .Loop
+ ;;
+ sync.i
+ ;;
+ srlz.i
+ ;;
+ mov ar.lc=r3 // restore ar.lc
+ br.ret.sptk.many rp
+END(flush_icache_range)
diff --git a/arch/ia64/lib/idiv32.S b/arch/ia64/lib/idiv32.S
new file mode 100644
index 00000000000..2ac28bf0a66
--- /dev/null
+++ b/arch/ia64/lib/idiv32.S
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2000 Hewlett-Packard Co
+ * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 32-bit integer division.
+ *
+ * This code is based on the application note entitled "Divide, Square Root
+ * and Remainder Algorithms for the IA-64 Architecture". This document
+ * is available as Intel document number 248725-002 or via the web at
+ * http://developer.intel.com/software/opensource/numerics/
+ *
+ * For more details on the theory behind these algorithms, see "IA-64
+ * and Elementary Functions" by Peter Markstein; HP Professional Books
+ * (http://www.hp.com/go/retailbooks/)
+ */
+
+#include <asm/asmmacro.h>
+
+/*
+ * Build-time selectors:
+ * MODULO - produce the remainder instead of the quotient.
+ * UNSIGNED - use the unsigned extend/convert instructions.
+ */
+#ifdef MODULO
+# define OP mod
+#else
+# define OP div
+#endif
+
+#ifdef UNSIGNED
+# define SGN u
+# define EXTEND zxt4
+# define INT_TO_FP(a,b) fcvt.xuf.s1 a=b
+# define FP_TO_INT(a,b) fcvt.fxu.trunc.s1 a=b
+#else
+# define SGN
+# define EXTEND sxt4
+# define INT_TO_FP(a,b) fcvt.xf a=b
+# define FP_TO_INT(a,b) fcvt.fx.trunc.s1 a=b
+#endif
+
+/*
+ * NAME pastes "__", SGN, OP and "si3" together, i.e. it expands to
+ * __divsi3, __udivsi3, __modsi3 or __umodsi3 depending on the selectors.
+ */
+#define PASTE1(a,b) a##b
+#define PASTE(a,b) PASTE1(a,b)
+#define NAME PASTE(PASTE(__,SGN),PASTE(OP,si3))
+
+/*
+ * NAME(a, b): 32-bit integer divide (or remainder when MODULO is defined).
+ * in0: a (dividend)
+ * in1: b (divisor)
+ * r8: result
+ * Writes r2, f6-f9 and p6 in addition to r8; in0/in1 are overwritten.
+ */
+GLOBAL_ENTRY(NAME)
+ .regstk 2,0,0,0
+ // Transfer inputs to FP registers.
+ mov r2 = 0xffdd // r2 = -34 + 65535 (fp reg format bias)
+ EXTEND in0 = in0 // in0 = a
+ EXTEND in1 = in1 // in1 = b
+ ;;
+ setf.sig f8 = in0
+ setf.sig f9 = in1
+#ifdef MODULO
+ sub in1 = r0, in1 // in1 = -b
+#endif
+ ;;
+ // Convert the inputs to FP, to avoid FP software-assist faults.
+ INT_TO_FP(f8, f8)
+ INT_TO_FP(f9, f9)
+ ;;
+ setf.exp f7 = r2 // f7 = 2^-34
+ // frcpa writes p6; every refinement step below is predicated on it.
+ // NOTE(review): per the ISA, p6 is presumably clear when frcpa already
+ // delivered the final result (special operands) - confirm.
+ frcpa.s1 f6, p6 = f8, f9 // y0 = frcpa(b)
+ ;;
+(p6) fmpy.s1 f8 = f8, f6 // q0 = a*y0
+(p6) fnma.s1 f6 = f9, f6, f1 // e0 = -b*y0 + 1
+ ;;
+#ifdef MODULO
+ // f9 (b as FP) was last read by the fnma above; reuse it for integer -b.
+ setf.sig f9 = in1 // f9 = -b
+#endif
+(p6) fma.s1 f8 = f6, f8, f8 // q1 = e0*q0 + q0
+(p6) fma.s1 f6 = f6, f6, f7 // e1 = e0*e0 + 2^-34
+ ;;
+#ifdef MODULO
+ // f7 (2^-34) was last read by the fma above; reuse it for integer a.
+ setf.sig f7 = in0
+#endif
+(p6) fma.s1 f6 = f6, f8, f8 // q2 = e1*q1 + q1
+ ;;
+ FP_TO_INT(f6, f6) // q = trunc(q2)
+ ;;
+#ifdef MODULO
+ xma.l f6 = f6, f9, f7 // r = q*(-b) + a
+ ;;
+#endif
+ getf.sig r8 = f6 // transfer result to result register
+ br.ret.sptk.many rp
+END(NAME)
diff --git a/arch/ia64/lib/idiv64.S b/arch/ia64/lib/idiv64.S
new file mode 100644
index 00000000000..f69bd2b0987
--- /dev/null
+++ b/arch/ia64/lib/idiv64.S
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 1999-2000 Hewlett-Packard Co
+ * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 64-bit integer division.
+ *
+ * This code is based on the application note entitled "Divide, Square Root
+ * and Remainder Algorithms for the IA-64 Architecture". This document
+ * is available as Intel document number 248725-002 or via the web at
+ * http://developer.intel.com/software/opensource/numerics/
+ *
+ * For more details on the theory behind these algorithms, see "IA-64
+ * and Elementary Functions" by Peter Markstein; HP Professional Books
+ * (http://www.hp.com/go/retailbooks/)
+ */
+
+#include <asm/asmmacro.h>
+
+/*
+ * Build-time selectors:
+ * MODULO - produce the remainder instead of the quotient.
+ * UNSIGNED - use the unsigned convert instructions.
+ */
+#ifdef MODULO
+# define OP mod
+#else
+# define OP div
+#endif
+
+#ifdef UNSIGNED
+# define SGN u
+# define INT_TO_FP(a,b) fcvt.xuf.s1 a=b
+# define FP_TO_INT(a,b) fcvt.fxu.trunc.s1 a=b
+#else
+# define SGN
+# define INT_TO_FP(a,b) fcvt.xf a=b
+# define FP_TO_INT(a,b) fcvt.fx.trunc.s1 a=b
+#endif
+
+/*
+ * NAME pastes "__", SGN, OP and "di3" together, i.e. it expands to
+ * __divdi3, __udivdi3, __moddi3 or __umoddi3 depending on the selectors.
+ */
+#define PASTE1(a,b) a##b
+#define PASTE(a,b) PASTE1(a,b)
+#define NAME PASTE(PASTE(__,SGN),PASTE(OP,di3))
+
+/*
+ * NAME(a, b): 64-bit integer divide (or remainder when MODULO is defined).
+ * in0: a (dividend)
+ * in1: b (divisor)
+ * r8: result
+ * Writes f6-f11 and p6 in addition to r8; in1 is overwritten when MODULO
+ * is defined.
+ */
+GLOBAL_ENTRY(NAME)
+ .regstk 2,0,0,0
+ // Transfer inputs to FP registers.
+ setf.sig f8 = in0
+ setf.sig f9 = in1
+ ;;
+ // Convert the inputs to FP, to avoid FP software-assist faults.
+ INT_TO_FP(f8, f8)
+ INT_TO_FP(f9, f9)
+ ;;
+ // frcpa writes p6; every refinement step below is predicated on it.
+ // NOTE(review): per the ISA, p6 is presumably clear when frcpa already
+ // delivered the final result (special operands) - confirm.
+ frcpa.s1 f11, p6 = f8, f9 // y0 = frcpa(b)
+ ;;
+(p6) fmpy.s1 f7 = f8, f11 // q0 = a*y0
+(p6) fnma.s1 f6 = f9, f11, f1 // e0 = -b*y0 + 1
+ ;;
+(p6) fma.s1 f10 = f7, f6, f7 // q1 = q0*e0 + q0
+(p6) fmpy.s1 f7 = f6, f6 // e1 = e0*e0
+ ;;
+#ifdef MODULO
+ sub in1 = r0, in1 // in1 = -b
+#endif
+(p6) fma.s1 f10 = f10, f7, f10 // q2 = q1*e1 + q1
+(p6) fma.s1 f6 = f11, f6, f11 // y1 = y0*e0 + y0
+ ;;
+(p6) fma.s1 f6 = f6, f7, f6 // y2 = y1*e1 + y1
+(p6) fnma.s1 f7 = f9, f10, f8 // r = -b*q2 + a
+ ;;
+#ifdef MODULO
+ // f8/f9 (a and b as FP) were last read by the fnma above; reload them
+ // with the integer operands for the final xma.l.
+ setf.sig f8 = in0 // f8 = a
+ setf.sig f9 = in1 // f9 = -b
+#endif
+(p6) fma.s1 f11 = f7, f6, f10 // q3 = r*y2 + q2
+ ;;
+ FP_TO_INT(f11, f11) // q = trunc(q3)
+ ;;
+#ifdef MODULO
+ xma.l f11 = f11, f9, f8 // r = q*(-b) + a
+ ;;
+#endif
+ getf.sig r8 = f11 // transfer result to result register
+ br.ret.sptk.many rp
+END(NAME)
diff --git a/arch/ia64/lib/io.c b/arch/ia64/lib/io.c
new file mode 100644
index 00000000000..8949e44091a
--- /dev/null
+++ b/arch/ia64/lib/io.c
@@ -0,0 +1,165 @@
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+#include <asm/io.h>
+
+/*
+ * Copy "count" bytes from IO memory space at "from" into "real" memory
+ * at "to", one byte per readb() call.
+ * This needs to be optimized.
+ */
+void memcpy_fromio(void *to, const volatile void __iomem *from, long count)
+{
+ char *out = to;
+
+ for (; count; count--) {
+ *out++ = readb(from++);
+ }
+}
+EXPORT_SYMBOL(memcpy_fromio);
+
+/*
+ * Copy "count" bytes from "real" memory at "from" into IO memory space
+ * at "to", one byte per writeb() call.
+ * This needs to be optimized.
+ */
+void memcpy_toio(volatile void __iomem *to, const void *from, long count)
+{
+ const char *in = from;
+
+ for (; count; count--) {
+ writeb(*in++, to++);
+ }
+}
+EXPORT_SYMBOL(memcpy_toio);
+
+/*
+ * "memset" on IO memory space: store the low 8 bits of "c" into each of
+ * the "count" bytes starting at "dst", one writeb() call per byte.
+ * This needs to be optimized.
+ */
+void memset_io(volatile void __iomem *dst, int c, long count)
+{
+ const unsigned char fill = (char)(c & 0xff);
+
+ for (; count; count--, dst++) {
+ writeb(fill, dst);
+ }
+}
+EXPORT_SYMBOL(memset_io);
+
+#ifdef CONFIG_IA64_GENERIC
+
+/*
+ * Out-of-line versions of the I/O accessors, built only when
+ * CONFIG_IA64_GENERIC is set. Each __ia64_* name is first #undef'ed
+ * and then defined as a real function that forwards to the matching
+ * ___ia64_* helper (three underscores), which is defined outside this
+ * file - presumably in <asm/io.h>; confirm.
+ *
+ * NOTE(review): __ia64_writeb/w/l/q are #undef'ed below but no function
+ * with those names is defined in this section.
+ */
+#undef __ia64_inb
+#undef __ia64_inw
+#undef __ia64_inl
+#undef __ia64_outb
+#undef __ia64_outw
+#undef __ia64_outl
+#undef __ia64_readb
+#undef __ia64_readw
+#undef __ia64_readl
+#undef __ia64_readq
+#undef __ia64_readb_relaxed
+#undef __ia64_readw_relaxed
+#undef __ia64_readl_relaxed
+#undef __ia64_readq_relaxed
+#undef __ia64_writeb
+#undef __ia64_writew
+#undef __ia64_writel
+#undef __ia64_writeq
+#undef __ia64_mmiowb
+
+/* Port input: forward "port" to ___ia64_in{b,w,l} and return its result. */
+unsigned int
+__ia64_inb (unsigned long port)
+{
+ return ___ia64_inb(port);
+}
+
+unsigned int
+__ia64_inw (unsigned long port)
+{
+ return ___ia64_inw(port);
+}
+
+unsigned int
+__ia64_inl (unsigned long port)
+{
+ return ___ia64_inl(port);
+}
+
+/* Port output: forward "val" and "port" to ___ia64_out{b,w,l}. */
+void
+__ia64_outb (unsigned char val, unsigned long port)
+{
+ ___ia64_outb(val, port);
+}
+
+void
+__ia64_outw (unsigned short val, unsigned long port)
+{
+ ___ia64_outw(val, port);
+}
+
+void
+__ia64_outl (unsigned int val, unsigned long port)
+{
+ ___ia64_outl(val, port);
+}
+
+/* Memory-mapped reads: forward "addr" to ___ia64_read{b,w,l,q}. */
+unsigned char
+__ia64_readb (void __iomem *addr)
+{
+ return ___ia64_readb (addr);
+}
+
+unsigned short
+__ia64_readw (void __iomem *addr)
+{
+ return ___ia64_readw (addr);
+}
+
+unsigned int
+__ia64_readl (void __iomem *addr)
+{
+ return ___ia64_readl (addr);
+}
+
+unsigned long
+__ia64_readq (void __iomem *addr)
+{
+ return ___ia64_readq (addr);
+}
+
+/*
+ * The "_relaxed" variants call the same ___ia64_read{b,w,l,q} helpers as
+ * the plain variants above; there is no separate relaxed helper here.
+ */
+unsigned char
+__ia64_readb_relaxed (void __iomem *addr)
+{
+ return ___ia64_readb (addr);
+}
+
+unsigned short
+__ia64_readw_relaxed (void __iomem *addr)
+{
+ return ___ia64_readw (addr);
+}
+
+unsigned int
+__ia64_readl_relaxed (void __iomem *addr)
+{
+ return ___ia64_readl (addr);
+}
+
+unsigned long
+__ia64_readq_relaxed (void __iomem *addr)
+{
+ return ___ia64_readq (addr);
+}
+
+/* Forward to ___ia64_mmiowb(). */
+void
+__ia64_mmiowb(void)
+{
+ ___ia64_mmiowb();
+}
+
+#endif /* CONFIG_IA64_GENERIC */
diff --git a/arch/ia64/lib/ip_fast_csum.S b/arch/ia64/lib/ip_fast_csum.S
new file mode 100644
index 00000000000..19674ca2acf
--- /dev/null
+++ b/arch/ia64/lib/ip_fast_csum.S
@@ -0,0 +1,90 @@
+/*
+ * Optimized version of the ip_fast_csum() function
+ * Used for calculating IP header checksum
+ *
+ * Return: 16bit checksum, complemented
+ *
+ * Inputs:
+ * in0: address of buffer to checksum (char *)
+ * in1: length of the buffer in 32-bit words (int)
+ *
+ * Copyright (C) 2002 Intel Corp.
+ * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
+ */
+
+#include <asm/asmmacro.h>
+
+/*
+ * Since we know that most likely this function is called with buf aligned
+ * on a 4-byte boundary and 20 bytes in length, we can execute rather quickly
+ * versus calling the generic version of do_csum, which has lots of overhead
+ * in handling various alignments and sizes. However, due to the lack of
+ * constraints put on the function's input arguments, cases with alignment
+ * not on 4 bytes or size not equal to 20 bytes will be handled by the
+ * generic do_csum function.
+ */
+
+#define in0 r32
+#define in1 r33
+#define ret0 r8
+
+/*
+ * ip_fast_csum(in0 = buffer address, in1 = length in 32-bit words)
+ *
+ * Fast path (in1 == 5 and in0 4-byte aligned): sums five 32-bit words,
+ * folds the sum to 16 bits and returns its complement in ret0.
+ * Otherwise (.generic): calls do_csum(in0, in1 * 4) and returns the
+ * complement of its result.
+ */
+GLOBAL_ENTRY(ip_fast_csum)
+ .prologue
+ .body
+ cmp.ne p6,p7=5,in1 // size other than 20 byte?
+ and r14=3,in0 // is it aligned on 4-byte?
+ add r15=4,in0 // second source pointer
+ ;;
+ // misaligned buffer: force p6 (generic path) on and p7 (fast path) off
+ cmp.ne.or.andcm p6,p7=r14,r0
+ ;;
+(p7) ld4 r20=[in0],8 // word 0
+(p7) ld4 r21=[r15],8 // word 1
+(p6) br.spnt .generic
+ ;;
+ ld4 r22=[in0],8 // word 2
+ ld4 r23=[r15],8 // word 3
+ ;;
+ ld4 r24=[in0] // word 4
+ add r20=r20,r21
+ add r22=r22,r23
+ ;;
+ add r20=r20,r22
+ ;;
+ add r20=r20,r24 // r20 = sum of the five words
+ ;;
+ shr.u ret0=r20,16 // now need to add the carry
+ zxt2 r20=r20
+ ;;
+ add r20=ret0,r20
+ ;;
+ shr.u ret0=r20,16 // add carry again
+ zxt2 r20=r20
+ ;;
+ add r20=ret0,r20
+ ;;
+ shr.u ret0=r20,16 // third and last fold round
+ zxt2 r20=r20
+ ;;
+ add r20=ret0,r20
+ ;;
+ andcm ret0=-1,r20 // ret0 = ~r20
+ .restore sp // reset frame state
+ br.ret.sptk.many b0
+ ;;
+
+.generic:
+ .prologue
+ .save ar.pfs, r35
+ alloc r35=ar.pfs,2,2,2,0
+ .save rp, r34
+ mov r34=b0 // keep the return address across the call
+ .body
+ dep.z out1=in1,2,30 // out1 = (low 30 bits of in1) << 2, i.e. length in bytes
+ mov out0=in0
+ ;;
+ br.call.sptk.many b0=do_csum
+ ;;
+ andcm ret0=-1,ret0 // ret0 = ~do_csum(...)
+ mov ar.pfs=r35
+ mov b0=r34
+ br.ret.sptk.many b0
+END(ip_fast_csum)
diff --git a/arch/ia64/lib/memcpy.S b/arch/ia64/lib/memcpy.S
new file mode 100644
index 00000000000..448908d80b6
--- /dev/null
+++ b/arch/ia64/lib/memcpy.S
@@ -0,0 +1,301 @@
+/*
+ *
+ * Optimized version of the standard memcpy() function
+ *
+ * Inputs:
+ * in0: destination address
+ * in1: source address
+ * in2: number of bytes to copy
+ * Output:
+ * r8: destination address (same value as in0)
+ *
+ * Copyright (C) 2000-2001 Hewlett-Packard Co
+ * Stephane Eranian <eranian@hpl.hp.com>
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <asm/asmmacro.h>
+
+/*
+ * memcpy(in0 = dst, in1 = src, in2 = len); r8 is set to dst.
+ *
+ * Three copy strategies, selected below:
+ * - len == 0: return immediately.
+ * - len < 16: .memcpy_short, a byte-at-a-time loop.
+ * - dst, src and len all multiples of 8: the ld8/st8 loop that
+ * directly follows the dispatch code.
+ * - anything else: .memcpy_long, which aligns dst and then jumps
+ * into one of the COPY() loops at .memcpy_loops.
+ */
+GLOBAL_ENTRY(memcpy)
+
+# define MEM_LAT 21 /* latency to memory */
+
+# define dst r2
+# define src r3
+# define retval r8
+# define saved_pfs r9
+# define saved_lc r10
+# define saved_pr r11
+# define cnt r16
+# define src2 r17
+# define t0 r18
+# define t1 r19
+# define t2 r20
+# define t3 r21
+# define t4 r22
+# define src_end r23
+
+# define N (MEM_LAT + 4)
+# define Nrot ((N + 7) & ~7)
+
+ /*
+ * First, check if everything (src, dst, len) is a multiple of eight. If
+ * so, we handle everything with no taken branches (other than the loop
+ * itself) and a small icache footprint. Otherwise, we jump off to
+ * the more general copy routine handling arbitrary
+ * sizes/alignment etc.
+ */
+ .prologue
+ .save ar.pfs, saved_pfs
+ alloc saved_pfs=ar.pfs,3,Nrot,0,Nrot
+ .save ar.lc, saved_lc
+ mov saved_lc=ar.lc
+ or t0=in0,in1
+ ;;
+
+ or t0=t0,in2 // t0 = dst | src | len
+ .save pr, saved_pr
+ mov saved_pr=pr
+
+ .body
+
+ cmp.eq p6,p0=in2,r0 // zero length?
+ mov retval=in0 // return dst
+(p6) br.ret.spnt.many rp // zero length, return immediately
+ ;;
+
+ mov dst=in0 // copy because of rotation
+ shr.u cnt=in2,3 // number of 8-byte words to copy
+ mov pr.rot=1<<16
+ ;;
+
+ adds cnt=-1,cnt // br.ctop is repeat/until
+ cmp.gtu p7,p0=16,in2 // copying less than 16 bytes?
+ mov ar.ec=N
+ ;;
+
+ and t0=0x7,t0 // non-zero if any of dst, src, len is not a multiple of 8
+ mov ar.lc=cnt
+ ;;
+ cmp.ne p6,p0=t0,r0
+
+ mov src=in1 // copy because of rotation
+(p7) br.cond.spnt.few .memcpy_short
+(p6) br.cond.spnt.few .memcpy_long
+ ;;
+ nop.m 0
+ ;;
+ nop.m 0
+ nop.i 0
+ ;;
+ nop.m 0
+ ;;
+ .rotr val[N]
+ .rotp p[N]
+ .align 32
+1: { .mib
+(p[0]) ld8 val[0]=[src],8
+ nop.i 0
+ brp.loop.imp 1b, 2f
+}
+2: { .mfb
+(p[N-1])st8 [dst]=val[N-1],8
+ nop.f 0
+ br.ctop.dptk.few 1b
+}
+ ;;
+ mov ar.lc=saved_lc
+ mov pr=saved_pr,-1
+ mov ar.pfs=saved_pfs
+ br.ret.sptk.many rp
+
+ /*
+ * Small (<16 bytes) unaligned copying is done via a simple byte-at-a-time
+ * copy loop. This performs relatively poorly on Itanium, but it doesn't
+ * get used very often (gcc inlines small copies) and due to atomicity
+ * issues, we want to avoid read-modify-write of entire words.
+ */
+ .align 32
+.memcpy_short:
+ adds cnt=-1,in2 // br.ctop is repeat/until
+ mov ar.ec=MEM_LAT
+ brp.loop.imp 1f, 2f
+ ;;
+ mov ar.lc=cnt
+ ;;
+ nop.m 0
+ ;;
+ nop.m 0
+ nop.i 0
+ ;;
+ nop.m 0
+ ;;
+ nop.m 0
+ ;;
+ /*
+ * It is faster to put a stop bit in the loop here because it makes
+ * the pipeline shorter (and latency is what matters on short copies).
+ */
+ .align 32
+1: { .mib
+(p[0]) ld1 val[0]=[src],1
+ nop.i 0
+ brp.loop.imp 1b, 2f
+} ;;
+2: { .mfb
+(p[MEM_LAT-1])st1 [dst]=val[MEM_LAT-1],1
+ nop.f 0
+ br.ctop.dptk.few 1b
+} ;;
+ mov ar.lc=saved_lc
+ mov pr=saved_pr,-1
+ mov ar.pfs=saved_pfs
+ br.ret.sptk.many rp
+
+ /*
+ * Large (>= 16 bytes) copying is done in a fancy way. Latency isn't
+ * an overriding concern here, but throughput is. We first do
+ * sub-word copying until the destination is aligned, then we check
+ * if the source is also aligned. If so, we do a simple load/store-loop
+ * until there are less than 8 bytes left over and then we do the tail,
+ * by storing the last few bytes using sub-word copying. If the source
+ * is not aligned, we branch off to the non-congruent loop.
+ *
+ * stage: op:
+ * 0 ld
+ * :
+ * MEM_LAT+3 shrp
+ * MEM_LAT+4 st
+ *
+ * On Itanium, the pipeline itself runs without stalls. However, br.ctop
+ * seems to introduce an unavoidable bubble in the pipeline so the overall
+ * latency is 2 cycles/iteration. This gives us a _copy_ throughput
+ * of 4 byte/cycle. Still not bad.
+ */
+# undef N
+# undef Nrot
+# define N (MEM_LAT + 5) /* number of stages */
+# define Nrot ((N+1 + 2 + 7) & ~7) /* number of rotating regs */
+
+/* log2 of the byte size of one COPY() expansion; used to index .memcpy_loops */
+#define LOG_LOOP_SIZE 6
+
+.memcpy_long:
+ alloc t3=ar.pfs,3,Nrot,0,Nrot // resize register frame
+ and t0=-8,src // t0 = src & ~7
+ and t2=7,src // t2 = src & 7
+ ;;
+ ld8 t0=[t0] // t0 = 1st source word
+ adds src2=7,src // src2 = (src + 7)
+ sub t4=r0,dst // t4 = -dst
+ ;;
+ and src2=-8,src2 // src2 = (src + 7) & ~7
+ shl t2=t2,3 // t2 = 8*(src & 7)
+ shl t4=t4,3 // t4 = 8*(-dst); bits 3-5 hold (-dst & 7)
+ ;;
+ ld8 t1=[src2] // t1 = 1st source word if src is 8-byte aligned, 2nd otherwise
+ sub t3=64,t2 // t3 = 64-8*(src & 7)
+ shr.u t0=t0,t2
+ ;;
+ add src_end=src,in2
+ shl t1=t1,t3
+ mov pr=t4,0x38 // (p5,p4,p3)=(-dst & 7), the byte count needed to 8-byte align dst
+ ;;
+ or t0=t0,t1
+ mov cnt=r0
+ adds src_end=-1,src_end
+ ;;
+(p3) st1 [dst]=t0,1
+(p3) shr.u t0=t0,8
+(p3) adds cnt=1,cnt
+ ;;
+(p4) st2 [dst]=t0,2
+(p4) shr.u t0=t0,16
+(p4) adds cnt=2,cnt
+ ;;
+(p5) st4 [dst]=t0,4
+(p5) adds cnt=4,cnt
+ and src_end=-8,src_end // src_end = last word of source buffer
+ ;;
+
+ // At this point, dst is aligned to 8 bytes and there are at least 16-7=9 bytes left to copy:
+
+1:{ add src=cnt,src // make src point to remainder of source buffer
+ sub cnt=in2,cnt // cnt = number of bytes left to copy
+ mov t4=ip
+ } ;;
+ and src2=-8,src // align source pointer
+ adds t4=.memcpy_loops-1b,t4 // t4 = address of .memcpy_loops
+ mov ar.ec=N
+
+ and t0=7,src // t0 = src & 7
+ shr.u t2=cnt,3 // t2 = number of 8-byte words left to copy
+ shl cnt=cnt,3 // move bits 0-2 to 3-5
+ ;;
+
+ .rotr val[N+1], w[2]
+ .rotp p[N]
+
+ cmp.ne p6,p0=t0,r0 // is src aligned, too?
+ shl t0=t0,LOG_LOOP_SIZE // t0 = (src & 7) << LOG_LOOP_SIZE = offset of the matching COPY() loop
+ adds t2=-1,t2 // br.ctop is repeat/until
+ ;;
+ add t4=t0,t4 // t4 = entry point of the COPY() loop for this src alignment
+ mov pr=cnt,0x38 // set (p5,p4,p3) to # of last-word bytes to copy
+ mov ar.lc=t2
+ ;;
+ nop.m 0
+ ;;
+ nop.m 0
+ nop.i 0
+ ;;
+ nop.m 0
+ ;;
+(p6) ld8 val[1]=[src2],8 // prime the pump...
+ mov b6=t4
+ br.sptk.few b6
+ ;;
+
+.memcpy_tail:
+ // At this point, (p5,p4,p3) are set to the number of bytes left to copy (which is
+ // less than 8) and t0 contains the last few bytes of the src buffer:
+(p5) st4 [dst]=t0,4
+(p5) shr.u t0=t0,32
+ mov ar.lc=saved_lc
+ ;;
+(p4) st2 [dst]=t0,2
+(p4) shr.u t0=t0,16
+ mov ar.pfs=saved_pfs
+ ;;
+(p3) st1 [dst]=t0
+ mov pr=saved_pr,-1
+ br.ret.sptk.many rp
+
+///////////////////////////////////////////////////////
+ .align 64
+
+/*
+ * COPY(shift,index): one software-pipelined ld8/shrp/st8 loop for a source
+ * that is misaligned by shift/8 bytes, followed by a load of the last
+ * source word and a branch to .memcpy_tail. The dispatch code above
+ * assumes each expansion occupies exactly 1 << LOG_LOOP_SIZE bytes.
+ */
+#define COPY(shift,index) \
+ 1: { .mib \
+ (p[0]) ld8 val[0]=[src2],8; \
+ (p[MEM_LAT+3]) shrp w[0]=val[MEM_LAT+3],val[MEM_LAT+4-index],shift; \
+ brp.loop.imp 1b, 2f \
+ }; \
+ 2: { .mfb \
+ (p[MEM_LAT+4]) st8 [dst]=w[1],8; \
+ nop.f 0; \
+ br.ctop.dptk.few 1b; \
+ }; \
+ ;; \
+ ld8 val[N-1]=[src_end]; /* load last word (may be same as val[N]) */ \
+ ;; \
+ shrp t0=val[N-1],val[N-index],shift; \
+ br .memcpy_tail
+.memcpy_loops:
+ COPY(0, 1) /* no point special casing this---it doesn't go any faster without shrp */
+ COPY(8, 0)
+ COPY(16, 0)
+ COPY(24, 0)
+ COPY(32, 0)
+ COPY(40, 0)
+ COPY(48, 0)
+ COPY(56, 0)
+
+END(memcpy)
diff --git a/arch/ia64/lib/memcpy_mck.S b/arch/ia64/lib/memcpy_mck.S
new file mode 100644
index 00000000000..6f26ef7cc23
--- /dev/null
+++ b/arch/ia64/lib/memcpy_mck.S
@@ -0,0 +1,661 @@
+/*
+ * Itanium 2-optimized version of memcpy and copy_user function
+ *
+ * Inputs:
+ * in0: destination address
+ * in1: source address
+ * in2: number of bytes to copy
+ * Output:
+ * 0 if success, or number of byte NOT copied if error occurred.
+ *
+ * Copyright (C) 2002 Intel Corp.
+ * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
+ */
+#include <linux/config.h>
+#include <asm/asmmacro.h>
+#include <asm/page.h>
+
+#define EK(y...) EX(y)
+
+/* McKinley specific optimization */
+
+#define retval r8
+#define saved_pfs r31
+#define saved_lc r10
+#define saved_pr r11
+#define saved_in0 r14
+#define saved_in1 r15
+#define saved_in2 r16
+
+#define src0 r2
+#define src1 r3
+#define dst0 r17
+#define dst1 r18
+#define cnt r9
+
+/* r19-r30 are temp for each code section */
+#define PREFETCH_DIST 8
+#define src_pre_mem r19
+#define dst_pre_mem r20
+#define src_pre_l2 r21
+#define dst_pre_l2 r22
+#define t1 r23
+#define t2 r24
+#define t3 r25
+#define t4 r26
+#define t5 t1 // alias!
+#define t6 t2 // alias!
+#define t7 t3 // alias!
+#define n8 r27
+#define t9 t5 // alias!
+#define t10 t4 // alias!
+#define t11 t7 // alias!
+#define t12 t6 // alias!
+#define t14 t10 // alias!
+#define t13 r28
+#define t15 r29
+#define tmp r30
+
+/* defines for long_copy block */
+#define A 0
+#define B (PREFETCH_DIST)
+#define C (B + PREFETCH_DIST)
+#define D (C + 1)
+#define N (D + 1)
+#define Nrot ((N + 7) & ~7)
+
+/* alias */
+#define in0 r32
+#define in1 r33
+#define in2 r34
+
+GLOBAL_ENTRY(memcpy)
+ and r28=0x7,in0
+ and r29=0x7,in1
+ mov f6=f0
+ br.cond.sptk .common_code
+ ;;
+GLOBAL_ENTRY(__copy_user)
+ .prologue
+// check dest alignment
+ and r28=0x7,in0
+ and r29=0x7,in1
+ mov f6=f1
+ mov saved_in0=in0 // save dest pointer
+ mov saved_in1=in1 // save src pointer
+ mov saved_in2=in2 // save len
+ ;;
+.common_code:
+ cmp.gt p15,p0=8,in2 // check for small size
+ cmp.ne p13,p0=0,r28 // check dest alignment
+ cmp.ne p14,p0=0,r29 // check src alignment
+ add src0=0,in1
+ sub r30=8,r28 // for .align_dest
+ mov retval=r0 // initialize return value
+ ;;
+ add dst0=0,in0
+ add dst1=1,in0 // dest odd index
+ cmp.le p6,p0 = 1,r30 // for .align_dest
+(p15) br.cond.dpnt .memcpy_short
+(p13) br.cond.dpnt .align_dest
+(p14) br.cond.dpnt .unaligned_src
+ ;;
+
+// both dest and src are aligned on 8-byte boundary
+.aligned_src:
+ .save ar.pfs, saved_pfs
+ alloc saved_pfs=ar.pfs,3,Nrot-3,0,Nrot
+ .save pr, saved_pr
+ mov saved_pr=pr
+
+ shr.u cnt=in2,7 // this much cache line
+ ;;
+ cmp.lt p6,p0=2*PREFETCH_DIST,cnt
+ cmp.lt p7,p8=1,cnt
+ .save ar.lc, saved_lc
+ mov saved_lc=ar.lc
+ .body
+ add cnt=-1,cnt
+ add src_pre_mem=0,in1 // prefetch src pointer
+ add dst_pre_mem=0,in0 // prefetch dest pointer
+ ;;
+(p7) mov ar.lc=cnt // prefetch count
+(p8) mov ar.lc=r0
+(p6) br.cond.dpnt .long_copy
+ ;;
+
+.prefetch:
+ lfetch.fault [src_pre_mem], 128
+ lfetch.fault.excl [dst_pre_mem], 128
+ br.cloop.dptk.few .prefetch
+ ;;
+
+.medium_copy:
+ and tmp=31,in2 // copy length after iteration
+ shr.u r29=in2,5 // number of 32-byte iteration
+ add dst1=8,dst0 // 2nd dest pointer
+ ;;
+ add cnt=-1,r29 // ctop iteration adjustment
+ cmp.eq p10,p0=r29,r0 // do we really need to loop?
+ add src1=8,src0 // 2nd src pointer
+ cmp.le p6,p0=8,tmp
+ ;;
+ cmp.le p7,p0=16,tmp
+ mov ar.lc=cnt // loop setup
+ cmp.eq p16,p17 = r0,r0
+ mov ar.ec=2
+(p10) br.dpnt.few .aligned_src_tail
+ ;;
+ TEXT_ALIGN(32)
+1:
+EX(.ex_handler, (p16) ld8 r34=[src0],16)
+EK(.ex_handler, (p16) ld8 r38=[src1],16)
+EX(.ex_handler, (p17) st8 [dst0]=r33,16)
+EK(.ex_handler, (p17) st8 [dst1]=r37,16)
+ ;;
+EX(.ex_handler, (p16) ld8 r32=[src0],16)
+EK(.ex_handler, (p16) ld8 r36=[src1],16)
+EX(.ex_handler, (p16) st8 [dst0]=r34,16)
+EK(.ex_handler, (p16) st8 [dst1]=r38,16)
+ br.ctop.dptk.few 1b
+ ;;
+
+.aligned_src_tail:
+EX(.ex_handler, (p6) ld8 t1=[src0])
+ mov ar.lc=saved_lc
+ mov ar.pfs=saved_pfs
+EX(.ex_hndlr_s, (p7) ld8 t2=[src1],8)
+ cmp.le p8,p0=24,tmp
+ and r21=-8,tmp
+ ;;
+EX(.ex_hndlr_s, (p8) ld8 t3=[src1])
+EX(.ex_handler, (p6) st8 [dst0]=t1) // store byte 1
+ and in2=7,tmp // remaining length
+EX(.ex_hndlr_d, (p7) st8 [dst1]=t2,8) // store byte 2
+ add src0=src0,r21 // setting up src pointer
+ add dst0=dst0,r21 // setting up dest pointer
+ ;;
+EX(.ex_handler, (p8) st8 [dst1]=t3) // store byte 3
+ mov pr=saved_pr,-1
+ br.dptk.many .memcpy_short
+ ;;
+
+/* code taken from copy_page_mck */
+.long_copy:
+ .rotr v[2*PREFETCH_DIST]
+ .rotp p[N]
+
+ mov src_pre_mem = src0
+ mov pr.rot = 0x10000
+ mov ar.ec = 1 // special unrolled loop
+
+ mov dst_pre_mem = dst0
+
+ add src_pre_l2 = 8*8, src0
+ add dst_pre_l2 = 8*8, dst0
+ ;;
+ add src0 = 8, src_pre_mem // first t1 src
+ mov ar.lc = 2*PREFETCH_DIST - 1
+ shr.u cnt=in2,7 // number of lines
+ add src1 = 3*8, src_pre_mem // first t3 src
+ add dst0 = 8, dst_pre_mem // first t1 dst
+ add dst1 = 3*8, dst_pre_mem // first t3 dst
+ ;;
+ and tmp=127,in2 // remaining bytes after this block
+ add cnt = -(2*PREFETCH_DIST) - 1, cnt
+ // same as .line_copy loop, but with all predicated-off instructions removed:
+.prefetch_loop:
+EX(.ex_hndlr_lcpy_1, (p[A]) ld8 v[A] = [src_pre_mem], 128) // M0
+EK(.ex_hndlr_lcpy_1, (p[B]) st8 [dst_pre_mem] = v[B], 128) // M2
+ br.ctop.sptk .prefetch_loop
+ ;;
+ cmp.eq p16, p0 = r0, r0 // reset p16 to 1
+ mov ar.lc = cnt
+ mov ar.ec = N // # of stages in pipeline
+ ;;
+.line_copy:
+EX(.ex_handler, (p[D]) ld8 t2 = [src0], 3*8) // M0
+EK(.ex_handler, (p[D]) ld8 t4 = [src1], 3*8) // M1
+EX(.ex_handler_lcpy, (p[B]) st8 [dst_pre_mem] = v[B], 128) // M2 prefetch dst from memory
+EK(.ex_handler_lcpy, (p[D]) st8 [dst_pre_l2] = n8, 128) // M3 prefetch dst from L2
+ ;;
+EX(.ex_handler_lcpy, (p[A]) ld8 v[A] = [src_pre_mem], 128) // M0 prefetch src from memory
+EK(.ex_handler_lcpy, (p[C]) ld8 n8 = [src_pre_l2], 128) // M1 prefetch src from L2
+EX(.ex_handler, (p[D]) st8 [dst0] = t1, 8) // M2
+EK(.ex_handler, (p[D]) st8 [dst1] = t3, 8) // M3
+ ;;
+EX(.ex_handler, (p[D]) ld8 t5 = [src0], 8)
+EK(.ex_handler, (p[D]) ld8 t7 = [src1], 3*8)
+EX(.ex_handler, (p[D]) st8 [dst0] = t2, 3*8)
+EK(.ex_handler, (p[D]) st8 [dst1] = t4, 3*8)
+ ;;
+EX(.ex_handler, (p[D]) ld8 t6 = [src0], 3*8)
+EK(.ex_handler, (p[D]) ld8 t10 = [src1], 8)
+EX(.ex_handler, (p[D]) st8 [dst0] = t5, 8)
+EK(.ex_handler, (p[D]) st8 [dst1] = t7, 3*8)
+ ;;
+EX(.ex_handler, (p[D]) ld8 t9 = [src0], 3*8)
+EK(.ex_handler, (p[D]) ld8 t11 = [src1], 3*8)
+EX(.ex_handler, (p[D]) st8 [dst0] = t6, 3*8)
+EK(.ex_handler, (p[D]) st8 [dst1] = t10, 8)
+ ;;
+EX(.ex_handler, (p[D]) ld8 t12 = [src0], 8)
+EK(.ex_handler, (p[D]) ld8 t14 = [src1], 8)
+EX(.ex_handler, (p[D]) st8 [dst0] = t9, 3*8)
+EK(.ex_handler, (p[D]) st8 [dst1] = t11, 3*8)
+ ;;
+EX(.ex_handler, (p[D]) ld8 t13 = [src0], 4*8)
+EK(.ex_handler, (p[D]) ld8 t15 = [src1], 4*8)
+EX(.ex_handler, (p[D]) st8 [dst0] = t12, 8)
+EK(.ex_handler, (p[D]) st8 [dst1] = t14, 8)
+ ;;
+EX(.ex_handler, (p[C]) ld8 t1 = [src0], 8)
+EK(.ex_handler, (p[C]) ld8 t3 = [src1], 8)
+EX(.ex_handler, (p[D]) st8 [dst0] = t13, 4*8)
+EK(.ex_handler, (p[D]) st8 [dst1] = t15, 4*8)
+ br.ctop.sptk .line_copy
+ ;;
+
+ add dst0=-8,dst0
+ add src0=-8,src0
+ mov in2=tmp
+ .restore sp
+ br.sptk.many .medium_copy
+ ;;
+
+#define BLOCK_SIZE 128*32
+#define blocksize r23
+#define curlen r24
+
+// dest is on 8-byte boundary, src is not. We need to do
+// ld8-ld8, shrp, then st8. Max 8 byte copy per cycle.
+.unaligned_src:
+ .prologue
+ .save ar.pfs, saved_pfs
+ alloc saved_pfs=ar.pfs,3,5,0,8
+ .save ar.lc, saved_lc
+ mov saved_lc=ar.lc
+ .save pr, saved_pr
+ mov saved_pr=pr
+ .body
+.4k_block:
+ mov saved_in0=dst0 // need to save all input arguments
+ mov saved_in2=in2
+ mov blocksize=BLOCK_SIZE
+ ;;
+ cmp.lt p6,p7=blocksize,in2
+ mov saved_in1=src0
+ ;;
+(p6) mov in2=blocksize
+ ;;
+ shr.u r21=in2,7 // this much cache line
+ shr.u r22=in2,4 // number of 16-byte iteration
+ and curlen=15,in2 // copy length after iteration
+ and r30=7,src0 // source alignment
+ ;;
+ cmp.lt p7,p8=1,r21
+ add cnt=-1,r21
+ ;;
+
+ add src_pre_mem=0,src0 // prefetch src pointer
+ add dst_pre_mem=0,dst0 // prefetch dest pointer
+ and src0=-8,src0 // 1st src pointer
+(p7) mov ar.lc = r21
+(p8) mov ar.lc = r0
+ ;;
+ TEXT_ALIGN(32)
+1: lfetch.fault [src_pre_mem], 128
+ lfetch.fault.excl [dst_pre_mem], 128
+ br.cloop.dptk.few 1b
+ ;;
+
+ shladd dst1=r22,3,dst0 // 2nd dest pointer
+ shladd src1=r22,3,src0 // 2nd src pointer
+ cmp.eq p8,p9=r22,r0 // do we really need to loop?
+ cmp.le p6,p7=8,curlen; // have at least 8 byte remaining?
+ add cnt=-1,r22 // ctop iteration adjustment
+ ;;
+EX(.ex_handler, (p9) ld8 r33=[src0],8) // loop primer
+EK(.ex_handler, (p9) ld8 r37=[src1],8)
+(p8) br.dpnt.few .noloop
+ ;;
+
+// The jump address is calculated based on src alignment. The COPYU
+// macro below need to confine its size to power of two, so an entry
+// can be caulated using shl instead of an expensive multiply. The
+// size is then hard coded by the following #define to match the
+// actual size. This make it somewhat tedious when COPYU macro gets
+// changed and this need to be adjusted to match.
+#define LOOP_SIZE 6
+1:
+ mov r29=ip // jmp_table thread
+ mov ar.lc=cnt
+ ;;
+ add r29=.jump_table - 1b - (.jmp1-.jump_table), r29
+ shl r28=r30, LOOP_SIZE // jmp_table thread
+ mov ar.ec=2 // loop setup
+ ;;
+ add r29=r29,r28 // jmp_table thread
+ cmp.eq p16,p17=r0,r0
+ ;;
+ mov b6=r29 // jmp_table thread
+ ;;
+ br.cond.sptk.few b6
+
+// for 8-15 byte case
+// We will skip the loop, but need to replicate the side effect
+// that the loop produces.
+.noloop:
+EX(.ex_handler, (p6) ld8 r37=[src1],8)
+ add src0=8,src0
+(p6) shl r25=r30,3
+ ;;
+EX(.ex_handler, (p6) ld8 r27=[src1])
+(p6) shr.u r28=r37,r25
+(p6) sub r26=64,r25
+ ;;
+(p6) shl r27=r27,r26
+ ;;
+(p6) or r21=r28,r27
+
+.unaligned_src_tail:
+/* check if we have more than blocksize to copy, if so go back */
+ cmp.gt p8,p0=saved_in2,blocksize
+ ;;
+(p8) add dst0=saved_in0,blocksize
+(p8) add src0=saved_in1,blocksize
+(p8) sub in2=saved_in2,blocksize
+(p8) br.dpnt .4k_block
+ ;;
+
+/* we have up to 15 byte to copy in the tail.
+ * part of work is already done in the jump table code
+ * we are at the following state.
+ * src side:
+ *
+ * xxxxxx xx <----- r21 has xxxxxxxx already
+ * -------- -------- --------
+ * 0 8 16
+ * ^
+ * |
+ * src1
+ *
+ * dst
+ * -------- -------- --------
+ * ^
+ * |
+ * dst1
+ */
+EX(.ex_handler, (p6) st8 [dst1]=r21,8) // more than 8 byte to copy
+(p6) add curlen=-8,curlen // update length
+ mov ar.pfs=saved_pfs
+ ;;
+ mov ar.lc=saved_lc
+ mov pr=saved_pr,-1
+ mov in2=curlen // remaining length
+ mov dst0=dst1 // dest pointer
+ add src0=src1,r30 // forward by src alignment
+ ;;
+
+// 7 byte or smaller.
+.memcpy_short:
+ cmp.le p8,p9 = 1,in2
+ cmp.le p10,p11 = 2,in2
+ cmp.le p12,p13 = 3,in2
+ cmp.le p14,p15 = 4,in2
+ add src1=1,src0 // second src pointer
+ add dst1=1,dst0 // second dest pointer
+ ;;
+
+EX(.ex_handler_short, (p8) ld1 t1=[src0],2)
+EK(.ex_handler_short, (p10) ld1 t2=[src1],2)
+(p9) br.ret.dpnt rp // 0 byte copy
+ ;;
+
+EX(.ex_handler_short, (p8) st1 [dst0]=t1,2)
+EK(.ex_handler_short, (p10) st1 [dst1]=t2,2)
+(p11) br.ret.dpnt rp // 1 byte copy
+
+EX(.ex_handler_short, (p12) ld1 t3=[src0],2)
+EK(.ex_handler_short, (p14) ld1 t4=[src1],2)
+(p13) br.ret.dpnt rp // 2 byte copy
+ ;;
+
+ cmp.le p6,p7 = 5,in2
+ cmp.le p8,p9 = 6,in2
+ cmp.le p10,p11 = 7,in2
+
+EX(.ex_handler_short, (p12) st1 [dst0]=t3,2)
+EK(.ex_handler_short, (p14) st1 [dst1]=t4,2)
+(p15) br.ret.dpnt rp // 3 byte copy
+ ;;
+
+EX(.ex_handler_short, (p6) ld1 t5=[src0],2)
+EK(.ex_handler_short, (p8) ld1 t6=[src1],2)
+(p7) br.ret.dpnt rp // 4 byte copy
+ ;;
+
+EX(.ex_handler_short, (p6) st1 [dst0]=t5,2)
+EK(.ex_handler_short, (p8) st1 [dst1]=t6,2)
+(p9) br.ret.dptk rp // 5 byte copy
+
+EX(.ex_handler_short, (p10) ld1 t7=[src0],2)
+(p11) br.ret.dptk rp // 6 byte copy
+ ;;
+
+EX(.ex_handler_short, (p10) st1 [dst0]=t7,2)
+ br.ret.dptk rp // done all cases
+
+
+/* Align dest to nearest 8-byte boundary. We know we have at
+ * least 7 bytes to copy, enough to crawl to 8-byte boundary.
+ * Actual number of byte to crawl depend on the dest alignment.
+ * 7 byte or less is taken care at .memcpy_short
+
+ * src0 - source even index
+ * src1 - source odd index
+ * dst0 - dest even index
+ * dst1 - dest odd index
+ * r30 - distance to 8-byte boundary
+ */
+
+.align_dest:
+ add src1=1,in1 // source odd index
+ cmp.le p7,p0 = 2,r30 // for .align_dest
+ cmp.le p8,p0 = 3,r30 // for .align_dest
+EX(.ex_handler_short, (p6) ld1 t1=[src0],2)
+ cmp.le p9,p0 = 4,r30 // for .align_dest
+ cmp.le p10,p0 = 5,r30
+ ;;
+EX(.ex_handler_short, (p7) ld1 t2=[src1],2)
+EK(.ex_handler_short, (p8) ld1 t3=[src0],2)
+ cmp.le p11,p0 = 6,r30
+EX(.ex_handler_short, (p6) st1 [dst0] = t1,2)
+ cmp.le p12,p0 = 7,r30
+ ;;
+EX(.ex_handler_short, (p9) ld1 t4=[src1],2)
+EK(.ex_handler_short, (p10) ld1 t5=[src0],2)
+EX(.ex_handler_short, (p7) st1 [dst1] = t2,2)
+EK(.ex_handler_short, (p8) st1 [dst0] = t3,2)
+ ;;
+EX(.ex_handler_short, (p11) ld1 t6=[src1],2)
+EK(.ex_handler_short, (p12) ld1 t7=[src0],2)
+ cmp.eq p6,p7=r28,r29
+EX(.ex_handler_short, (p9) st1 [dst1] = t4,2)
+EK(.ex_handler_short, (p10) st1 [dst0] = t5,2)
+ sub in2=in2,r30
+ ;;
+EX(.ex_handler_short, (p11) st1 [dst1] = t6,2)
+EK(.ex_handler_short, (p12) st1 [dst0] = t7)
+ add dst0=in0,r30 // setup arguments
+ add src0=in1,r30
+(p6) br.cond.dptk .aligned_src
+(p7) br.cond.dpnt .unaligned_src
+ ;;
+
+/* main loop body in jump table format */
+#define COPYU(shift) \
+1: \
+EX(.ex_handler, (p16) ld8 r32=[src0],8); /* 1 */ \
+EK(.ex_handler, (p16) ld8 r36=[src1],8); \
+ (p17) shrp r35=r33,r34,shift;; /* 1 */ \
+EX(.ex_handler, (p6) ld8 r22=[src1]); /* common, prime for tail section */ \
+ nop.m 0; \
+ (p16) shrp r38=r36,r37,shift; \
+EX(.ex_handler, (p17) st8 [dst0]=r35,8); /* 1 */ \
+EK(.ex_handler, (p17) st8 [dst1]=r39,8); \
+ br.ctop.dptk.few 1b;; \
+ (p7) add src1=-8,src1; /* back out for <8 byte case */ \
+ shrp r21=r22,r38,shift; /* speculative work */ \
+ br.sptk.few .unaligned_src_tail /* branch out of jump table */ \
+ ;;
+ TEXT_ALIGN(32)
+.jump_table:
+ COPYU(8) // unaligned cases
+.jmp1:
+ COPYU(16)
+ COPYU(24)
+ COPYU(32)
+ COPYU(40)
+ COPYU(48)
+ COPYU(56)
+
+#undef A
+#undef B
+#undef C
+#undef D
+END(memcpy)
+
+/*
+ * Due to lack of local tag support in gcc 2.x assembler, it is not clear which
+ * instruction failed in the bundle. The exception algorithm is that we
+ * first figure out the faulting address, then detect if there is any
+ * progress made on the copy; if so, redo the copy from the last known copied
+ * location up to the faulting address (exclusive). In the copy_from_user
+ * case, the remaining bytes in the kernel buffer will be zeroed.
+ *
+ * Take copy_from_user as an example, in the code there are multiple loads
+ * in a bundle and those multiple loads could span over two pages, the
+ * faulting address is calculated as page_round_down(max(src0, src1)).
+ * This is based on knowledge that if we can access one byte in a page, we
+ * can access any byte in that page.
+ *
+ * predicates used in the exception handler:
+ * p6-p7: direction
+ * p10-p11: src faulting addr calculation
+ * p12-p13: dst faulting addr calculation
+ */
+
+#define A r19
+#define B r20
+#define C r21
+#define D r22
+#define F r28
+
+#define memset_arg0 r32
+#define memset_arg2 r33
+
+#define saved_retval loc0
+#define saved_rtlink loc1
+#define saved_pfs_stack loc2
+
+.ex_hndlr_s:
+ add src0=8,src0
+ br.sptk .ex_handler
+ ;;
+.ex_hndlr_d:
+ add dst0=8,dst0
+ br.sptk .ex_handler
+ ;;
+.ex_hndlr_lcpy_1:
+ mov src1=src_pre_mem
+ mov dst1=dst_pre_mem
+ cmp.gtu p10,p11=src_pre_mem,saved_in1
+ cmp.gtu p12,p13=dst_pre_mem,saved_in0
+ ;;
+(p10) add src0=8,saved_in1
+(p11) mov src0=saved_in1
+(p12) add dst0=8,saved_in0
+(p13) mov dst0=saved_in0
+ br.sptk .ex_handler
+.ex_handler_lcpy:
+ // in the line_copy block, the preload addresses should always be ahead
+ // of the other two src/dst pointers. Furthermore, src1/dst1 should
+ // always be ahead of src0/dst0.
+ mov src1=src_pre_mem
+ mov dst1=dst_pre_mem
+.ex_handler:
+ mov pr=saved_pr,-1 // first restore pr, lc, and pfs
+ mov ar.lc=saved_lc
+ mov ar.pfs=saved_pfs
+ ;;
+.ex_handler_short: // fault occurred in these sections didn't change pr, lc, pfs
+ cmp.ltu p6,p7=saved_in0, saved_in1 // get the copy direction
+ cmp.ltu p10,p11=src0,src1
+ cmp.ltu p12,p13=dst0,dst1
+ fcmp.eq p8,p0=f6,f0 // is it memcpy?
+ mov tmp = dst0
+ ;;
+(p11) mov src1 = src0 // pick the larger of the two
+(p13) mov dst0 = dst1 // make dst0 the smaller one
+(p13) mov dst1 = tmp // and dst1 the larger one
+ ;;
+(p6) dep F = r0,dst1,0,PAGE_SHIFT // usr dst round down to page boundary
+(p7) dep F = r0,src1,0,PAGE_SHIFT // usr src round down to page boundary
+ ;;
+(p6) cmp.le p14,p0=dst0,saved_in0 // no progress has been made on store
+(p7) cmp.le p14,p0=src0,saved_in1 // no progress has been made on load
+ mov retval=saved_in2
+(p8) ld1 tmp=[src1] // force an oops for memcpy call
+(p8) st1 [dst1]=r0 // force an oops for memcpy call
+(p14) br.ret.sptk.many rp
+
+/*
+ * The number of remaining bytes to copy is calculated as:
+ *
+ * A = (faulting_addr - orig_src) -> len to faulting ld address
+ * or
+ * (faulting_addr - orig_dst) -> len to faulting st address
+ * B = (cur_dst - orig_dst) -> len copied so far
+ * C = A - B -> len that still needs to be copied
+ * D = orig_len - A -> len that needs to be zeroed
+ */
+(p6) sub A = F, saved_in0
+(p7) sub A = F, saved_in1
+ clrrrb
+ ;;
+ alloc saved_pfs_stack=ar.pfs,3,3,3,0
+ sub B = dst0, saved_in0 // how many byte copied so far
+ ;;
+ sub C = A, B
+ sub D = saved_in2, A
+ ;;
+ cmp.gt p8,p0=C,r0 // more than 1 byte?
+ add memset_arg0=saved_in0, A
+(p6) mov memset_arg2=0 // copy_to_user should not call memset
+(p7) mov memset_arg2=D // copy_from_user need to have kbuf zeroed
+ mov r8=0
+ mov saved_retval = D
+ mov saved_rtlink = b0
+
+ add out0=saved_in0, B
+ add out1=saved_in1, B
+ mov out2=C
+(p8) br.call.sptk.few b0=__copy_user // recursive call
+ ;;
+
+ add saved_retval=saved_retval,r8 // above might return non-zero value
+ cmp.gt p8,p0=memset_arg2,r0 // more than 1 byte?
+ mov out0=memset_arg0 // *s
+ mov out1=r0 // c
+ mov out2=memset_arg2 // n
+(p8) br.call.sptk.few b0=memset
+ ;;
+
+ mov retval=saved_retval
+ mov ar.pfs=saved_pfs_stack
+ mov b0=saved_rtlink
+ br.ret.sptk.many rp
+
+/* end of McKinley specific optimization */
+END(__copy_user)
diff --git a/arch/ia64/lib/memset.S b/arch/ia64/lib/memset.S
new file mode 100644
index 00000000000..bd8cf907fe2
--- /dev/null
+++ b/arch/ia64/lib/memset.S
@@ -0,0 +1,362 @@
+/* Optimized version of the standard memset() function.
+
+ Copyright (c) 2002 Hewlett-Packard Co/CERN
+ Sverre Jarp <Sverre.Jarp@cern.ch>
+
+ Return: dest
+
+ Inputs:
+ in0: dest
+ in1: value
+ in2: count
+
+ The algorithm is fairly straightforward: set byte by byte until we
+ get to a 16B-aligned address, then loop on 128 B chunks using an
+ early store as prefetching, then loop on 32B chunks, then clear remaining
+ words, finally clear remaining bytes.
+ Since a stf.spill f0 can store 16B in one go, we use this instruction
+ to get peak speed when value = 0. */
+
+#include <asm/asmmacro.h>
+#undef ret
+
+#define dest in0
+#define value in1
+#define cnt in2
+
+#define tmp r31
+#define save_lc r30
+#define ptr0 r29
+#define ptr1 r28
+#define ptr2 r27
+#define ptr3 r26
+#define ptr9 r24
+#define loopcnt r23
+#define linecnt r22
+#define bytecnt r21
+
+#define fvalue f6
+
+// This routine uses only scratch predicate registers (p6 - p15)
+#define p_scr p6 // default register for same-cycle branches
+#define p_nz p7
+#define p_zr p8
+#define p_unalgn p9
+#define p_y p11
+#define p_n p12
+#define p_yy p13
+#define p_nn p14
+
+#define MIN1 15
+#define MIN1P1HALF 8
+#define LINE_SIZE 128
+#define LSIZE_SH 7 // shift amount
+#define PREF_AHEAD 8
+
+GLOBAL_ENTRY(memset)
+{ .mmi
+ .prologue
+ alloc tmp = ar.pfs, 3, 0, 0, 0
+ .body
+ lfetch.nt1 [dest] // prefetch the start of the destination
+ .save ar.lc, save_lc
+ mov.i save_lc = ar.lc
+} { .mmi
+ mov ret0 = dest // return value
+ cmp.ne p_nz, p_zr = value, r0 // use stf.spill if value is zero
+ cmp.eq p_scr, p0 = cnt, r0
+;; }
+{ .mmi
+ and ptr2 = -(MIN1+1), dest // aligned address
+ and tmp = MIN1, dest // prepare to check for correct alignment
+ tbit.nz p_y, p_n = dest, 0 // Do we have an odd address? (M_B_U)
+} { .mib
+ mov ptr1 = dest
+ mux1 value = value, @brcst // create 8 identical bytes in word
+(p_scr) br.ret.dpnt.many rp // return immediately if count = 0
+;; }
+{ .mib
+ cmp.ne p_unalgn, p0 = tmp, r0 // is dest off a 16-byte boundary?
+} { .mib
+ sub bytecnt = (MIN1+1), tmp // NB: # of bytes to move is 1 higher than loopcnt
+ cmp.gt p_scr, p0 = 16, cnt // is it a minimalistic task?
+(p_scr) br.cond.dptk.many .move_bytes_unaligned // go move just a few (M_B_U)
+;; }
+{ .mmi
+(p_unalgn) add ptr1 = (MIN1+1), ptr2 // after alignment
+(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 // after alignment
+(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 // should we do a st8 ?
+;; }
+{ .mib
+(p_y) add cnt = -8, cnt // 8 bytes handled by the st8 below
+(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 // should we do a st4 ?
+} { .mib
+(p_y) st8 [ptr2] = value,-4 // store 8 bytes; post-decrement ptr2 by 4
+(p_n) add ptr2 = 4, ptr2 // no st8: advance ptr2 by 4 instead
+;; }
+{ .mib
+(p_yy) add cnt = -4, cnt // 4 bytes handled by the st4 below
+(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 // should we do a st2 ?
+} { .mib
+(p_yy) st4 [ptr2] = value,-2 // store 4 bytes; post-decrement ptr2 by 2
+(p_nn) add ptr2 = 2, ptr2 // no st4: advance ptr2 by 2 instead
+;; }
+{ .mmi
+ mov tmp = LINE_SIZE+1 // for compare
+(p_y) add cnt = -2, cnt // 2 bytes handled by the st2 below
+(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 // should we do a st1 ?
+} { .mmi
+ setf.sig fvalue=value // transfer value to FLP side
+(p_y) st2 [ptr2] = value,-1 // store 2 bytes; post-decrement ptr2 by 1
+(p_n) add ptr2 = 1, ptr2 // no st2: advance ptr2 by 1 instead
+;; }
+
+{ .mmi
+(p_yy) st1 [ptr2] = value // store the single leading byte
+ cmp.gt p_scr, p0 = tmp, cnt // is it a minimalistic task?
+} { .mbb
+(p_yy) add cnt = -1, cnt // 1 byte handled by the st1 above
+(p_scr) br.cond.dpnt.many .fraction_of_line // go move just a few
+;; }
+
+{ .mib
+ nop.m 0
+ shr.u linecnt = cnt, LSIZE_SH
+(p_zr) br.cond.dptk.many .l1b // Jump to use stf.spill
+;; }
+
+ TEXT_ALIGN(32) // --------------------- // L1A: store ahead into cache lines; fill later
+{ .mmi
+ and tmp = -(LINE_SIZE), cnt // compute end of range
+ mov ptr9 = ptr1 // used for prefetching
+ and cnt = (LINE_SIZE-1), cnt // remainder
+} { .mmi
+ mov loopcnt = PREF_AHEAD-1 // default prefetch loop
+ cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value
+;; }
+{ .mmi
+(p_scr) add loopcnt = -1, linecnt // fewer than PREF_AHEAD lines: use linecnt-1
+ add ptr2 = 8, ptr1 // start of stores (beyond prefetch stores)
+ add ptr1 = tmp, ptr1 // first address beyond total range
+;; }
+{ .mmi
+ add tmp = -1, linecnt // next loop count
+ mov.i ar.lc = loopcnt // loop counter for .pref_l1a
+;; }
+.pref_l1a:
+{ .mib
+ stf8 [ptr9] = fvalue, 128 // Do stores one cache line apart
+ nop.i 0
+ br.cloop.dptk.few .pref_l1a
+;; }
+{ .mmi
+ add ptr0 = 16, ptr2 // Two stores in parallel
+ mov.i ar.lc = tmp // loop counter for .l1ax (linecnt-1)
+;; }
+.l1ax:
+ { .mmi
+ stf8 [ptr2] = fvalue, 8
+ stf8 [ptr0] = fvalue, 8
+ ;; }
+ { .mmi
+ stf8 [ptr2] = fvalue, 24
+ stf8 [ptr0] = fvalue, 24
+ ;; }
+ { .mmi
+ stf8 [ptr2] = fvalue, 8
+ stf8 [ptr0] = fvalue, 8
+ ;; }
+ { .mmi
+ stf8 [ptr2] = fvalue, 24
+ stf8 [ptr0] = fvalue, 24
+ ;; }
+ { .mmi
+ stf8 [ptr2] = fvalue, 8
+ stf8 [ptr0] = fvalue, 8
+ ;; }
+ { .mmi
+ stf8 [ptr2] = fvalue, 24
+ stf8 [ptr0] = fvalue, 24
+ ;; }
+ { .mmi
+ stf8 [ptr2] = fvalue, 8
+ stf8 [ptr0] = fvalue, 32
+ cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching?
+ ;; }
+{ .mmb
+ stf8 [ptr2] = fvalue, 24
+(p_scr) stf8 [ptr9] = fvalue, 128
+ br.cloop.dptk.few .l1ax
+;; }
+{ .mbb
+ cmp.le p_scr, p0 = 8, cnt // at least 8 bytes left ?
+(p_scr) br.cond.dpnt.many .fraction_of_line // Branch no. 2
+ br.cond.dpnt.many .move_bytes_from_alignment // Branch no. 3
+;; }
+
+ TEXT_ALIGN(32)
+.l1b: // ------------------------------------ // L1B: store ahead into cache lines; fill later
+{ .mmi
+ and tmp = -(LINE_SIZE), cnt // compute end of range
+ mov ptr9 = ptr1 // used for prefetching
+ and cnt = (LINE_SIZE-1), cnt // remainder
+} { .mmi
+ mov loopcnt = PREF_AHEAD-1 // default prefetch loop
+ cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value
+;; }
+{ .mmi
+(p_scr) add loopcnt = -1, linecnt // fewer than PREF_AHEAD lines: use linecnt-1
+ add ptr2 = 16, ptr1 // start of stores (beyond prefetch stores)
+ add ptr1 = tmp, ptr1 // first address beyond total range
+;; }
+{ .mmi
+ add tmp = -1, linecnt // next loop count
+ mov.i ar.lc = loopcnt // loop counter for .pref_l1b
+;; }
+.pref_l1b:
+{ .mib
+ stf.spill [ptr9] = f0, 128 // Do stores one cache line apart
+ nop.i 0
+ br.cloop.dptk.few .pref_l1b
+;; }
+{ .mmi
+ add ptr0 = 16, ptr2 // Two stores in parallel
+ mov.i ar.lc = tmp // loop counter for .l1bx (linecnt-1)
+;; }
+.l1bx:
+ { .mmi
+ stf.spill [ptr2] = f0, 32
+ stf.spill [ptr0] = f0, 32
+ ;; }
+ { .mmi
+ stf.spill [ptr2] = f0, 32
+ stf.spill [ptr0] = f0, 32
+ ;; }
+ { .mmi
+ stf.spill [ptr2] = f0, 32
+ stf.spill [ptr0] = f0, 64
+ cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching?
+ ;; }
+{ .mmb
+ stf.spill [ptr2] = f0, 32
+(p_scr) stf.spill [ptr9] = f0, 128
+ br.cloop.dptk.few .l1bx
+;; }
+{ .mib
+ cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ?
+(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // fewer than 8 left: finish with st4/st2/st1
+;; }
+
+.fraction_of_line:
+{ .mib
+ add ptr2 = 16, ptr1
+ shr.u loopcnt = cnt, 5 // loopcnt = cnt / 32
+;; }
+{ .mib
+ cmp.eq p_scr, p0 = loopcnt, r0
+ add loopcnt = -1, loopcnt
+(p_scr) br.cond.dpnt.many .store_words
+;; }
+{ .mib
+ and cnt = 0x1f, cnt // compute the remaining cnt
+ mov.i ar.lc = loopcnt
+;; }
+ TEXT_ALIGN(32)
+.l2: // ------------------------------------ // L2A: store 32B in 2 cycles
+{ .mmb
+ stf8 [ptr1] = fvalue, 8
+ stf8 [ptr2] = fvalue, 8
+;; } { .mmb
+ stf8 [ptr1] = fvalue, 24
+ stf8 [ptr2] = fvalue, 24
+ br.cloop.dptk.many .l2
+;; }
+.store_words:
+{ .mib
+ cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ?
+(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // Branch
+;; }
+
+{ .mmi
+ stf8 [ptr1] = fvalue, 8 // store
+ cmp.le p_y, p_n = 16, cnt
+ add cnt = -8, cnt // subtract
+;; }
+{ .mmi
+(p_y) stf8 [ptr1] = fvalue, 8 // store
+(p_y) cmp.le.unc p_yy, p_nn = 16, cnt
+(p_y) add cnt = -8, cnt // subtract
+;; }
+{ .mmi // store
+(p_yy) stf8 [ptr1] = fvalue, 8
+(p_yy) add cnt = -8, cnt // subtract
+;; }
+
+.move_bytes_from_alignment:
+{ .mib
+ cmp.eq p_scr, p0 = cnt, r0
+ tbit.nz.unc p_y, p0 = cnt, 2 // should we terminate with a st4 ?
+(p_scr) br.cond.dpnt.few .restore_and_exit
+;; }
+{ .mib
+(p_y) st4 [ptr1] = value,4
+ tbit.nz.unc p_yy, p0 = cnt, 1 // should we terminate with a st2 ?
+;; }
+{ .mib
+(p_yy) st2 [ptr1] = value,2
+ tbit.nz.unc p_y, p0 = cnt, 0 // should we terminate with a st1 ?
+;; }
+
+{ .mib
+(p_y) st1 [ptr1] = value
+;; }
+.restore_and_exit:
+{ .mib
+ nop.m 0
+ mov.i ar.lc = save_lc
+ br.ret.sptk.many rp
+;; }
+
+.move_bytes_unaligned:
+{ .mmi
+ .pred.rel "mutex",p_y, p_n
+ .pred.rel "mutex",p_yy, p_nn
+(p_n) cmp.le p_yy, p_nn = 4, cnt
+(p_y) cmp.le p_yy, p_nn = 5, cnt
+(p_n) add ptr2 = 2, ptr1
+} { .mmi
+(p_y) add ptr2 = 3, ptr1
+(p_y) st1 [ptr1] = value, 1 // fill 1 (odd-aligned) byte [15, 14 (or less) left]
+(p_y) add cnt = -1, cnt
+;; }
+{ .mmi
+(p_yy) cmp.le.unc p_y, p0 = 8, cnt
+ add ptr3 = ptr1, cnt // prepare last store
+ mov.i ar.lc = save_lc
+} { .mmi
+(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes
+(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [11, 10 (or less) left]
+(p_yy) add cnt = -4, cnt
+;; }
+{ .mmi
+(p_y) cmp.le.unc p_yy, p0 = 8, cnt
+ add ptr3 = -1, ptr3 // last store
+ tbit.nz p_scr, p0 = cnt, 1 // will there be a st2 at the end ?
+} { .mmi
+(p_y) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes
+(p_y) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [7, 6 (or less) left]
+(p_y) add cnt = -4, cnt
+;; }
+{ .mmi
+(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes
+(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [3, 2 (or less) left]
+ tbit.nz p_y, p0 = cnt, 0 // will there be a st1 at the end ?
+} { .mmi
+(p_yy) add cnt = -4, cnt
+;; }
+{ .mmb
+(p_scr) st2 [ptr1] = value // fill 2 (aligned) bytes
+(p_y) st1 [ptr3] = value // fill last byte (using ptr3)
+ br.ret.sptk.many rp
+}
+END(memset)
diff --git a/arch/ia64/lib/strlen.S b/arch/ia64/lib/strlen.S
new file mode 100644
index 00000000000..e0cdac0a85b
--- /dev/null
+++ b/arch/ia64/lib/strlen.S
@@ -0,0 +1,192 @@
+/*
+ *
+ * Optimized version of the standard strlen() function
+ *
+ *
+ * Inputs:
+ * in0 address of string
+ *
+ * Outputs:
+ * ret0 the number of characters in the string (0 if empty string)
+ * does not count the \0
+ *
+ * Copyright (C) 1999, 2001 Hewlett-Packard Co
+ * Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * 09/24/99 S.Eranian add speculation recovery code
+ */
+
+#include <asm/asmmacro.h>
+
+//
+//
+// This is an enhanced version of the basic strlen. it includes a combination
+// of compute zero index (czx), parallel comparisons, speculative loads and
+// loop unroll using rotating registers.
+//
+// General Ideas about the algorithm:
+// The goal is to look at the string in chunks of 8 bytes.
+// so we need to do a few extra checks at the beginning because the
+// string may not be 8-byte aligned. In this case we load the 8byte
+// quantity which includes the start of the string and mask the unused
+// bytes with 0xff to avoid confusing czx.
+// We use speculative loads and software pipelining to hide memory
+// latency and do read ahead safely. This way we defer any exception.
+//
+// Because we don't want the kernel to be relying on particular
+// settings of the DCR register, we provide recovery code in case
+// speculation fails. The recovery code is going to "redo" the work using
+// only normal loads. If we still get a fault then we generate a
+// kernel panic. Otherwise we return the strlen as usual.
+//
+// The fact that speculation may fail can be caused, for instance, by
+// the DCR.dm bit being set. In this case TLB misses are deferred, i.e.,
+// a NaT bit will be set if the translation is not present. The normal
+// load, on the other hand, will cause the translation to be inserted
+// if the mapping exists.
+//
+// It should be noted that we execute recovery code only when we need
+// to use the data that has been speculatively loaded: we don't execute
+// recovery code on pure read ahead data.
+//
+// Remarks:
+// - the cmp r0,r0 is used as a fast way to initialize a predicate
+// register to 1. This is required to make sure that we get the parallel
+// compare correct.
+//
+// - we don't use the epilogue counter to exit the loop but we need to set
+// it to zero beforehand.
+//
+// - after the loop we must test for NaT values because neither the
+// czx nor cmp instruction raise a NaT consumption fault. We must be
+// careful not to look too far for a NaT for which we don't care.
+// For instance we don't need to look at a NaT in val2 if the zero byte
+// was in val1.
+//
+// - Clearly performance tuning is required.
+//
+//
+//
+#define saved_pfs r11
+#define tmp r10
+#define base r16
+#define orig r17
+#define saved_pr r18
+#define src r19
+#define mask r20
+#define val r21
+#define val1 r22
+#define val2 r23
+
+GLOBAL_ENTRY(strlen)
+ .prologue
+ .save ar.pfs, saved_pfs
+ alloc saved_pfs=ar.pfs,11,0,0,8 // rotating must be multiple of 8
+
+ .rotr v[2], w[2] // declares our 4 aliases
+
+ extr.u tmp=in0,0,3 // tmp=least significant 3 bits
+ mov orig=in0 // keep track of initial byte address
+ dep src=0,in0,0,3 // src=8byte-aligned in0 address
+ .save pr, saved_pr
+ mov saved_pr=pr // preserve predicates (rotation)
+ ;;
+
+ .body
+
+ ld8 v[1]=[src],8 // must not speculate: can fail here
+ shl tmp=tmp,3 // multiply by 8bits/byte
+ mov mask=-1 // our mask
+ ;;
+ ld8.s w[1]=[src],8 // speculatively load next
+ cmp.eq p6,p0=r0,r0 // sets p6 to true for cmp.and
+ sub tmp=64,tmp // how many bits to shift our mask on the right
+ ;;
+ shr.u mask=mask,tmp // zero enough bits to hold v[1] valuable part
+ mov ar.ec=r0 // clear epilogue counter (saved in ar.pfs)
+ ;;
+ add base=-16,src // keep track of aligned base
+ or v[1]=v[1],mask // now we have a safe initial byte pattern
+ ;;
+1:
+ ld8.s v[0]=[src],8 // speculatively load next
+ czx1.r val1=v[1] // search 0 byte from right
+ czx1.r val2=w[1] // search 0 byte from right following 8bytes
+ ;;
+ ld8.s w[0]=[src],8 // speculatively load next to next
+ cmp.eq.and p6,p0=8,val1 // p6 = p6 and val1==8
+ cmp.eq.and p6,p0=8,val2 // p6 = p6 and val2==8
+(p6) br.wtop.dptk 1b // loop until p6 == 0
+ ;;
+ //
+ // We must try the recovery code iff
+ // val1_is_nat || (val1==8 && val2_is_nat)
+ //
+ // XXX Fixme
+ // - there must be a better way of doing the test
+ //
+ cmp.eq p8,p9=8,val1 // p8 = (val1==8): zero byte is not in val1 (disambiguate)
+ tnat.nz p6,p7=val1 // test NaT on val1
+(p6) br.cond.spnt .recover // jump to recovery if val1 is NaT
+ ;;
+ //
+ // if we come here p7 is true, i.e., initialized for // cmp
+ //
+ cmp.eq.and p7,p0=8,val1// val1==8?
+ tnat.nz.and p7,p0=val2 // test NaT on val2
+(p7) br.cond.spnt .recover // jump to recovery if val2 is NaT
+ ;;
+(p8) mov val1=val2 // the other test got us out of the loop
+(p8) adds src=-16,src // correct position when 3 ahead
+(p9) adds src=-24,src // correct position when 4 ahead
+ ;;
+ sub ret0=src,orig // distance from origin
+ sub tmp=8,val1 // which byte in word
+ mov pr=saved_pr,0xffffffffffff0000
+ ;;
+ sub ret0=ret0,tmp // adjust
+ mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what
+ br.ret.sptk.many rp // end of normal execution
+
+ //
+ // Outlined recovery code when speculation failed
+ //
+ // This time we don't use speculation and rely on the normal exception
+ // mechanism. that's why the loop is not as good as the previous one
+ // because read ahead is not possible
+ //
+ // IMPORTANT:
+ // Please note that in the case of strlen() as opposed to strlen_user()
+ // we don't use the exception mechanism, as this function is not
+ // supposed to fail. If that happens it means we have a bug and the
+ // code will cause a kernel fault.
+ //
+ // XXX Fixme
+ // - today we restart from the beginning of the string instead
+ // of trying to continue where we left off.
+ //
+.recover:
+ ld8 val=[base],8 // will fail if unrecoverable fault
+ ;;
+ or val=val,mask // remask first bytes
+ cmp.eq p0,p6=r0,r0 // nullify first ld8 in loop
+ ;;
+ //
+ // ar.ec is still zero here
+ //
+2:
+(p6) ld8 val=[base],8 // will fail if unrecoverable fault
+ ;;
+ czx1.r val1=val // search 0 byte from right
+ ;;
+ cmp.eq p6,p0=8,val1 // val1==8 ?
+(p6) br.wtop.dptk 2b // loop until p6 == 0
+ ;; // (avoid WAW on p63)
+ sub ret0=base,orig // distance from origin
+ sub tmp=8,val1
+ mov pr=saved_pr,0xffffffffffff0000
+ ;;
+ sub ret0=ret0,tmp // length=now - back -1
+ mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what
+ br.ret.sptk.many rp // end of successful recovery code
+END(strlen)
diff --git a/arch/ia64/lib/strlen_user.S b/arch/ia64/lib/strlen_user.S
new file mode 100644
index 00000000000..c71eded4285
--- /dev/null
+++ b/arch/ia64/lib/strlen_user.S
@@ -0,0 +1,198 @@
+/*
+ * Optimized version of the strlen_user() function
+ *
+ * Inputs:
+ * in0 address of buffer
+ *
+ * Outputs:
+ * ret0 0 in case of fault, strlen(buffer)+1 otherwise
+ *
+ * Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ * Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * 01/19/99 S.Eranian heavily enhanced version (see details below)
+ * 09/24/99 S.Eranian added speculation recovery code
+ */
+
+#include <asm/asmmacro.h>
+
+//
+// int strlen_user(char *)
+// ------------------------
+// Returns:
+// - length of string + 1
+// - 0 in case an exception is raised
+//
+// This is an enhanced version of the basic strlen_user. it includes a
+// combination of compute zero index (czx), parallel comparisons, speculative
+// loads and loop unroll using rotating registers.
+//
+// General Ideas about the algorithm:
+// The goal is to look at the string in chunks of 8 bytes.
+// so we need to do a few extra checks at the beginning because the
+// string may not be 8-byte aligned. In this case we load the 8byte
+// quantity which includes the start of the string and mask the unused
+// bytes with 0xff to avoid confusing czx.
+// We use speculative loads and software pipelining to hide memory
+// latency and do read ahead safely. This way we defer any exception.
+//
+// Because we don't want the kernel to be relying on particular
+// settings of the DCR register, we provide recovery code in case
+// speculation fails. The recovery code is going to "redo" the work using
+// only normal loads. If we still get a fault then we return an
+// error (ret0=0). Otherwise we return the strlen+1 as usual.
+// The fact that speculation may fail can be caused, for instance, by
+// the DCR.dm bit being set. In this case TLB misses are deferred, i.e.,
+// a NaT bit will be set if the translation is not present. The normal
+// load, on the other hand, will cause the translation to be inserted
+// if the mapping exists.
+//
+// It should be noted that we execute recovery code only when we need
+// to use the data that has been speculatively loaded: we don't execute
+// recovery code on pure read ahead data.
+//
+// Remarks:
+// - the cmp r0,r0 is used as a fast way to initialize a predicate
+// register to 1. This is required to make sure that we get the parallel
+// compare correct.
+//
+// - we don't use the epilogue counter to exit the loop but we need to set
+// it to zero beforehand.
+//
+// - after the loop we must test for NaT values because neither the
+// czx nor cmp instruction raise a NaT consumption fault. We must be
+// careful not to look too far for a NaT for which we don't care.
+// For instance we don't need to look at a NaT in val2 if the zero byte
+// was in val1.
+//
+// - Clearly performance tuning is required.
+//
+
+#define saved_pfs r11
+#define tmp r10
+#define base r16
+#define orig r17
+#define saved_pr r18
+#define src r19
+#define mask r20
+#define val r21
+#define val1 r22
+#define val2 r23
+
+GLOBAL_ENTRY(__strlen_user)
+ .prologue
+ .save ar.pfs, saved_pfs
+ alloc saved_pfs=ar.pfs,11,0,0,8
+
+ .rotr v[2], w[2] // declares our 4 aliases
+
+ extr.u tmp=in0,0,3 // tmp=least significant 3 bits
+ mov orig=in0 // keep track of initial byte address
+ dep src=0,in0,0,3 // src=8byte-aligned in0 address
+ .save pr, saved_pr
+ mov saved_pr=pr // preserve predicates (rotation)
+ ;;
+
+ .body
+
+ ld8.s v[1]=[src],8 // load the initial 8bytes (must speculate)
+ shl tmp=tmp,3 // multiply by 8bits/byte
+ mov mask=-1 // our mask
+ ;;
+ ld8.s w[1]=[src],8 // load next 8 bytes in 2nd pipeline
+ cmp.eq p6,p0=r0,r0 // sets p6 (required because of // cmp.and)
+ sub tmp=64,tmp // how many bits to shift our mask on the right
+ ;;
+ shr.u mask=mask,tmp // zero enough bits to hold v[1] valuable part
+ mov ar.ec=r0 // clear epilogue counter (saved in ar.pfs)
+ ;;
+ add base=-16,src // keep track of aligned base
+ chk.s v[1], .recover // if already NaT, then directly skip to recover
+ or v[1]=v[1],mask // now we have a safe initial byte pattern
+ ;;
+1:
+ ld8.s v[0]=[src],8 // speculatively load next
+ czx1.r val1=v[1] // search 0 byte from right
+ czx1.r val2=w[1] // search 0 byte from right following 8bytes
+ ;;
+ ld8.s w[0]=[src],8 // speculatively load next to next
+ cmp.eq.and p6,p0=8,val1 // p6 = p6 and val1==8
+ cmp.eq.and p6,p0=8,val2 // p6 = p6 and val2==8
+(p6) br.wtop.dptk.few 1b // loop until p6 == 0
+ ;;
+ //
+ // We must try the recovery code iff
+ // val1_is_nat || (val1==8 && val2_is_nat)
+ //
+ // XXX Fixme
+ // - there must be a better way of doing the test
+ //
+ cmp.eq p8,p9=8,val1 // p8 = (val1==8): zero byte is not in val1 (disambiguate)
+ tnat.nz p6,p7=val1 // test NaT on val1
+(p6) br.cond.spnt .recover // jump to recovery if val1 is NaT
+ ;;
+ //
+ // if we come here p7 is true, i.e., initialized for // cmp
+ //
+ cmp.eq.and p7,p0=8,val1// val1==8?
+ tnat.nz.and p7,p0=val2 // test NaT on val2
+(p7) br.cond.spnt .recover // jump to recovery if val2 is NaT
+ ;;
+(p8) mov val1=val2 // val2 contains the value
+(p8) adds src=-16,src // correct position when 3 ahead
+(p9) adds src=-24,src // correct position when 4 ahead
+ ;;
+ sub ret0=src,orig // distance from origin
+ sub tmp=7,val1 // 7=8-1 because this strlen returns strlen+1
+ mov pr=saved_pr,0xffffffffffff0000
+ ;;
+ sub ret0=ret0,tmp // length=now - back -1
+ mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what
+ br.ret.sptk.many rp // end of normal execution
+
+ //
+ // Outlined recovery code when speculation failed
+ //
+ // This time we don't use speculation and rely on the normal exception
+ // mechanism. that's why the loop is not as good as the previous one
+ // because read ahead is not possible
+ //
+ // XXX Fixme
+ // - today we restart from the beginning of the string instead
+ // of trying to continue where we left off.
+ //
+.recover:
+ EX(.Lexit1, ld8 val=[base],8) // load the initial bytes
+ ;;
+ or val=val,mask // remask first bytes
+ cmp.eq p0,p6=r0,r0 // nullify first ld8 in loop
+ ;;
+ //
+ // ar.ec is still zero here
+ //
+2:
+ EX(.Lexit1, (p6) ld8 val=[base],8)
+ ;;
+ czx1.r val1=val // search 0 byte from right
+ ;;
+ cmp.eq p6,p0=8,val1 // val1==8 ?
+(p6) br.wtop.dptk.few 2b // loop until p6 == 0
+ ;;
+ sub ret0=base,orig // distance from origin
+ sub tmp=7,val1 // 7=8-1 because this strlen returns strlen+1
+ mov pr=saved_pr,0xffffffffffff0000
+ ;;
+ sub ret0=ret0,tmp // length=now - back -1
+ mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what
+ br.ret.sptk.many rp // end of successful recovery code
+
+ //
+ // We failed even on the normal load (called from exception handler)
+ //
+.Lexit1:
+ mov ret0=0
+ mov pr=saved_pr,0xffffffffffff0000
+ mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what
+ br.ret.sptk.many rp
+END(__strlen_user)
diff --git a/arch/ia64/lib/strncpy_from_user.S b/arch/ia64/lib/strncpy_from_user.S
new file mode 100644
index 00000000000..a504381f31e
--- /dev/null
+++ b/arch/ia64/lib/strncpy_from_user.S
@@ -0,0 +1,44 @@
+/*
+ * Just like strncpy() except that if a fault occurs during copying,
+ * -EFAULT is returned.
+ *
+ * Inputs:
+ * in0: address of destination buffer
+ * in1: address of string to be copied
+ * in2: length of buffer in bytes
+ * Outputs:
+ * r8: -EFAULT in case of fault or number of bytes copied if no fault
+ *
+ * Copyright (C) 1998-2001 Hewlett-Packard Co
+ * Copyright (C) 1998-2001 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 00/03/06 D. Mosberger Fixed to return proper return value (bug found
+ * by Andreas Schwab <schwab@suse.de>).
+ */
+
+#include <asm/asmmacro.h>
+
+GLOBAL_ENTRY(__strncpy_from_user)
+ alloc r2=ar.pfs,3,0,0,0
+ mov r8=0 // return value if the buffer length is 0
+ mov r9=in1 // remember the start of the source string
+ ;;
+ add r10=in1,in2 // r10 = source address at which the buffer is full
+ cmp.eq p6,p0=r0,in2 // p6 = (buffer length == 0)
+(p6) br.ret.spnt.many rp // nothing to copy: return 0
+
+ // XXX braindead copy loop---this needs to be optimized
+.Loop1:
+ EX(.Lexit, ld1 r8=[in1],1) // load next source byte, post-increment in1
+ ;;
+ EX(.Lexit, st1 [in0]=r8,1) // store it, post-increment in0
+ cmp.ne p6,p7=r8,r0 // p6 = byte is not NUL, p7 = byte is NUL
+ ;;
+(p6) cmp.ne.unc p8,p0=in1,r10 // p8 = not NUL and buffer not yet full
+(p8) br.cond.dpnt.few .Loop1 // keep copying
+ ;;
+(p6) mov r8=in2 // buffer filled up---return buffer length
+(p7) sub r8=in1,r9,1 // return string length (excluding NUL character)
+[.Lexit:] // also the EX() target for the ld1/st1 above; NOTE(review): r8 is presumably set to -EFAULT by the fault fixup -- confirm
+ br.ret.sptk.many rp
+END(__strncpy_from_user)
diff --git a/arch/ia64/lib/strnlen_user.S b/arch/ia64/lib/strnlen_user.S
new file mode 100644
index 00000000000..d09066b1e49
--- /dev/null
+++ b/arch/ia64/lib/strnlen_user.S
@@ -0,0 +1,45 @@
+/*
+ * Returns 0 if an exception occurs before NUL or the supplied limit (N) is
+ * reached, a value greater than N if the string is longer than the limit,
+ * else strlen+1 (the count includes the terminating NUL).
+ *
+ * Inputs:
+ * in0: address of buffer
+ * in1: string length limit N
+ * Outputs:
+ * r8: 0 in case of fault, strlen(buffer)+1 otherwise
+ *
+ * Copyright (C) 1999, 2001 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <asm/asmmacro.h>
+
+GLOBAL_ENTRY(__strnlen_user)
+ .prologue
+ alloc r2=ar.pfs,2,0,0,0
+ .save ar.lc, r16
+ mov r16=ar.lc // preserve ar.lc
+
+ .body
+
+ add r3=-1,in1 // r3 = N-1, the loop count for br.cloop
+ ;;
+ mov ar.lc=r3
+ mov r9=0 // r9 = number of bytes examined so far
+ ;;
+ // XXX braindead strlen loop---this needs to be optimized
+.Loop1:
+ EXCLR(.Lexit, ld1 r8=[in0],1) // NOTE(review): on a fault EXCLR presumably zeroes r9 so that 0 is returned -- confirm in asmmacro.h
+ add r9=1,r9 // count this byte (a terminating NUL is counted too)
+ ;;
+ cmp.eq p6,p0=r8,r0 // p6 = byte is NUL
+(p6) br.cond.dpnt .Lexit // NUL found: r9 = strlen+1
+ br.cloop.dptk.few .Loop1
+
+ add r9=1,in1 // NUL not found---return N+1
+ ;;
+.Lexit:
+ mov r8=r9 // the return value is the count held in r9
+ mov ar.lc=r16 // restore ar.lc
+ br.ret.sptk.many rp
+END(__strnlen_user)
diff --git a/arch/ia64/lib/swiotlb.c b/arch/ia64/lib/swiotlb.c
new file mode 100644
index 00000000000..ab7b3ad99a7
--- /dev/null
+++ b/arch/ia64/lib/swiotlb.c
@@ -0,0 +1,658 @@
+/*
+ * Dynamic DMA mapping support.
+ *
+ * This implementation is for IA-64 platforms that do not support
+ * I/O TLBs (aka DMA address translation hardware).
+ * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
+ * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
+ * Copyright (C) 2000, 2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 03/05/07 davidm Switch from PCI-DMA to generic device DMA API.
+ * 00/12/13 davidm Rename to swiotlb.c and add mark_clean() to avoid
+ * unnecessary i-cache flushing.
+ * 04/07/.. ak Better overflow handling. Assorted fixes.
+ */
+
+#include <linux/cache.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ctype.h>
+
+#include <asm/io.h>
+#include <asm/pci.h>
+#include <asm/dma.h>
+
+#include <linux/init.h>
+#include <linux/bootmem.h>
+
+#define OFFSET(val,align) ((unsigned long) \
+ ( (val) & ( (align) - 1))) /* val modulo align; the mask form requires align to be a power of 2 */
+
+#define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset) /* kernel virtual address of a scatterlist entry's data */
+#define SG_ENT_PHYS_ADDRESS(SG) virt_to_phys(SG_ENT_VIRT_ADDRESS(SG)) /* the same location as a physical address */
+
+/*
+ * Maximum allowable number of contiguous slabs to map,
+ * must be a power of 2. What is the appropriate value ?
+ * The complexity of {map,unmap}_single is linearly dependent on this value.
+ */
+#define IO_TLB_SEGSIZE 128
+
+/*
+ * log of the size of each IO TLB slab. The number of slabs is command line
+ * controllable.
+ */
+#define IO_TLB_SHIFT 11
+
+int swiotlb_force;
+
+/*
+ * Used to do a quick range check in swiotlb_unmap_single and
+ * swiotlb_sync_single_*, to see if the memory was in fact allocated by this
+ * API.
+ */
+static char *io_tlb_start, *io_tlb_end;
+
+/*
+ * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
+ * io_tlb_end. This is command line adjustable via setup_io_tlb_npages.
+ */
+static unsigned long io_tlb_nslabs;
+
+/*
+ * When the IOMMU overflows we return a fallback buffer. This sets the size.
+ */
+static unsigned long io_tlb_overflow = 32*1024;
+
+void *io_tlb_overflow_buffer;
+
+/*
+ * This is a free list describing the number of free entries available from
+ * each index
+ */
+static unsigned int *io_tlb_list;
+static unsigned int io_tlb_index;
+
+/*
+ * We need to save away the original address corresponding to a mapped entry
+ * for the sync operations.
+ */
+static unsigned char **io_tlb_orig_addr;
+
+/*
+ * Protect the above data structures in the map and unmap calls
+ */
+static DEFINE_SPINLOCK(io_tlb_lock);
+
+/*
+ * Parse the "swiotlb=" boot option: an optional page count, optionally
+ * followed by ",force" (or "force" on its own) to set swiotlb_force.
+ * Always returns 1.
+ */
+static int __init
+setup_io_tlb_npages(char *str)
+{
+	char *cursor = str;
+
+	if (isdigit(*cursor)) {
+		unsigned long npages = simple_strtoul(cursor, &cursor, 0);
+
+		/* pages -> slabs */
+		io_tlb_nslabs = npages << (PAGE_SHIFT - IO_TLB_SHIFT);
+		/* avoid tail segment of size < IO_TLB_SEGSIZE */
+		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+	}
+	if (*cursor == ',')
+		cursor++;
+	if (strcmp(cursor, "force") == 0)
+		swiotlb_force = 1;
+	return 1;
+}
+__setup("swiotlb=", setup_io_tlb_npages);
+/* make io_tlb_overflow tunable too? */
+
+/*
+ * Statically reserve bounce buffer space and initialize bounce buffer data
+ * structures for the software IO TLB used to implement the PCI DMA API.
+ *
+ * default_size is the pool size in bytes; it is used only when no slab
+ * count has been set already (io_tlb_nslabs == 0).
+ */
+void
+swiotlb_init_with_default_size (size_t default_size)
+{
+	unsigned long i;
+
+	if (!io_tlb_nslabs) {
+		/*
+		 * A slab is (1 << IO_TLB_SHIFT) bytes, not a page, so the
+		 * byte count has to be scaled by IO_TLB_SHIFT to match the
+		 * allocation size computed below.
+		 */
+		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
+		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+	}
+
+	/*
+	 * Get IO TLB memory from the low pages
+	 */
+	io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs *
+					       (1 << IO_TLB_SHIFT));
+	if (!io_tlb_start)
+		panic("Cannot allocate SWIOTLB buffer");
+	io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT);
+
+	/*
+	 * Allocate and initialize the free list array.  This array is used
+	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
+	 * between io_tlb_start and io_tlb_end.
+	 */
+	io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
+	for (i = 0; i < io_tlb_nslabs; i++)
+		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+	io_tlb_index = 0;
+	io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *));
+
+	/*
+	 * Get the overflow emergency buffer
+	 */
+	io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
+	printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n",
+	       virt_to_phys(io_tlb_start), virt_to_phys(io_tlb_end));
+}
+
+/*
+ * Initialize the software IO TLB, requesting the default pool size.
+ */
+void
+swiotlb_init (void)
+{
+	size_t default_bytes = 64 * (1<<20);	/* default to 64MB */
+
+	swiotlb_init_with_default_size(default_bytes);
+}
+
+/*
+ * Report whether 'addr' has bits set outside the device's DMA mask, i.e.
+ * whether it lies beyond what the mask allows.  Returns non-zero if so.
+ */
+static inline int
+address_needs_mapping(struct device *hwdev, dma_addr_t addr)
+{
+	dma_addr_t reachable;
+
+	/* If the device has a mask, use it, otherwise default to 32 bits */
+	if (hwdev && hwdev->dma_mask)
+		reachable = *hwdev->dma_mask;
+	else
+		reachable = 0xffffffff;
+
+	return (addr & ~reachable) != 0;
+}
+
+/*
+ * Allocates bounce buffer and returns its kernel virtual address.
+ *
+ * 'buffer' is the original buffer being bounced; it is recorded in
+ * io_tlb_orig_addr[] and, for DMA_TO_DEVICE and DMA_BIDIRECTIONAL, its
+ * contents are copied into the bounce buffer.  Returns NULL when the search
+ * wraps around without finding a slot that has 'nslots' free entries.
+ */
+static void *
+map_single(struct device *hwdev, char *buffer, size_t size, int dir)
+{
+ unsigned long flags;
+ char *dma_addr;
+ unsigned int nslots, stride, index, wrap;
+ int i;
+
+ /*
+ * For mappings greater than a page, we limit the stride (and
+ * hence alignment) to a page size.
+ */
+ nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; /* slabs needed to hold 'size' bytes */
+ if (size > PAGE_SIZE)
+ stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); /* slabs per page */
+ else
+ stride = 1;
+
+ if (!nslots)
+ BUG();
+
+ /*
+ * Find suitable number of IO TLB entries size that will fit this
+ * request and allocate a buffer from that IO TLB pool.
+ */
+ spin_lock_irqsave(&io_tlb_lock, flags);
+ {
+ wrap = index = ALIGN(io_tlb_index, stride); /* resume after the last allocation */
+
+ if (index >= io_tlb_nslabs)
+ wrap = index = 0;
+
+ do {
+ /*
+ * If we find a slot that indicates we have 'nslots'
+ * number of contiguous buffers, we allocate the
+ * buffers from that slot and mark the entries as '0'
+ * indicating unavailable.
+ */
+ if (io_tlb_list[index] >= nslots) {
+ int count = 0;
+
+ for (i = index; i < (int) (index + nslots); i++)
+ io_tlb_list[i] = 0;
+ for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--) /* walk back over the free slots preceding the allocation, stopping at the segment boundary */
+ io_tlb_list[i] = ++count; /* they now have fewer free entries ahead of them */
+ dma_addr = io_tlb_start + (index << IO_TLB_SHIFT);
+
+ /*
+ * Update the indices to avoid searching in
+ * the next round.
+ */
+ io_tlb_index = ((index + nslots) < io_tlb_nslabs
+ ? (index + nslots) : 0);
+
+ goto found;
+ }
+ index += stride;
+ if (index >= io_tlb_nslabs)
+ index = 0;
+ } while (index != wrap);
+
+ spin_unlock_irqrestore(&io_tlb_lock, flags); /* searched the whole pool without success */
+ return NULL;
+ }
+ found:
+ spin_unlock_irqrestore(&io_tlb_lock, flags);
+
+ /*
+ * Save away the mapping from the original address to the DMA address.
+ * This is needed when we sync the memory. Then we sync the buffer if
+ * needed.
+ */
+ io_tlb_orig_addr[index] = buffer;
+ if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
+ memcpy(dma_addr, buffer, size);
+
+ return dma_addr;
+}
+
+/*
+ * dma_addr is the kernel virtual address of the bounce buffer to unmap.
+ *
+ * For DMA_FROM_DEVICE and DMA_BIDIRECTIONAL the bounce buffer contents are
+ * copied back to the original buffer recorded in io_tlb_orig_addr[] (when
+ * one was recorded), then the slots are returned to the free list.
+ */
+static void
+unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
+{
+ unsigned long flags;
+ int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+ int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; /* first slot of this mapping */
+ char *buffer = io_tlb_orig_addr[index];
+
+ /*
+ * First, sync the memory before unmapping the entry
+ */
+ if (buffer && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
+ /*
+ * bounce... copy the data back into the original buffer and
+ * delete the bounce buffer.
+ */
+ memcpy(buffer, dma_addr, size);
+
+ /*
+ * Return the buffer to the free list by setting the corresponding
+ * entries to indicate the number of contiguous entries available.
+ * While returning the entries to the free list, we merge the entries
+ * with slots below and above the pool being returned.
+ */
+ spin_lock_irqsave(&io_tlb_lock, flags);
+ {
+ count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
+ io_tlb_list[index + nslots] : 0); /* free entries following the mapping, if it does not end at the segment boundary */
+ /*
+ * Step 1: return the slots to the free list, merging the
+ * slots with succeeding slots
+ */
+ for (i = index + nslots - 1; i >= index; i--)
+ io_tlb_list[i] = ++count;
+ /*
+ * Step 2: merge the returned slots with the preceding slots,
+ * if available (non zero)
+ */
+ for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
+ io_tlb_list[i] = ++count;
+ }
+ spin_unlock_irqrestore(&io_tlb_lock, flags);
+}
+
+/*
+ * Copy data between a bounce buffer and the original buffer recorded for
+ * it, without releasing the bounce slots.  dma_addr is the kernel virtual
+ * address of the bounce buffer.
+ */
+static void
+sync_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
+{
+	int slot = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
+	char *orig_buf = io_tlb_orig_addr[slot];
+
+	/*
+	 * bounce... copy the data back into/from the original buffer
+	 * XXX How do you handle DMA_BIDIRECTIONAL here ?
+	 */
+	switch (dir) {
+	case DMA_FROM_DEVICE:
+		memcpy(orig_buf, dma_addr, size);
+		break;
+	case DMA_TO_DEVICE:
+		memcpy(dma_addr, orig_buf, size);
+		break;
+	default:
+		BUG();
+	}
+}
+
+/*
+ * Allocate a zeroed buffer of 'size' bytes that 'hwdev' can DMA to.
+ *
+ * Pages from the page allocator are tried first; if none are available or
+ * they are outside the device's DMA mask, the memory is taken from the
+ * bounce pool instead.  On success the kernel virtual address is returned
+ * and the matching device address is stored in *dma_handle.  Returns NULL
+ * when neither source can satisfy the request.
+ */
+void *
+swiotlb_alloc_coherent(struct device *hwdev, size_t size,
+		       dma_addr_t *dma_handle, int flags)
+{
+	unsigned long dev_addr;
+	void *ret;
+	int order = get_order(size);
+
+	/*
+	 * XXX fix me: the DMA API should pass us an explicit DMA mask
+	 * instead, or use ZONE_DMA32 (ia64 overloads ZONE_DMA to be a ~32
+	 * bit range instead of a 16MB one).
+	 */
+	flags |= GFP_DMA;
+
+	ret = (void *)__get_free_pages(flags, order);
+	if (ret && address_needs_mapping(hwdev, virt_to_phys(ret))) {
+		/*
+		 * The allocated memory isn't reachable by the device.
+		 * Fall back on the bounce pool.
+		 */
+		free_pages((unsigned long) ret, order);
+		ret = NULL;
+	}
+	if (!ret) {
+		/*
+		 * We are either out of memory or the device can't DMA
+		 * to GFP_DMA memory; take the memory from the bounce
+		 * pool.  map_single() is called directly because
+		 * swiotlb_map_single() would first test the (NULL)
+		 * original pointer against the DMA mask and, unless
+		 * swiotlb_force is set, hand back its physical address
+		 * instead of allocating anything.
+		 */
+		ret = map_single(hwdev, NULL, size, DMA_FROM_DEVICE);
+		if (!ret)
+			return NULL;
+	}
+
+	memset(ret, 0, size);
+	dev_addr = virt_to_phys(ret);
+
+	/* Confirm address can be DMA'd by device */
+	if (address_needs_mapping(hwdev, dev_addr)) {
+		/* same default as address_needs_mapping(); hwdev may be NULL */
+		dma_addr_t mask = 0xffffffff;
+
+		if (hwdev && hwdev->dma_mask)
+			mask = *hwdev->dma_mask;
+		printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016lx\n",
+		       (unsigned long long)mask, dev_addr);
+		panic("swiotlb_alloc_coherent: allocated memory is out of "
+		      "range for device");
+	}
+	*dma_handle = dev_addr;
+	return ret;
+}
+
+/*
+ * Release a buffer obtained from swiotlb_alloc_coherent().  Buffers that
+ * lie inside the bounce pool are unmapped; anything else is returned to
+ * the page allocator.
+ */
+void
+swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
+		      dma_addr_t dma_handle)
+{
+	if (vaddr >= (void *)io_tlb_start && vaddr < (void *)io_tlb_end) {
+		/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
+		swiotlb_unmap_single (hwdev, dma_handle, size, DMA_TO_DEVICE);
+		return;
+	}
+
+	free_pages((unsigned long) vaddr, get_order(size));
+}
+
+/*
+ * Report that the bounce pool could not satisfy a mapping of 'size' bytes
+ * and, when 'do_panic' is set and the request is larger than the overflow
+ * buffer, panic.
+ */
+static void
+swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
+{
+	const char *name = dev ? dev->bus_id : "?";
+
+	/*
+	 * Ran out of IOMMU space for this operation. This is very bad.
+	 * Unfortunately the drivers cannot handle this operation properly
+	 * unless they check for pci_dma_mapping_error (most don't).
+	 * When the mapping is small enough return a static buffer to limit
+	 * the damage, or panic when the transfer is too big.
+	 */
+	printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %lu bytes at "
+	       "device %s\n", size, name);
+
+	if (size <= io_tlb_overflow || !do_panic)
+		return;
+
+	if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
+		panic("PCI-DMA: Memory would be corrupted\n");
+	if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
+		panic("PCI-DMA: Random memory would be DMAed\n");
+}
+
+/*
+ * Map a single buffer of the indicated size for DMA in streaming mode and
+ * return the address to hand to the device.
+ *
+ * The buffer is used in place when the device can reach it and bouncing is
+ * not forced; otherwise a bounce buffer is allocated, and if that fails the
+ * overflow buffer is used instead.
+ *
+ * Once the device is given the dma address, the device owns this memory until
+ * either swiotlb_unmap_single or swiotlb_sync_single_for_cpu is performed.
+ */
+dma_addr_t
+swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
+{
+	unsigned long dev_addr = virt_to_phys(ptr);
+	void *bounce;
+
+	if (dir == DMA_NONE)
+		BUG();
+
+	/*
+	 * A pointer that already lies in the device's DMA window can be
+	 * returned as is, unless bouncing is being forced.
+	 */
+	if (!swiotlb_force && !address_needs_mapping(hwdev, dev_addr))
+		return dev_addr;
+
+	/*
+	 * Oh well, have to allocate and map a bounce buffer.
+	 */
+	bounce = map_single(hwdev, ptr, size, dir);
+	if (bounce == NULL) {
+		swiotlb_full(hwdev, size, dir, 1);
+		bounce = io_tlb_overflow_buffer;
+	}
+	dev_addr = virt_to_phys(bounce);
+
+	/*
+	 * Ensure that the address returned is DMA'ble
+	 */
+	if (address_needs_mapping(hwdev, dev_addr))
+		panic("map_single: bounce buffer is not DMA'ble");
+
+	return dev_addr;
+}
+
+/*
+ * Since DMA is i-cache coherent, any (complete) pages that were written via
+ * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
+ * flush them when they get mapped into an executable vm-area.
+ *
+ * Only pages lying entirely inside [addr, addr + size) get PG_arch_1 set.
+ */
+static void
+mark_clean(void *addr, size_t size)
+{
+	unsigned long limit = (unsigned long) addr + size;
+	unsigned long cur;
+
+	for (cur = PAGE_ALIGN((unsigned long) addr);
+	     cur + PAGE_SIZE <= limit;
+	     cur += PAGE_SIZE) {
+		struct page *pg = virt_to_page(cur);
+
+		set_bit(PG_arch_1, &pg->flags);
+	}
+}
+
+/*
+ * Unmap a single streaming mode DMA translation.  The dma_addr and size must
+ * match what was provided for in a previous swiotlb_map_single call.  All
+ * other usages are undefined.
+ *
+ * After this call, reads by the cpu to the buffer are guaranteed to see
+ * whatever the device wrote there.
+ */
+void
+swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size,
+		     int dir)
+{
+	char *vaddr = phys_to_virt(dev_addr);
+	int bounced;
+
+	if (dir == DMA_NONE)
+		BUG();
+
+	/* was this address handed out from the bounce pool? */
+	bounced = (vaddr >= io_tlb_start && vaddr < io_tlb_end);
+	if (bounced) {
+		unmap_single(hwdev, vaddr, size, dir);
+		return;
+	}
+	if (dir == DMA_FROM_DEVICE)
+		mark_clean(vaddr, size);
+}
+
+/*
+ * Make physical memory consistent for a single streaming mode DMA translation
+ * after a transfer.
+ *
+ * If you perform a swiotlb_map_single() but wish to interrogate the buffer
+ * using the cpu, yet do not wish to teardown the PCI dma mapping, you must
+ * call this function before doing so.  At the next point you give the PCI dma
+ * address back to the card, you must first perform a
+ * swiotlb_sync_single_for_device, and then the device again owns the buffer.
+ */
+void
+swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
+			    size_t size, int dir)
+{
+	char *vaddr = phys_to_virt(dev_addr);
+	int bounced;
+
+	if (dir == DMA_NONE)
+		BUG();
+
+	/* only addresses inside the bounce pool need copying */
+	bounced = (vaddr >= io_tlb_start && vaddr < io_tlb_end);
+	if (bounced) {
+		sync_single(hwdev, vaddr, size, dir);
+		return;
+	}
+	if (dir == DMA_FROM_DEVICE)
+		mark_clean(vaddr, size);
+}
+
+/*
+ * Counterpart of swiotlb_sync_single_for_cpu(): synchronize a single
+ * streaming mapping before the buffer is given back to the device.
+ */
+void
+swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
+			       size_t size, int dir)
+{
+	char *vaddr = phys_to_virt(dev_addr);
+	int bounced;
+
+	if (dir == DMA_NONE)
+		BUG();
+
+	/* only addresses inside the bounce pool need copying */
+	bounced = (vaddr >= io_tlb_start && vaddr < io_tlb_end);
+	if (bounced) {
+		sync_single(hwdev, vaddr, size, dir);
+		return;
+	}
+	if (dir == DMA_FROM_DEVICE)
+		mark_clean(vaddr, size);
+}
+
+/*
+ * Map a set of buffers described by scatterlist in streaming mode for DMA.
+ * This is the scatter-gather version of the above swiotlb_map_single
+ * interface.  Here the scatter gather list elements are each tagged with the
+ * appropriate dma address and length.  They are obtained via
+ * sg_dma_{address,length}(SG).
+ *
+ * NOTE: An implementation may be able to use a smaller number of
+ *       DMA address/length pairs than there are SG table elements.
+ *       (for example via virtual mapping capabilities)
+ *       The routine returns the number of addr/length pairs actually
+ *       used, at most nents.
+ *
+ * Returns 0 if a bounce buffer could not be allocated for one of the
+ * entries; the entries mapped before it are unmapped again.
+ *
+ * Device ownership issues as mentioned above for swiotlb_map_single are the
+ * same here.
+ */
+int
+swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
+	       int dir)
+{
+	void *addr;
+	unsigned long dev_addr;
+	int i;
+
+	if (dir == DMA_NONE)
+		BUG();
+
+	for (i = 0; i < nelems; i++, sg++) {
+		addr = SG_ENT_VIRT_ADDRESS(sg);
+		dev_addr = virt_to_phys(addr);
+		if (swiotlb_force || address_needs_mapping(hwdev, dev_addr)) {
+			/*
+			 * Test the pointer map_single() returns rather than
+			 * its physical translation, so that failure detection
+			 * does not depend on what virt_to_phys(NULL) yields.
+			 */
+			void *map = map_single(hwdev, addr, sg->length, dir);
+
+			if (!map) {
+				/* Don't panic here, we expect map_sg users
+				   to do proper error handling. */
+				swiotlb_full(hwdev, sg->length, dir, 0);
+				swiotlb_unmap_sg(hwdev, sg - i, i, dir);
+				sg[0].dma_length = 0;
+				return 0;
+			}
+			sg->dma_address = (dma_addr_t) virt_to_phys(map);
+		} else
+			sg->dma_address = dev_addr;
+		sg->dma_length = sg->length;
+	}
+	return nelems;
+}
+
+/*
+ * Unmap a set of streaming mode DMA translations.  Again, cpu read rules
+ * concerning calls here are the same as for swiotlb_unmap_single() above.
+ *
+ * An entry whose dma_address differs from its own physical address was
+ * bounced and is unmapped; other entries are only marked clean after a
+ * DMA_FROM_DEVICE transfer.
+ */
+void
+swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
+		 int dir)
+{
+	int n;
+
+	if (dir == DMA_NONE)
+		BUG();
+
+	for (n = 0; n < nelems; n++, sg++) {
+		if (sg->dma_address == SG_ENT_PHYS_ADDRESS(sg)) {
+			/* mapped in place */
+			if (dir == DMA_FROM_DEVICE)
+				mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
+			continue;
+		}
+		unmap_single(hwdev, (void *) phys_to_virt(sg->dma_address), sg->dma_length, dir);
+	}
+}
+
+/*
+ * Make physical memory consistent for a set of streaming mode DMA translations
+ * after a transfer.
+ *
+ * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
+ * and usage.  Only entries that were bounced (dma_address differs from the
+ * entry's own physical address) are copied.
+ */
+void
+swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
+			int nelems, int dir)
+{
+	int i;
+
+	if (dir == DMA_NONE)
+		BUG();
+
+	for (i = 0; i < nelems; i++, sg++)
+		if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
+			/*
+			 * sync_single() indexes the pool relative to
+			 * io_tlb_start, a kernel virtual address, so the
+			 * physical dma_address must be translated first.
+			 */
+			sync_single(hwdev, (void *) phys_to_virt(sg->dma_address),
+				    sg->dma_length, dir);
+}
+
+/*
+ * Scatter-gather counterpart of swiotlb_sync_single_for_device().  Only
+ * entries that were bounced (dma_address differs from the entry's own
+ * physical address) are copied.
+ */
+void
+swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
+			   int nelems, int dir)
+{
+	int i;
+
+	if (dir == DMA_NONE)
+		BUG();
+
+	for (i = 0; i < nelems; i++, sg++)
+		if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
+			/*
+			 * sync_single() indexes the pool relative to
+			 * io_tlb_start, a kernel virtual address, so the
+			 * physical dma_address must be translated first.
+			 */
+			sync_single(hwdev, (void *) phys_to_virt(sg->dma_address),
+				    sg->dma_length, dir);
+}
+
+/*
+ * A mapping failed if the address handed back is that of the overflow
+ * buffer.  Returns non-zero in that case.
+ */
+int
+swiotlb_dma_mapping_error(dma_addr_t dma_addr)
+{
+	if (dma_addr == virt_to_phys(io_tlb_overflow_buffer))
+		return 1;
+	return 0;
+}
+
+/*
+ * Return whether the given PCI device DMA address mask can be supported
+ * properly.  For example, if your device can only drive the low 24-bits
+ * during PCI bus mastering, then you would pass 0x00ffffff as the mask to
+ * this function.
+ *
+ * The mask is supported when it covers the last byte of the bounce pool.
+ */
+int
+swiotlb_dma_supported (struct device *hwdev, u64 mask)
+{
+	return mask >= (virt_to_phys (io_tlb_end) - 1);
+}
+
+EXPORT_SYMBOL(swiotlb_init);
+EXPORT_SYMBOL(swiotlb_map_single);
+EXPORT_SYMBOL(swiotlb_unmap_single);
+EXPORT_SYMBOL(swiotlb_map_sg);
+EXPORT_SYMBOL(swiotlb_unmap_sg);
+EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
+EXPORT_SYMBOL(swiotlb_sync_single_for_device);
+EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
+EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
+EXPORT_SYMBOL(swiotlb_dma_mapping_error);
+EXPORT_SYMBOL(swiotlb_alloc_coherent);
+EXPORT_SYMBOL(swiotlb_free_coherent);
+EXPORT_SYMBOL(swiotlb_dma_supported);
diff --git a/arch/ia64/lib/xor.S b/arch/ia64/lib/xor.S
new file mode 100644
index 00000000000..54e3f7eab8e
--- /dev/null
+++ b/arch/ia64/lib/xor.S
@@ -0,0 +1,184 @@
+/*
+ * arch/ia64/lib/xor.S
+ *
+ * Optimized RAID-5 checksumming functions for IA-64.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * (for example /usr/src/linux/COPYING); if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <asm/asmmacro.h>
+
+GLOBAL_ENTRY(xor_ia64_2)
+ .prologue
+ .fframe 0
+ .save ar.pfs, r31
+ alloc r31 = ar.pfs, 3, 0, 13, 16 // 3 inputs, 0 locals, 13 outputs, 16 rotating registers
+ .save ar.lc, r30
+ mov r30 = ar.lc // preserve ar.lc
+ .save pr, r29
+ mov r29 = pr // preserve the predicate registers
+ ;;
+ .body
+ mov r8 = in1 // store pointer: results are written back through in1
+ mov ar.ec = 6 + 2 // epilog count = number of pipeline stages (8)
+ shr in0 = in0, 3 // in0 >>= 3; presumably converts a byte count into 8-byte words, confirm against callers
+ ;;
+ adds in0 = -1, in0 // word count minus one, for the loop counter
+ mov r16 = in1 // load pointer for the first source
+ mov r17 = in2 // load pointer for the second source
+ ;;
+ mov ar.lc = in0
+ mov pr.rot = 1 << 16 // of the rotating predicates, set only p16
+ ;;
+ .rotr s1[6+1], s2[6+1], d[2] // names for the rotating registers
+ .rotp p[6+2] // names for the rotating stage predicates
+0:
+(p[0]) ld8.nta s1[0] = [r16], 8 // stage 0: load one word from each source
+(p[0]) ld8.nta s2[0] = [r17], 8
+(p[6]) xor d[0] = s1[6], s2[6] // stage 6: xor the words loaded six rotations earlier
+(p[6+1])st8.nta [r8] = d[1], 8 // stage 7: store the result of the previous rotation
+ nop.f 0
+ br.ctop.dptk.few 0b // rotate registers and loop
+ ;;
+ mov ar.lc = r30 // restore ar.lc
+ mov pr = r29, -1 // restore all predicate registers
+ br.ret.sptk.few rp
+END(xor_ia64_2)
+
+GLOBAL_ENTRY(xor_ia64_3)
+ .prologue
+ .fframe 0
+ .save ar.pfs, r31
+ alloc r31 = ar.pfs, 4, 0, 20, 24 // 4 inputs, 0 locals, 20 outputs, 24 rotating registers
+ .save ar.lc, r30
+ mov r30 = ar.lc // preserve ar.lc
+ .save pr, r29
+ mov r29 = pr // preserve the predicate registers
+ ;;
+ .body
+ mov r8 = in1 // store pointer: results are written back through in1
+ mov ar.ec = 6 + 2 // epilog count = number of pipeline stages (8)
+ shr in0 = in0, 3 // in0 >>= 3; presumably converts a byte count into 8-byte words, confirm against callers
+ ;;
+ adds in0 = -1, in0 // word count minus one, for the loop counter
+ mov r16 = in1 // load pointer for the first source
+ mov r17 = in2 // load pointer for the second source
+ ;;
+ mov r18 = in3 // load pointer for the third source
+ mov ar.lc = in0
+ mov pr.rot = 1 << 16 // of the rotating predicates, set only p16
+ ;;
+ .rotr s1[6+1], s2[6+1], s3[6+1], d[2] // names for the rotating registers
+ .rotp p[6+2] // names for the rotating stage predicates
+0:
+(p[0]) ld8.nta s1[0] = [r16], 8 // stage 0: load one word from each source
+(p[0]) ld8.nta s2[0] = [r17], 8
+(p[6]) xor d[0] = s1[6], s2[6] // stage 6: xor the first two sources
+ ;;
+(p[0]) ld8.nta s3[0] = [r18], 8
+(p[6+1])st8.nta [r8] = d[1], 8 // stage 7: store the result of the previous rotation
+(p[6]) xor d[0] = d[0], s3[6] // stage 6: fold in the third source
+ br.ctop.dptk.few 0b // rotate registers and loop
+ ;;
+ mov ar.lc = r30 // restore ar.lc
+ mov pr = r29, -1 // restore all predicate registers
+ br.ret.sptk.few rp
+END(xor_ia64_3)
+
+GLOBAL_ENTRY(xor_ia64_4)
+ .prologue
+ .fframe 0
+ .save ar.pfs, r31
+ alloc r31 = ar.pfs, 5, 0, 27, 32 // 5 inputs, 0 locals, 27 outputs, 32 rotating registers
+ .save ar.lc, r30
+ mov r30 = ar.lc // preserve ar.lc
+ .save pr, r29
+ mov r29 = pr // preserve the predicate registers
+ ;;
+ .body
+ mov r8 = in1 // store pointer: results are written back through in1
+ mov ar.ec = 6 + 2 // epilog count = number of pipeline stages (8)
+ shr in0 = in0, 3 // in0 >>= 3; presumably converts a byte count into 8-byte words, confirm against callers
+ ;;
+ adds in0 = -1, in0 // word count minus one, for the loop counter
+ mov r16 = in1 // load pointer for the first source
+ mov r17 = in2 // load pointer for the second source
+ ;;
+ mov r18 = in3 // load pointer for the third source
+ mov ar.lc = in0
+ mov pr.rot = 1 << 16 // of the rotating predicates, set only p16
+ mov r19 = in4 // load pointer for the fourth source
+ ;;
+ .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2] // names for the rotating registers
+ .rotp p[6+2] // names for the rotating stage predicates
+0:
+(p[0]) ld8.nta s1[0] = [r16], 8 // stage 0: load one word from each source
+(p[0]) ld8.nta s2[0] = [r17], 8
+(p[6]) xor d[0] = s1[6], s2[6] // stage 6: xor sources one and two
+(p[0]) ld8.nta s3[0] = [r18], 8
+(p[0]) ld8.nta s4[0] = [r19], 8
+(p[6]) xor r20 = s3[6], s4[6] // stage 6: xor sources three and four into scratch r20
+ ;;
+(p[6+1])st8.nta [r8] = d[1], 8 // stage 7: store the result of the previous rotation
+(p[6]) xor d[0] = d[0], r20 // stage 6: combine the two partial results
+ br.ctop.dptk.few 0b // rotate registers and loop
+ ;;
+ mov ar.lc = r30 // restore ar.lc
+ mov pr = r29, -1 // restore all predicate registers
+ br.ret.sptk.few rp
+END(xor_ia64_4)
+
+GLOBAL_ENTRY(xor_ia64_5)
+ .prologue
+ .fframe 0
+ .save ar.pfs, r31
+ alloc r31 = ar.pfs, 6, 0, 34, 40 // 6 inputs, 0 locals, 34 outputs, 40 rotating registers
+ .save ar.lc, r30
+ mov r30 = ar.lc // preserve ar.lc
+ .save pr, r29
+ mov r29 = pr // preserve the predicate registers
+ ;;
+ .body
+ mov r8 = in1 // store pointer: results are written back through in1
+ mov ar.ec = 6 + 2 // epilog count = number of pipeline stages (8)
+ shr in0 = in0, 3 // in0 >>= 3; presumably converts a byte count into 8-byte words, confirm against callers
+ ;;
+ adds in0 = -1, in0 // word count minus one, for the loop counter
+ mov r16 = in1 // load pointer for the first source
+ mov r17 = in2 // load pointer for the second source
+ ;;
+ mov r18 = in3 // load pointer for the third source
+ mov ar.lc = in0
+ mov pr.rot = 1 << 16 // of the rotating predicates, set only p16
+ mov r19 = in4 // load pointer for the fourth source
+ mov r20 = in5 // load pointer for the fifth source
+ ;;
+ .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2] // names for the rotating registers
+ .rotp p[6+2] // names for the rotating stage predicates
+0:
+(p[0]) ld8.nta s1[0] = [r16], 8 // stage 0: load one word from each source
+(p[0]) ld8.nta s2[0] = [r17], 8
+(p[6]) xor d[0] = s1[6], s2[6] // stage 6: xor sources one and two
+(p[0]) ld8.nta s3[0] = [r18], 8
+(p[0]) ld8.nta s4[0] = [r19], 8
+(p[6]) xor r21 = s3[6], s4[6] // stage 6: xor sources three and four into scratch r21
+ ;;
+(p[0]) ld8.nta s5[0] = [r20], 8
+(p[6+1])st8.nta [r8] = d[1], 8 // stage 7: store the result of the previous rotation
+(p[6]) xor d[0] = d[0], r21 // stage 6: combine the two partial results
+ ;;
+(p[6]) xor d[0] = d[0], s5[6] // stage 6: fold in the fifth source
+ nop.f 0
+ br.ctop.dptk.few 0b // rotate registers and loop
+ ;;
+ mov ar.lc = r30 // restore ar.lc
+ mov pr = r29, -1 // restore all predicate registers
+ br.ret.sptk.few rp
+END(xor_ia64_5)
diff --git a/arch/ia64/mm/Makefile b/arch/ia64/mm/Makefile
new file mode 100644
index 00000000000..7078f67887e
--- /dev/null
+++ b/arch/ia64/mm/Makefile
@@ -0,0 +1,12 @@
+#
+# Makefile for the ia64-specific parts of the memory manager.
+#
+
+obj-y := init.o fault.o tlb.o extable.o
+
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+obj-$(CONFIG_NUMA) += numa.o
+obj-$(CONFIG_DISCONTIGMEM) += discontig.o
+ifndef CONFIG_DISCONTIGMEM
+obj-y += contig.o
+endif
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
new file mode 100644
index 00000000000..6daf15ac894
--- /dev/null
+++ b/arch/ia64/mm/contig.c
@@ -0,0 +1,299 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ * Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 2000, Rohit Seth <rohit.seth@intel.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 2003 Silicon Graphics, Inc. All rights reserved.
+ *
+ * Routines used by ia64 machines with contiguous (or virtually contiguous)
+ * memory.
+ */
+#include <linux/config.h>
+#include <linux/bootmem.h>
+#include <linux/efi.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+
+#include <asm/meminit.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/sections.h>
+#include <asm/mca.h>
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+static unsigned long num_dma_physpages;
+#endif
+
+/**
+ * show_mem - display a memory statistics summary
+ *
+ * Prints the free areas and free swap, then walks every valid page frame
+ * below max_mapnr and reports how many pages are reserved, shared or in the
+ * swap cache.
+ */
+void
+show_mem (void)
+{
+	int total = 0, reserved = 0, shared = 0, cached = 0;
+	int i;
+
+	printk("Mem-info:\n");
+	show_free_areas();
+
+	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+	for (i = max_mapnr; i-- > 0; ) {
+		struct page *page;
+
+		if (!pfn_valid(i))
+			continue;
+		page = mem_map + i;
+		total++;
+		if (PageReserved(page))
+			reserved++;
+		else if (PageSwapCache(page))
+			cached++;
+		else if (page_count(page))
+			shared += page_count(page) - 1;
+	}
+	printk("%d pages of RAM\n", total);
+	printk("%d reserved pages\n", reserved);
+	printk("%d pages shared\n", shared);
+	printk("%d pages swap cached\n", cached);
+	printk("%ld pages in page table cache\n", pgtable_cache_size);
+}
+
+/* physical address where the bootmem map is located */
+unsigned long bootmap_start;
+
+/**
+ * find_max_pfn - adjust the maximum page number callback
+ * @start: start of range
+ * @end: end of range
+ * @arg: address of pointer to global max_pfn variable
+ *
+ * Passed as a callback function to efi_memmap_walk() to determine the highest
+ * available page frame number in the system.  Always returns 0.
+ */
+int
+find_max_pfn (unsigned long start, unsigned long end, void *arg)
+{
+	unsigned long *max_pfnp = arg;
+	unsigned long last_pfn;
+
+	last_pfn = (PAGE_ALIGN(end - 1) - PAGE_OFFSET) >> PAGE_SHIFT;
+	if (*max_pfnp < last_pfn)
+		*max_pfnp = last_pfn;
+	return 0;
+}
+
+/**
+ * find_bootmap_location - callback to find a memory area for the bootmap
+ * @start: start of region
+ * @end: end of region
+ * @arg: points to the number of bytes needed for the bootmap
+ *
+ * Looks at the gaps between reserved regions that fall inside the region
+ * and stores the physical address of the first one that is large enough in
+ * bootmap_start.  This address must be page-aligned.  Returns -1 once a
+ * location has been found and 0 otherwise.
+ */
+int
+find_bootmap_location (unsigned long start, unsigned long end, void *arg)
+{
+	unsigned long bytes_needed = *(unsigned long *)arg;
+	unsigned long gap_start, gap_end;
+	unsigned long next_free = PAGE_OFFSET;
+	int r;
+
+#if IGNORE_PFN0
+	if (start == PAGE_OFFSET) {
+		start += PAGE_SIZE;
+		if (start >= end)
+			return 0;
+	}
+#endif
+
+	for (r = 0; r < num_rsvd_regions; r++) {
+		gap_start = max(start, next_free);
+		gap_end = min(end, rsvd_region[r].start & PAGE_MASK);
+
+		/* the next gap begins after this reserved region */
+		next_free = PAGE_ALIGN(rsvd_region[r].end);
+
+		if (gap_end > gap_start) {
+			if (gap_end - gap_start >= bytes_needed) {
+				bootmap_start = __pa(gap_start);
+				return -1;	/* done */
+			}
+
+			/* nothing more available in this segment */
+			if (gap_end == end)
+				break;
+		}
+	}
+	return 0;
+}
+
+/**
+ * find_memory - setup memory map
+ *
+ * Walk the EFI memory map and find usable memory for the system, taking
+ * into account reserved areas.
+ *
+ * Determines max_pfn, finds room for the bootmem bitmap, initializes the
+ * bootmem allocator and frees the memory passed through filter_rsvd_memory
+ * into it.  Panics if no location for the bitmap can be found.
+ */
+void
+find_memory (void)
+{
+ unsigned long bootmap_size;
+
+ reserve_memory();
+
+ /* first find highest page frame number */
+ max_pfn = 0;
+ efi_memmap_walk(find_max_pfn, &max_pfn);
+
+ /* how many bytes to cover all the pages */
+ bootmap_size = bootmem_bootmap_pages(max_pfn) << PAGE_SHIFT;
+
+ /* look for a location to hold the bootmap */
+ bootmap_start = ~0UL; /* sentinel: still ~0UL afterwards means nothing was found */
+ efi_memmap_walk(find_bootmap_location, &bootmap_size);
+ if (bootmap_start == ~0UL)
+ panic("Cannot find %ld bytes for bootmap\n", bootmap_size);
+
+ bootmap_size = init_bootmem(bootmap_start >> PAGE_SHIFT, max_pfn); /* bootmap_size is reused for the value init_bootmem() returns */
+
+ /* Free all available memory, then mark bootmem-map as being in use. */
+ efi_memmap_walk(filter_rsvd_memory, free_bootmem);
+ reserve_bootmem(bootmap_start, bootmap_size);
+
+ find_initrd();
+}
+
+#ifdef CONFIG_SMP
+/**
+ * per_cpu_init - setup per-cpu variables
+ *
+ * Allocate and setup per-cpu data areas.
+ *
+ * The CPU whose smp_processor_id() is 0 allocates PERCPU_PAGE_SIZE bytes
+ * for each of NR_CPUS CPUs, copies the per-cpu template into every area and
+ * records each area's offset.  Every caller gets back the address of its
+ * own area.
+ */
+void *
+per_cpu_init (void)
+{
+ void *cpu_data;
+ int cpu;
+
+ /*
+ * get_free_pages() cannot be used before cpu_init() done. BSP
+ * allocates "NR_CPUS" pages for all CPUs to avoid that AP calls
+ * get_zeroed_page().
+ */
+ if (smp_processor_id() == 0) {
+ cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS,
+ PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ memcpy(cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start); /* copy the per-cpu template into this CPU's area */
+ __per_cpu_offset[cpu] = (char *) cpu_data - __per_cpu_start; /* distance from the template to this CPU's copy */
+ cpu_data += PERCPU_PAGE_SIZE; /* advance to the next CPU's area */
+ per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
+ }
+ }
+ return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * efi_memmap_walk() callback: add the number of pages in [start, end) to
+ * the counter that 'arg' points to.  Always returns 0.
+ */
+static int
+count_pages (u64 start, u64 end, void *arg)
+{
+	unsigned long *total = arg;
+	u64 bytes = end - start;
+
+	*total += bytes >> PAGE_SHIFT;
+	return 0;
+}
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+/*
+ * efi_memmap_walk() callback: add the number of pages of [start, end) that
+ * lie below MAX_DMA_ADDRESS to the counter that 'arg' points to.  Always
+ * returns 0.
+ */
+static int
+count_dma_pages (u64 start, u64 end, void *arg)
+{
+	unsigned long *total = arg;
+
+	if (start >= MAX_DMA_ADDRESS)
+		return 0;
+
+	*total += (min(end, MAX_DMA_ADDRESS) - start) >> PAGE_SHIFT;
+	return 0;
+}
+#endif
+
+/*
+ * Set up the page tables.
+ *
+ * Counts the physical pages, splits them into ZONE_DMA and ZONE_NORMAL at
+ * MAX_DMA_ADDRESS and hands the zone sizes to free_area_init() or
+ * free_area_init_node().  With CONFIG_VIRTUAL_MEM_MAP the holes in each
+ * zone are accounted for as well, and a virtual mem_map is set up when the
+ * largest hole reaches LARGE_GAP.
+ */
+
+void
+paging_init (void)
+{
+ unsigned long max_dma;
+ unsigned long zones_size[MAX_NR_ZONES];
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+ unsigned long zholes_size[MAX_NR_ZONES];
+ unsigned long max_gap;
+#endif
+
+ /* initialize mem_map[] */
+
+ memset(zones_size, 0, sizeof(zones_size));
+
+ num_physpages = 0;
+ efi_memmap_walk(count_pages, &num_physpages);
+
+ max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT; /* page frame number at the DMA limit */
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+ memset(zholes_size, 0, sizeof(zholes_size));
+
+ num_dma_physpages = 0;
+ efi_memmap_walk(count_dma_pages, &num_dma_physpages);
+
+ if (max_low_pfn < max_dma) { /* all memory lies below the DMA limit */
+ zones_size[ZONE_DMA] = max_low_pfn;
+ zholes_size[ZONE_DMA] = max_low_pfn - num_dma_physpages; /* zone span minus the pages actually present */
+ } else {
+ zones_size[ZONE_DMA] = max_dma;
+ zholes_size[ZONE_DMA] = max_dma - num_dma_physpages;
+ if (num_physpages > num_dma_physpages) { /* some pages lie above the DMA limit */
+ zones_size[ZONE_NORMAL] = max_low_pfn - max_dma;
+ zholes_size[ZONE_NORMAL] =
+ ((max_low_pfn - max_dma) -
+ (num_physpages - num_dma_physpages));
+ }
+ }
+
+ max_gap = 0;
+ efi_memmap_walk(find_largest_hole, (u64 *)&max_gap);
+ if (max_gap < LARGE_GAP) { /* no large hole: no virtual mem_map */
+ vmem_map = (struct page *) 0;
+ free_area_init_node(0, &contig_page_data, zones_size, 0,
+ zholes_size);
+ } else {
+ unsigned long map_size;
+
+ /* allocate virtual_mem_map */
+
+ map_size = PAGE_ALIGN(max_low_pfn * sizeof(struct page));
+ vmalloc_end -= map_size; /* carve the map out of the top of the vmalloc range */
+ vmem_map = (struct page *) vmalloc_end;
+ efi_memmap_walk(create_mem_map_page_table, NULL);
+
+ NODE_DATA(0)->node_mem_map = vmem_map;
+ free_area_init_node(0, &contig_page_data, zones_size,
+ 0, zholes_size);
+
+ printk("Virtual mem_map starts at 0x%p\n", mem_map);
+ }
+#else /* !CONFIG_VIRTUAL_MEM_MAP */
+ if (max_low_pfn < max_dma)
+ zones_size[ZONE_DMA] = max_low_pfn;
+ else {
+ zones_size[ZONE_DMA] = max_dma;
+ zones_size[ZONE_NORMAL] = max_low_pfn - max_dma;
+ }
+ free_area_init(zones_size);
+#endif /* !CONFIG_VIRTUAL_MEM_MAP */
+ zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
+}
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
new file mode 100644
index 00000000000..3456a9b6971
--- /dev/null
+++ b/arch/ia64/mm/discontig.c
@@ -0,0 +1,737 @@
+/*
+ * Copyright (c) 2000, 2003 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (c) 2001 Intel Corp.
+ * Copyright (c) 2001 Tony Luck <tony.luck@intel.com>
+ * Copyright (c) 2002 NEC Corp.
+ * Copyright (c) 2002 Kimio Suganuma <k-suganuma@da.jp.nec.com>
+ * Copyright (c) 2004 Silicon Graphics, Inc
+ * Russ Anderson <rja@sgi.com>
+ * Jesse Barnes <jbarnes@sgi.com>
+ * Jack Steiner <steiner@sgi.com>
+ */
+
+/*
+ * Platform initialization for Discontig Memory
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/bootmem.h>
+#include <linux/acpi.h>
+#include <linux/efi.h>
+#include <linux/nodemask.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/meminit.h>
+#include <asm/numa.h>
+#include <asm/sections.h>
+
+/*
+ * Track per-node information needed to setup the boot memory allocator, the
+ * per-node areas, and the real VM.
+ */
+struct early_node_data {
+ struct ia64_node_data *node_data; /* points into the per-node area; set by find_pernode_space() */
+ pg_data_t *pgdat; /* points into the per-node area; set by find_pernode_space() */
+ unsigned long pernode_addr; /* start of the per-node area; 0 until find_pernode_space() picks one */
+ unsigned long pernode_size; /* page-aligned size of the per-node area, in bytes */
+ struct bootmem_data bootmem_data; /* node_boot_start/node_low_pfn are maintained by build_node_maps() */
+ unsigned long num_physpages; /* pages accumulated by count_node_pages() */
+ unsigned long num_dma_physpages; /* pages below MAX_DMA_ADDRESS, accumulated by count_node_pages() */
+ unsigned long min_pfn; /* lowest pfn seen by count_node_pages() (after rounding down) */
+ unsigned long max_pfn; /* highest pfn seen by count_node_pages() (after rounding up) */
+};
+
+/* One entry per possible node, indexed by nid. */
+static struct early_node_data mem_data[MAX_NUMNODES] __initdata;
+
+/**
+ * reassign_cpu_only_nodes - called from find_memory to move CPU-only nodes to a memory node
+ *
+ * This function will move nodes with only CPUs (no memory)
+ * to a node with memory which is at the minimum numa_slit distance.
+ * Any reassignments will result in the compression of the nodes
+ * and renumbering the nid values where appropriate.
+ * The static declarations below are to avoid large stack size which
+ * makes the code not re-entrant.
+ */
+static void __init reassign_cpu_only_nodes(void)
+{
+ struct node_memblk_s *p;
+ int i, j, k, nnode, nid, cpu, cpunid, pxm;
+ u8 cslit, slit;
+ static DECLARE_BITMAP(nodes_with_mem, MAX_NUMNODES) __initdata;
+ static u8 numa_slit_fix[MAX_NUMNODES * MAX_NUMNODES] __initdata;
+ static int node_flip[MAX_NUMNODES] __initdata;
+ static int old_nid_map[NR_CPUS] __initdata;
+
+ /*
+ * Mark every nid that owns at least one memory block and count the
+ * distinct ones in nnode.
+ */
+ for (nnode = 0, p = &node_memblk[0]; p < &node_memblk[num_node_memblks]; p++)
+ if (!test_bit(p->nid, (void *) nodes_with_mem)) {
+ set_bit(p->nid, (void *) nodes_with_mem);
+ nnode++;
+ }
+
+ /*
+ * All nids with memory.
+ */
+ if (nnode == num_online_nodes())
+ return;
+
+ /*
+ * Change nids and attempt to migrate CPU-only nodes
+ * to the best numa_slit (closest neighbor) possible.
+ * For reassigned CPU nodes a nid can't be arrived at
+ * until after this loop because the target nid's new
+ * identity might not have been established yet. So
+ * new nid values are fabricated above num_online_nodes() and
+ * mapped back later to their true value.
+ */
+ /* MCD - This code is a bit complicated, but may be unnecessary now.
+ * We can now handle much more interesting node-numbering.
+ * The old requirement that 0 <= nid <= numnodes <= MAX_NUMNODES
+ * and that there be no holes in the numbering 0..numnodes
+ * has become simply 0 <= nid <= MAX_NUMNODES.
+ */
+ /* nid is the next free slot in the compacted numbering. */
+ nid = 0;
+ for_each_online_node(i) {
+ if (test_bit(i, (void *) nodes_with_mem)) {
+ /*
+ * Save original nid value for numa_slit
+ * fixup and node_cpuid reassignments.
+ */
+ node_flip[nid] = i;
+
+ /* Node already sits at its compacted position: nothing to rename. */
+ if (i == nid) {
+ nid++;
+ continue;
+ }
+
+ /* Rename the memory blocks of old node i to the new nid. */
+ for (p = &node_memblk[0]; p < &node_memblk[num_node_memblks]; p++)
+ if (p->nid == i)
+ p->nid = nid;
+
+ cpunid = nid;
+ nid++;
+ } else
+ /* Sentinel: node i has no memory, so its CPUs are reassigned below. */
+ cpunid = MAX_NUMNODES;
+
+ for (cpu = 0; cpu < NR_CPUS; cpu++)
+ if (node_cpuid[cpu].nid == i) {
+ /*
+ * For nodes not being reassigned just
+ * fix the cpu's nid and reverse pxm map
+ */
+ if (cpunid < MAX_NUMNODES) {
+ pxm = nid_to_pxm_map[i];
+ pxm_to_nid_map[pxm] =
+ node_cpuid[cpu].nid = cpunid;
+ continue;
+ }
+
+ /*
+ * For nodes being reassigned, find best node by
+ * numa_slit information and then make a temporary
+ * nid value based on current nid and num_online_nodes().
+ */
+ slit = 0xff;
+ /*
+ * 2*num_online_nodes() is the "no memory node found" marker;
+ * the fixup loop below maps it to nnode - 1.
+ */
+ k = 2*num_online_nodes();
+ for_each_online_node(j) {
+ if (i == j)
+ continue;
+ else if (test_bit(j, (void *) nodes_with_mem)) {
+ cslit = numa_slit[i * num_online_nodes() + j];
+ if (cslit < slit) {
+ k = num_online_nodes() + j;
+ slit = cslit;
+ }
+ }
+ }
+
+ /* save old nid map so we can update the pxm */
+ old_nid_map[cpu] = node_cpuid[cpu].nid;
+ node_cpuid[cpu].nid = k;
+ }
+ }
+
+ /*
+ * Fixup temporary nid values for CPU-only nodes.
+ */
+ for (cpu = 0; cpu < NR_CPUS; cpu++)
+ if (node_cpuid[cpu].nid == (2*num_online_nodes())) {
+ pxm = nid_to_pxm_map[old_nid_map[cpu]];
+ pxm_to_nid_map[pxm] = node_cpuid[cpu].nid = nnode - 1;
+ } else {
+ /*
+ * A temporary nid encodes (num_online_nodes() + old target nid);
+ * look the old target up in node_flip[] to get its new nid.
+ */
+ for (i = 0; i < nnode; i++) {
+ if (node_flip[i] != (node_cpuid[cpu].nid - num_online_nodes()))
+ continue;
+
+ pxm = nid_to_pxm_map[old_nid_map[cpu]];
+ pxm_to_nid_map[pxm] = node_cpuid[cpu].nid = i;
+ break;
+ }
+ }
+
+ /*
+ * Fix numa_slit by compressing from larger
+ * nid array to reduced nid array.
+ */
+ for (i = 0; i < nnode; i++)
+ for (j = 0; j < nnode; j++)
+ numa_slit_fix[i * nnode + j] =
+ numa_slit[node_flip[i] * num_online_nodes() + node_flip[j]];
+
+ memcpy(numa_slit, numa_slit_fix, sizeof (numa_slit));
+
+ /* Publish the compacted numbering: only nids 0..nnode-1 stay online. */
+ nodes_clear(node_online_map);
+ for (i = 0; i < nnode; i++)
+ node_set_online(i);
+
+ return;
+}
+
+/*
+ * To prevent cache aliasing effects, align per-node structures so that they
+ * start at addresses that are strided by node number: @addr is rounded up
+ * to the next 1MB boundary and then offset by @node * PERCPU_PAGE_SIZE.
+ */
+#define NODEDATA_ALIGN(addr, node) \
+ ((((addr) + 1024*1024-1) & ~(1024*1024-1)) + (node)*PERCPU_PAGE_SIZE)
+
+/**
+ * build_node_maps - callback to setup bootmem structs for each node
+ * @start: physical start of range
+ * @len: length of range
+ * @node: node where this range resides
+ *
+ * We allocate a struct bootmem_data for each piece of memory that we wish to
+ * treat as a virtually contiguous block (i.e. each node). Each such block
+ * must start on an %IA64_GRANULE_SIZE boundary, so we round the address down
+ * if necessary. Any non-existent pages will simply be part of the virtual
+ * memmap. We also update min_low_pfn and max_low_pfn here as we receive
+ * memory ranges from the caller.
+ */
+static int __init build_node_maps(unsigned long start, unsigned long len,
+				  int node)
+{
+	struct bootmem_data *bdata = &mem_data[node].bootmem_data;
+	unsigned long first = GRANULEROUNDDOWN(start);
+	unsigned long last_pfn = GRANULEROUNDUP(start + len) >> PAGE_SHIFT;
+
+	if (bdata->node_low_pfn) {
+		/* Node seen before: widen the span it already covers. */
+		bdata->node_boot_start = min(first, bdata->node_boot_start);
+		bdata->node_low_pfn = max(last_pfn, bdata->node_low_pfn);
+	} else {
+		/* First range reported for this node. */
+		bdata->node_boot_start = first;
+		bdata->node_low_pfn = last_pfn;
+	}
+
+	/* Keep the global pfn limits in step with the per-node ones. */
+	min_low_pfn = min(min_low_pfn, bdata->node_boot_start >> PAGE_SHIFT);
+	max_low_pfn = max(max_low_pfn, bdata->node_low_pfn);
+
+	return 0;
+}
+
+/**
+ * early_nr_phys_cpus_node - return number of physical cpus on a given node
+ * @node: node to check
+ *
+ * Count the number of physical cpus on @node. These are cpus that actually
+ * exist. We can't use nr_cpus_node() yet because
+ * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been
+ * called yet.
+ */
+static int early_nr_phys_cpus_node(int node)
+{
+	int count = 0;
+	int cpu;
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		if (node_cpuid[cpu].nid != node)
+			continue;
+		/* cpu 0 always counts; any other cpu needs a non-zero phys_id. */
+		if (cpu != 0 && !node_cpuid[cpu].phys_id)
+			continue;
+		count++;
+	}
+
+	return count;
+}
+
+
+/**
+ * early_nr_cpus_node - return number of cpus on a given node
+ * @node: node to check
+ *
+ * Count the number of cpus on @node. We can't use nr_cpus_node() yet because
+ * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been
+ * called yet. Note that node 0 will also count all non-existent cpus.
+ */
+static int early_nr_cpus_node(int node)
+{
+	int count = 0;
+	int cpu;
+
+	/* Every node_cpuid[] slot whose nid matches is counted. */
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		if (node_cpuid[cpu].nid == node)
+			count++;
+	}
+
+	return count;
+}
+
+/**
+ * find_pernode_space - allocate memory for memory map and per-node structures
+ * @start: physical start of range
+ * @len: length of range
+ * @node: node where this range resides
+ *
+ * This routine reserves space for the per-cpu data struct, the list of
+ * pg_data_ts and the per-node data struct. Each node will have something like
+ * the following in the first chunk of addr. space large enough to hold it.
+ *
+ * ________________________
+ * | |
+ * |~~~~~~~~~~~~~~~~~~~~~~~~| <-- NODEDATA_ALIGN(start, node) for the first
+ * | PERCPU_PAGE_SIZE * | start and length big enough
+ * | cpus_on_this_node | Node 0 will also have entries for all non-existent cpus.
+ * |------------------------|
+ * | local pg_data_t * |
+ * |------------------------|
+ * | local ia64_node_data |
+ * |------------------------|
+ * | ??? |
+ * |________________________|
+ *
+ * Once this space has been set aside, the bootmem maps are initialized. We
+ * could probably move the allocation of the per-cpu and ia64_node_data space
+ * outside of this function and use alloc_bootmem_node(), but doing it here
+ * is straightforward and we get the alignments we want so...
+ */
+static int __init find_pernode_space(unsigned long start, unsigned long len,
+ int node)
+{
+ unsigned long epfn, cpu, cpus, phys_cpus;
+ unsigned long pernodesize = 0, pernode, pages, mapsize;
+ void *cpu_data;
+ struct bootmem_data *bdp = &mem_data[node].bootmem_data;
+
+ /* Page frame number just past the end of this range. */
+ epfn = (start + len) >> PAGE_SHIFT;
+
+ /* Size of the bootmem bitmap needed to cover the whole node span. */
+ pages = bdp->node_low_pfn - (bdp->node_boot_start >> PAGE_SHIFT);
+ mapsize = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
+
+ /*
+ * Make sure this memory falls within this node's usable memory
+ * since we may have thrown some away in build_node_maps().
+ */
+ if (start < bdp->node_boot_start || epfn > bdp->node_low_pfn)
+ return 0;
+
+ /* Don't setup this node's local space twice... */
+ if (mem_data[node].pernode_addr)
+ return 0;
+
+ /*
+ * Calculate total size needed, incl. what's necessary
+ * for good alignment and alias prevention.
+ */
+ cpus = early_nr_cpus_node(node);
+ /* NOTE(review): phys_cpus is assigned here but not read again in this function. */
+ phys_cpus = early_nr_phys_cpus_node(node);
+ pernodesize += PERCPU_PAGE_SIZE * cpus;
+ pernodesize += node * L1_CACHE_BYTES;
+ pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
+ pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
+ pernodesize = PAGE_ALIGN(pernodesize);
+ pernode = NODEDATA_ALIGN(start, node);
+
+ /* Is this range big enough for what we want to store here? */
+ if (start + len > (pernode + pernodesize + mapsize)) {
+ mem_data[node].pernode_addr = pernode;
+ mem_data[node].pernode_size = pernodesize;
+ memset(__va(pernode), 0, pernodesize);
+
+ /*
+ * Carve the area up in the order used for the size calculation
+ * above: per-cpu pages, node-strided padding, pg_data_t, then
+ * ia64_node_data. pernode/cpu_data hold addresses that are
+ * converted with __va() wherever they are dereferenced.
+ */
+ cpu_data = (void *)pernode;
+ pernode += PERCPU_PAGE_SIZE * cpus;
+ pernode += node * L1_CACHE_BYTES;
+
+ mem_data[node].pgdat = __va(pernode);
+ pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
+
+ mem_data[node].node_data = __va(pernode);
+ pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
+
+ mem_data[node].pgdat->bdata = bdp;
+ /* NOTE(review): pernode is not read after this increment in this function. */
+ pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
+
+ /*
+ * Copy the static per-cpu data into the region we
+ * just set aside and then setup __per_cpu_offset
+ * for each CPU on this node.
+ */
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ if (node == node_cpuid[cpu].nid) {
+ memcpy(__va(cpu_data), __phys_per_cpu_start,
+ __per_cpu_end - __per_cpu_start);
+ __per_cpu_offset[cpu] = (char*)__va(cpu_data) -
+ __per_cpu_start;
+ cpu_data += PERCPU_PAGE_SIZE;
+ }
+ }
+ }
+
+ /* Always 0, whether or not this range was used for the per-node area. */
+ return 0;
+}
+
+/**
+ * free_node_bootmem - free bootmem allocator memory for use
+ * @start: physical start of range
+ * @len: length of range
+ * @node: node where this range resides
+ *
+ * Simply calls the bootmem allocator to free the specified ranged from
+ * the given pg_data_t's bdata struct. After this function has been called
+ * for all the entries in the EFI memory map, the bootmem allocator will
+ * be ready to service allocation requests.
+ */
+static int __init free_node_bootmem(unsigned long start, unsigned long len,
+				    int node)
+{
+	pg_data_t *pgdat = mem_data[node].pgdat;
+
+	/* Release the range [start, start + len) in @node's bootmem allocator. */
+	free_bootmem_node(pgdat, start, len);
+	return 0;
+}
+
+/**
+ * reserve_pernode_space - reserve memory for per-node space
+ *
+ * Reserve the space used by the bootmem maps & per-node space in the boot
+ * allocator so that when we actually create the real mem maps we don't
+ * use their memory.
+ */
+static void __init reserve_pernode_space(void)
+{
+	int node;
+
+	for_each_online_node(node) {
+		pg_data_t *pgdat = mem_data[node].pgdat;
+		struct bootmem_data *bdata = pgdat->bdata;
+		unsigned long span, map_base, map_size;
+		unsigned long area_base, area_size;
+
+		/* The bootmem bitmap covering this node's pfn span... */
+		span = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
+		map_size = bootmem_bootmap_pages(span) << PAGE_SHIFT;
+		map_base = __pa(bdata->node_bootmem_map);
+		reserve_bootmem_node(pgdat, map_base, map_size);
+
+		/* ...followed by the per-node area itself. */
+		area_size = mem_data[node].pernode_size;
+		area_base = __pa(mem_data[node].pernode_addr);
+		reserve_bootmem_node(pgdat, area_base, area_size);
+	}
+}
+
+/**
+ * initialize_pernode_data - fixup per-cpu & per-node pointers
+ *
+ * Each node's per-node area has a copy of the global pg_data_t list, so
+ * we copy that to each node here, as well as setting the per-cpu pointer
+ * to the local node data structure. Slots that belong to nodes which are
+ * not online are published as NULL. The active_cpus field of the per-node
+ * structure gets setup by the platform_cpu_init() function later.
+ */
+static void __init initialize_pernode_data(void)
+{
+	int cpu, node;
+	pg_data_t *pgdat_list[MAX_NUMNODES];
+
+	/*
+	 * Only the online entries are filled in below, yet the whole array
+	 * is copied to every node. Clear it first so that the offline slots
+	 * hold NULL instead of whatever happened to be on the stack.
+	 */
+	memset(pgdat_list, 0, sizeof(pgdat_list));
+
+	for_each_online_node(node)
+		pgdat_list[node] = mem_data[node].pgdat;
+
+	/* Copy the pg_data_t list to each node and init the node field */
+	for_each_online_node(node) {
+		memcpy(mem_data[node].node_data->pg_data_ptrs, pgdat_list,
+		       sizeof(pgdat_list));
+	}
+
+	/* Set the node_data pointer for each per-cpu struct */
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		node = node_cpuid[cpu].nid;
+		per_cpu(cpu_info, cpu).node_data = mem_data[node].node_data;
+	}
+}
+
+/**
+ * find_memory - walk the EFI memory map and setup the bootmem allocator
+ *
+ * Called early in boot to setup the bootmem allocator, and to
+ * allocate the per-cpu and per-node structures.
+ */
+void __init find_memory(void)
+{
+ int node;
+
+ reserve_memory();
+
+ /* Without any node information, fall back to a single node 0. */
+ if (num_online_nodes() == 0) {
+ printk(KERN_ERR "node info missing!\n");
+ node_set_online(0);
+ }
+
+ /*
+ * Start from the widest possible values so that the min()/max()
+ * updates in build_node_maps() can only narrow them.
+ */
+ min_low_pfn = -1;
+ max_low_pfn = 0;
+
+ if (num_online_nodes() > 1)
+ reassign_cpu_only_nodes();
+
+ /* These actually end up getting called by call_pernode_memory() */
+ efi_memmap_walk(filter_rsvd_memory, build_node_maps);
+ efi_memmap_walk(filter_rsvd_memory, find_pernode_space);
+
+ /*
+ * Initialize the boot memory maps in reverse order since that's
+ * what the bootmem allocator expects
+ */
+ for (node = MAX_NUMNODES - 1; node >= 0; node--) {
+ unsigned long pernode, pernodesize, map;
+ struct bootmem_data *bdp;
+
+ if (!node_online(node))
+ continue;
+
+ bdp = &mem_data[node].bootmem_data;
+ pernode = mem_data[node].pernode_addr;
+ pernodesize = mem_data[node].pernode_size;
+ /* The address right after the per-node area is passed on as the map location. */
+ map = pernode + pernodesize;
+
+ /* Sanity check... */
+ if (!pernode)
+ panic("pernode space for node %d "
+ "could not be allocated!", node);
+
+ init_bootmem_node(mem_data[node].pgdat,
+ map>>PAGE_SHIFT,
+ bdp->node_boot_start>>PAGE_SHIFT,
+ bdp->node_low_pfn);
+ }
+
+ /* Hand the usable ranges to the per-node bootmem allocators... */
+ efi_memmap_walk(filter_rsvd_memory, free_node_bootmem);
+
+ /* ...then take back the bitmaps and per-node areas set up above. */
+ reserve_pernode_space();
+ initialize_pernode_data();
+
+ max_pfn = max_low_pfn;
+
+ find_initrd();
+}
+
+/**
+ * per_cpu_init - setup per-cpu variables
+ *
+ * find_pernode_space() does most of this already, we just need to set
+ * local_per_cpu_offset
+ */
+void *per_cpu_init(void)
+{
+	int cpu = 0;
+
+	/* Only cpu 0 publishes the offsets, and it does so for every cpu. */
+	if (smp_processor_id() == 0) {
+		while (cpu < NR_CPUS) {
+			per_cpu(local_per_cpu_offset, cpu) =
+				__per_cpu_offset[cpu];
+			cpu++;
+		}
+	}
+
+	/* Address of the calling cpu's copy of the per-cpu area. */
+	return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
+}
+
+/**
+ * show_mem - give short summary of memory stats
+ *
+ * Shows a simple page count of reserved and used pages in the system.
+ * For discontig machines, it does this on a per-pgdat basis.
+ */
+void show_mem(void)
+{
+	int sum_reserved = 0, sum_shared = 0, sum_cached = 0;
+	unsigned long sum_present = 0;
+	pg_data_t *pgdat;
+	int idx;
+
+	printk("Mem-info:\n");
+	show_free_areas();
+	printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+
+	for_each_pgdat(pgdat) {
+		int node_reserved = 0, node_shared = 0, node_cached = 0;
+		unsigned long node_present = pgdat->node_present_pages;
+
+		printk("Node ID: %d\n", pgdat->node_id);
+
+		/* Classify every valid page frame spanned by this node. */
+		for (idx = 0; idx < pgdat->node_spanned_pages; idx++) {
+			struct page *page;
+
+			if (!ia64_pfn_valid(pgdat->node_start_pfn + idx))
+				continue;
+
+			page = pgdat->node_mem_map + idx;
+			if (PageReserved(page)) {
+				node_reserved++;
+			} else if (PageSwapCache(page)) {
+				node_cached++;
+			} else if (page_count(page)) {
+				/* Every reference beyond the first counts as a share. */
+				node_shared += page_count(page) - 1;
+			}
+		}
+
+		printk("\t%ld pages of RAM\n", node_present);
+		printk("\t%d reserved pages\n", node_reserved);
+		printk("\t%d pages shared\n", node_shared);
+		printk("\t%d pages swap cached\n", node_cached);
+
+		sum_present += node_present;
+		sum_reserved += node_reserved;
+		sum_cached += node_cached;
+		sum_shared += node_shared;
+	}
+
+	printk("%ld pages of RAM\n", sum_present);
+	printk("%d reserved pages\n", sum_reserved);
+	printk("%d pages shared\n", sum_shared);
+	printk("%d pages swap cached\n", sum_cached);
+	printk("Total of %ld pages in page table cache\n", pgtable_cache_size);
+	printk("%d free buffer pages\n", nr_free_buffer_pages());
+}
+
+/**
+ * call_pernode_memory - use SRAT to call callback functions with node info
+ * @start: physical start of range
+ * @len: length of range
+ * @arg: function to call for each range; invoked as
+ *	int (*)(unsigned long start, unsigned long len, int node)
+ *
+ * efi_memmap_walk() knows nothing about layout of memory across nodes. Find
+ * out to which node a block of memory belongs. Ignore memory that we cannot
+ * identify, and split blocks that run across multiple nodes.
+ *
+ * Take this opportunity to round the start address up and the end address
+ * down to page boundaries. Ranges that become empty after rounding are
+ * dropped. The callback's return value is ignored.
+ */
+void call_pernode_memory(unsigned long start, unsigned long len, void *arg)
+{
+	unsigned long rs, re, end = start + len;
+	/*
+	 * The callbacks handed in by this file all return int, so call
+	 * through a pointer of that type rather than a void-returning one.
+	 */
+	int (*func)(unsigned long, unsigned long, int) = arg;
+	int i;
+
+	start = PAGE_ALIGN(start);
+	end &= PAGE_MASK;
+	if (start >= end)
+		return;
+
+	if (!num_node_memblks) {
+		/* No SRAT table, so assume one node (node 0) */
+		(*func)(start, end - start, 0);
+		return;
+	}
+
+	for (i = 0; i < num_node_memblks; i++) {
+		/* Intersect the range with memory block i. */
+		rs = max(start, node_memblk[i].start_paddr);
+		re = min(end, node_memblk[i].start_paddr +
+			 node_memblk[i].size);
+
+		if (rs < re)
+			(*func)(rs, re - rs, node_memblk[i].nid);
+
+		/* This block reaches the end of the range: nothing left to split. */
+		if (re == end)
+			break;
+	}
+}
+
+/**
+ * count_node_pages - callback to build per-node memory info structures
+ * @start: physical start of range
+ * @len: length of range
+ * @node: node where this range resides
+ *
+ * Each node has its own number of physical pages, DMAable pages, start, and
+ * end page frame number. This routine will be called by call_pernode_memory()
+ * for each piece of usable memory and will setup these values for each node.
+ * Very similar to build_node_maps().
+ */
+static __init int count_node_pages(unsigned long start, unsigned long len, int node)
+{
+	struct early_node_data *nd = &mem_data[node];
+	unsigned long end = start + len;
+	unsigned long lo, hi;
+
+	nd->num_physpages += len >> PAGE_SHIFT;
+
+	/* Only the part of the range below MAX_DMA_ADDRESS is DMAable. */
+	if (start <= __pa(MAX_DMA_ADDRESS))
+		nd->num_dma_physpages +=
+			(min(end, __pa(MAX_DMA_ADDRESS)) - start) >>PAGE_SHIFT;
+
+	/* Widen the range to granule (and, for the start, order) boundaries. */
+	lo = GRANULEROUNDDOWN(start);
+	lo = ORDERROUNDDOWN(lo);
+	hi = GRANULEROUNDUP(end);
+
+	nd->max_pfn = max(nd->max_pfn, hi >> PAGE_SHIFT);
+	nd->min_pfn = min(nd->min_pfn, lo >> PAGE_SHIFT);
+
+	return 0;
+}
+
+/**
+ * paging_init - setup page tables
+ *
+ * paging_init() sets up the page tables for each node of the system and frees
+ * the bootmem allocator memory for general use.
+ */
+void __init paging_init(void)
+{
+ unsigned long max_dma;
+ unsigned long zones_size[MAX_NR_ZONES];
+ unsigned long zholes_size[MAX_NR_ZONES];
+ unsigned long pfn_offset = 0;
+ int node;
+
+ /* First page frame number that lies above the DMA limit. */
+ max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+
+ /* so min() will work in count_node_pages */
+ for_each_online_node(node)
+ mem_data[node].min_pfn = ~0UL;
+
+ efi_memmap_walk(filter_rsvd_memory, count_node_pages);
+
+ for_each_online_node(node) {
+ /* Both arrays are reused for every node, so start from zero. */
+ memset(zones_size, 0, sizeof(zones_size));
+ memset(zholes_size, 0, sizeof(zholes_size));
+
+ num_physpages += mem_data[node].num_physpages;
+
+ /*
+ * In each case a zone's size is the pfn span it covers and its
+ * hole count is that span minus the pages actually counted.
+ */
+ if (mem_data[node].min_pfn >= max_dma) {
+ /* All of this node's memory is above ZONE_DMA */
+ zones_size[ZONE_NORMAL] = mem_data[node].max_pfn -
+ mem_data[node].min_pfn;
+ zholes_size[ZONE_NORMAL] = mem_data[node].max_pfn -
+ mem_data[node].min_pfn -
+ mem_data[node].num_physpages;
+ } else if (mem_data[node].max_pfn < max_dma) {
+ /* All of this node's memory is in ZONE_DMA */
+ zones_size[ZONE_DMA] = mem_data[node].max_pfn -
+ mem_data[node].min_pfn;
+ zholes_size[ZONE_DMA] = mem_data[node].max_pfn -
+ mem_data[node].min_pfn -
+ mem_data[node].num_dma_physpages;
+ } else {
+ /* This node has memory in both zones */
+ zones_size[ZONE_DMA] = max_dma -
+ mem_data[node].min_pfn;
+ zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] -
+ mem_data[node].num_dma_physpages;
+ zones_size[ZONE_NORMAL] = mem_data[node].max_pfn -
+ max_dma;
+ zholes_size[ZONE_NORMAL] = zones_size[ZONE_NORMAL] -
+ (mem_data[node].num_physpages -
+ mem_data[node].num_dma_physpages);
+ }
+
+ /*
+ * Done only while handling node 0: take room for max_low_pfn
+ * struct page entries off the top of the vmalloc area and use
+ * it as the virtual mem_map shared by all nodes.
+ */
+ if (node == 0) {
+ vmalloc_end -=
+ PAGE_ALIGN(max_low_pfn * sizeof(struct page));
+ vmem_map = (struct page *) vmalloc_end;
+
+ efi_memmap_walk(create_mem_map_page_table, NULL);
+ printk("Virtual mem_map starts at 0x%p\n", vmem_map);
+ }
+
+ /* This node's slice of vmem_map starts at its lowest pfn. */
+ pfn_offset = mem_data[node].min_pfn;
+
+ NODE_DATA(node)->node_mem_map = vmem_map + pfn_offset;
+ free_area_init_node(node, NODE_DATA(node), zones_size,
+ pfn_offset, zholes_size);
+ }
+
+ zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
+}
diff --git a/arch/ia64/mm/extable.c b/arch/ia64/mm/extable.c
new file mode 100644
index 00000000000..6d259e34f35
--- /dev/null
+++ b/arch/ia64/mm/extable.c
@@ -0,0 +1,90 @@
+/*
+ * Kernel exception handling table support. Derived from arch/alpha/mm/extable.c.
+ *
+ * Copyright (C) 1998, 1999, 2001-2002, 2004 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <linux/config.h>
+#include <linux/sort.h>
+
+#include <asm/uaccess.h>
+#include <asm/module.h>
+
+/*
+ * sort() comparison callback: order two exception-table entries by the
+ * address obtained from adding each entry's addr member to the location
+ * of that member.
+ */
+static int cmp_ex(const void *a, const void *b)
+{
+	const struct exception_table_entry *lhs = a;
+	const struct exception_table_entry *rhs = b;
+	u64 lhs_ip = (u64) &lhs->addr + lhs->addr;
+	u64 rhs_ip = (u64) &rhs->addr + rhs->addr;
+
+	/* Compare rather than subtract, so the result cannot overflow. */
+	if (lhs_ip == rhs_ip)
+		return 0;
+	return (lhs_ip > rhs_ip) ? 1 : -1;
+}
+
+/*
+ * sort() swap callback for exception-table entries. Each entry's addr and
+ * cont members are offsets relative to where the entry itself is stored
+ * (see how cmp_ex() turns them into addresses), so exchanging two entries
+ * means adjusting the offsets by the distance between the two slots.
+ * @size is not used.
+ */
+static void swap_ex(void *a, void *b, int size)
+{
+ struct exception_table_entry *l = a, *r = b, tmp;
+ /* Byte distance from slot l to slot r. */
+ u64 delta = (u64) r - (u64) l;
+
+ /* tmp keeps l's raw offsets; they are rebased below before being stored in r. */
+ tmp = *l;
+ /* r's contents move down to l: the targets are now delta further away. */
+ l->addr = r->addr + delta;
+ l->cont = r->cont + delta;
+ /* l's old contents move up to r: the targets are now delta closer. */
+ r->addr = tmp.addr - delta;
+ r->cont = tmp.cont - delta;
+}
+
+/*
+ * Sort the exception table. It's usually already sorted, but there
+ * may be unordered entries due to multiple text sections (such as the
+ * .init text section). Note that the exception-table-entries contain
+ * location-relative addresses, which requires a bit of care during
+ * sorting to avoid overflows in the offset members (e.g., it would
+ * not be safe to make a temporary copy of an exception-table entry on
+ * the stack, because the stack may be more than 2GB away from the
+ * exception-table).
+ */
+void sort_extable (struct exception_table_entry *start,
+		   struct exception_table_entry *finish)
+{
+	/* @finish points one past the last entry, so this is the entry count. */
+	size_t nr_entries = finish - start;
+
+	sort(start, nr_entries, sizeof(*start), cmp_ex, swap_ex);
+}
+
+/*
+ * Binary-search the sorted table [first, last] (both ends inclusive) for
+ * the entry whose address equals @ip. Returns the entry, or NULL when no
+ * entry matches.
+ */
+const struct exception_table_entry *
+search_extable (const struct exception_table_entry *first,
+		const struct exception_table_entry *last,
+		unsigned long ip)
+{
+	while (first <= last) {
+		const struct exception_table_entry *probe =
+			first + (last - first) / 2;
+		/* The entry's address is stored relative to the addr member. */
+		unsigned long probe_ip = (u64) &probe->addr + probe->addr;
+		long delta = probe_ip - ip;
+
+		if (delta < 0) {
+			first = probe + 1;
+			continue;
+		}
+		if (delta > 0) {
+			last = probe - 1;
+			continue;
+		}
+		return probe;
+	}
+
+	return NULL;
+}
+
+/*
+ * Redirect execution to the continuation recorded in exception-table
+ * entry @e, reporting -EFAULT in r8.
+ */
+void
+ia64_handle_exception (struct pt_regs *regs, const struct exception_table_entry *e)
+{
+	/* The continuation is stored relative to the location of e->cont. */
+	long target = (u64) &e->cont + e->cont;
+	int clear_r9 = (target & 4) != 0;
+
+	regs->r8 = -EFAULT;
+	/* Bit 2 of the continuation value asks for r9 to be zeroed as well. */
+	if (clear_r9)
+		regs->r9 = 0;
+
+	/* Resume at the target with its low four bits cleared... */
+	regs->cr_iip = target & ~0xf;
+	/* ...and take the continuation slot number from the low two bits. */
+	ia64_psr(regs)->ri = target & 0x3;
+}
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
new file mode 100644
index 00000000000..da859125aae
--- /dev/null
+++ b/arch/ia64/mm/fault.c
@@ -0,0 +1,261 @@
+/*
+ * MMU fault handling support.
+ *
+ * Copyright (C) 1998-2002 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+extern void die (char *, struct pt_regs *, long);
+
+/*
+ * This routine is analogous to expand_stack() but instead grows the
+ * register backing store (which grows towards higher addresses).
+ * Since the register backing store is accessed sequentially, we
+ * disallow growing the RBS by more than a page at a time. Note that
+ * the VM_GROWSUP flag can be set on any VM area but that's fine
+ * because the total process size is still limited by RLIMIT_STACK and
+ * RLIMIT_AS.
+ */
+static inline long
+expand_backing_store (struct vm_area_struct *vma, unsigned long address)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	/* The area always grows by exactly one page. */
+	const unsigned long grow = PAGE_SIZE >> PAGE_SHIFT;
+
+	/* Refuse once the distance from vm_start exceeds RLIMIT_STACK... */
+	if (address - vma->vm_start > current->signal->rlim[RLIMIT_STACK].rlim_cur)
+		return -ENOMEM;
+	/* ...or once the grown address space would exceed RLIMIT_AS. */
+	if (((mm->total_vm + grow) << PAGE_SHIFT) > current->signal->rlim[RLIMIT_AS].rlim_cur)
+		return -ENOMEM;
+
+	vma->vm_end += PAGE_SIZE;
+	mm->total_vm += grow;
+	if (vma->vm_flags & VM_LOCKED)
+		mm->locked_vm += grow;
+	__vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
+
+	return 0;
+}
+
+/*
+ * Return TRUE if ADDRESS points at a page in the kernel's mapped segment
+ * (inside region 5, on ia64) and that page is present.
+ */
+static int
+mapped_kernel_page_is_present (unsigned long address)
+{
+	pgd_t *pgd = pgd_offset_k(address);
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *ptep, entry;
+
+	/* Walk the kernel page table; a missing or bad level means "not present". */
+	if (pgd_none(*pgd))
+		return 0;
+	if (pgd_bad(*pgd))
+		return 0;
+
+	pud = pud_offset(pgd, address);
+	if (pud_none(*pud))
+		return 0;
+	if (pud_bad(*pud))
+		return 0;
+
+	pmd = pmd_offset(pud, address);
+	if (pmd_none(*pmd))
+		return 0;
+	if (pmd_bad(*pmd))
+		return 0;
+
+	ptep = pte_offset_kernel(pmd, address);
+	if (!ptep)
+		return 0;
+
+	entry = *ptep;
+	return pte_present(entry);
+}
+
+/*
+ * Handle a page fault.
+ *
+ * @address: faulting address
+ * @isr:     interruption status bits for the fault (access type, speculation, ...)
+ * @regs:    register state at the time of the fault
+ *
+ * The fault is either resolved through handle_mm_fault() (growing a stack
+ * or register-backing-store vma first if needed), turned into a signal for
+ * user mode, fixed up via the exception table, or ends in an oops.
+ */
+void
+ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs)
+{
+ int signal = SIGSEGV, code = SEGV_MAPERR;
+ struct vm_area_struct *vma, *prev_vma;
+ struct mm_struct *mm = current->mm;
+ struct siginfo si;
+ unsigned long mask;
+
+ /*
+ * If we're in an interrupt or have no user context, we must not take the fault..
+ */
+ if (in_atomic() || !mm)
+ goto no_context;
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+ /*
+ * If fault is in region 5 and we are in the kernel, we may already
+ * have the mmap_sem (pfn_valid macro is called during mmap). There
+ * is no vma for region 5 addr's anyway, so skip getting the semaphore
+ * and go directly to the exception handling code.
+ */
+
+ if ((REGION_NUMBER(address) == 5) && !user_mode(regs))
+ goto bad_area_no_up;
+#endif
+
+ down_read(&mm->mmap_sem);
+
+ vma = find_vma_prev(mm, address, &prev_vma);
+ if (!vma)
+ goto bad_area;
+
+ /* find_vma_prev() returns vma such that address < vma->vm_end or NULL */
+ if (address < vma->vm_start)
+ goto check_expansion;
+
+ good_area:
+ /* From here on a failure is a permission problem, not a missing mapping. */
+ code = SEGV_ACCERR;
+
+ /* OK, we've got a good vm_area for this memory area. Check the access permissions: */
+
+# define VM_READ_BIT 0
+# define VM_WRITE_BIT 1
+# define VM_EXEC_BIT 2
+
+# if (((1 << VM_READ_BIT) != VM_READ || (1 << VM_WRITE_BIT) != VM_WRITE) \
+ || (1 << VM_EXEC_BIT) != VM_EXEC)
+# error File is out of sync with <linux/mm.h>. Please update.
+# endif
+
+ /* Translate the execute/write/read bits of the ISR into the matching VM_* flags. */
+ mask = ( (((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT)
+ | (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT)
+ | (((isr >> IA64_ISR_R_BIT) & 1UL) << VM_READ_BIT));
+
+ /* Every kind of access that was attempted must be permitted by the vma. */
+ if ((vma->vm_flags & mask) != mask)
+ goto bad_area;
+
+ survive:
+ /*
+ * If for any reason at all we couldn't handle the fault, make
+ * sure we exit gracefully rather than endlessly redo the
+ * fault.
+ */
+ switch (handle_mm_fault(mm, vma, address, (mask & VM_WRITE) != 0)) {
+ case VM_FAULT_MINOR:
+ ++current->min_flt;
+ break;
+ case VM_FAULT_MAJOR:
+ ++current->maj_flt;
+ break;
+ case VM_FAULT_SIGBUS:
+ /*
+ * We ran out of memory, or some other thing happened
+ * to us that made us unable to handle the page fault
+ * gracefully.
+ */
+ signal = SIGBUS;
+ goto bad_area;
+ case VM_FAULT_OOM:
+ goto out_of_memory;
+ default:
+ BUG();
+ }
+ up_read(&mm->mmap_sem);
+ return;
+
+ check_expansion:
+ /*
+ * The address lies below vma. If it sits exactly at the end of a
+ * preceding VM_GROWSUP vma, grow that one upwards (else branch);
+ * otherwise try to grow a VM_GROWSDOWN vma downwards.
+ */
+ if (!(prev_vma && (prev_vma->vm_flags & VM_GROWSUP) && (address == prev_vma->vm_end))) {
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto bad_area;
+ if (REGION_NUMBER(address) != REGION_NUMBER(vma->vm_start)
+ || REGION_OFFSET(address) >= RGN_MAP_LIMIT)
+ goto bad_area;
+ if (expand_stack(vma, address))
+ goto bad_area;
+ } else {
+ vma = prev_vma;
+ if (REGION_NUMBER(address) != REGION_NUMBER(vma->vm_start)
+ || REGION_OFFSET(address) >= RGN_MAP_LIMIT)
+ goto bad_area;
+ if (expand_backing_store(vma, address))
+ goto bad_area;
+ }
+ goto good_area;
+
+ bad_area:
+ up_read(&mm->mmap_sem);
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+ bad_area_no_up:
+#endif
+ if ((isr & IA64_ISR_SP)
+ || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH))
+ {
+ /*
+ * This fault was due to a speculative load or lfetch.fault, set the "ed"
+ * bit in the psr to ensure forward progress. (Target register will get a
+ * NaT for ld.s, lfetch will be canceled.)
+ */
+ ia64_psr(regs)->ed = 1;
+ return;
+ }
+ /* A user-mode fault is reported to the task as a signal (SIGSEGV or SIGBUS). */
+ if (user_mode(regs)) {
+ si.si_signo = signal;
+ si.si_errno = 0;
+ si.si_code = code;
+ si.si_addr = (void __user *) address;
+ si.si_isr = isr;
+ si.si_flags = __ISR_VALID;
+ force_sig_info(signal, &si, current);
+ return;
+ }
+
+ no_context:
+ if (isr & IA64_ISR_SP) {
+ /*
+ * This fault was due to a speculative load set the "ed" bit in the psr to
+ * ensure forward progress (target register will get a NaT).
+ */
+ ia64_psr(regs)->ed = 1;
+ return;
+ }
+
+ /* Kernel-mode fault: see whether the exception table provides a fixup. */
+ if (ia64_done_with_exception(regs))
+ return;
+
+ /*
+ * Since we have no vma's for region 5, we might get here even if the address is
+ * valid, due to the VHPT walker inserting a non present translation that becomes
+ * stale. If that happens, the non present fault handler already purged the stale
+ * translation, which fixed the problem. So, we check to see if the translation is
+ * valid, and return if it is.
+ */
+ if (REGION_NUMBER(address) == 5 && mapped_kernel_page_is_present(address))
+ return;
+
+ /*
+ * Oops. The kernel tried to access some bad page. We'll have to terminate things
+ * with extreme prejudice.
+ */
+ bust_spinlocks(1);
+
+ if (address < PAGE_SIZE)
+ printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference (address %016lx)\n", address);
+ else
+ printk(KERN_ALERT "Unable to handle kernel paging request at "
+ "virtual address %016lx\n", address);
+ die("Oops", regs, isr);
+ bust_spinlocks(0);
+ do_exit(SIGKILL);
+ return;
+
+ out_of_memory:
+ up_read(&mm->mmap_sem);
+ /* pid 1 is never killed here: it yields and retries the fault instead. */
+ if (current->pid == 1) {
+ yield();
+ down_read(&mm->mmap_sem);
+ goto survive;
+ }
+ printk(KERN_CRIT "VM: killing process %s\n", current->comm);
+ if (user_mode(regs))
+ do_exit(SIGKILL);
+ goto no_context;
+}
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
new file mode 100644
index 00000000000..40ad8328ffd
--- /dev/null
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -0,0 +1,357 @@
+/*
+ * IA-64 Huge TLB Page Support for Kernel.
+ *
+ * Copyright (C) 2002-2004 Rohit Seth <rohit.seth@intel.com>
+ * Copyright (C) 2003-2004 Ken Chen <kenneth.w.chen@intel.com>
+ *
+ * Sep, 2003: add numa support
+ * Feb, 2004: dynamic hugetlb page size via boot parameter
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/pagemap.h>
+#include <linux/smp_lock.h>
+#include <linux/slab.h>
+#include <linux/sysctl.h>
+#include <asm/mman.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+
+/*
+ * Huge page shift currently in effect.  Starts out as HPAGE_SHIFT_DEFAULT and
+ * is overwritten by hugetlb_setup_sz() when "hugepagesz=" is given at boot.
+ */
+unsigned int hpage_shift=HPAGE_SHIFT_DEFAULT;
+
+/*
+ * Walk (allocating as needed) the page-table levels that cover the huge page
+ * at @addr in @mm and return the mapped pte slot, or NULL as soon as any level
+ * cannot be allocated.  The walk uses htlbpage_to_page(@addr), not @addr.
+ */
+static pte_t *
+huge_pte_alloc (struct mm_struct *mm, unsigned long addr)
+{
+	unsigned long mapped = htlbpage_to_page(addr);
+	pgd_t *top = pgd_offset(mm, mapped);
+	pud_t *upper;
+	pmd_t *middle;
+
+	upper = pud_alloc(mm, top, mapped);
+	if (!upper)
+		return NULL;
+
+	middle = pmd_alloc(mm, upper, mapped);
+	if (!middle)
+		return NULL;
+
+	return pte_alloc_map(mm, middle, mapped);
+}
+
+/*
+ * Look up, without allocating, the pte slot for the huge page at @addr in @mm.
+ * Returns NULL when the pgd, pud or pmd level is not present.  The walk uses
+ * htlbpage_to_page(@addr), not @addr.
+ */
+static pte_t *
+huge_pte_offset (struct mm_struct *mm, unsigned long addr)
+{
+	unsigned long mapped = htlbpage_to_page(addr);
+	pgd_t *top = pgd_offset(mm, mapped);
+	pud_t *upper;
+	pmd_t *middle;
+
+	if (!pgd_present(*top))
+		return NULL;
+
+	upper = pud_offset(top, mapped);
+	if (!pud_present(*upper))
+		return NULL;
+
+	middle = pmd_offset(upper, mapped);
+	if (!pmd_present(*middle))
+		return NULL;
+
+	return pte_offset_map(middle, mapped);
+}
+
+#define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; }
+
+/*
+ * Install @page at @page_table using @vma's protection bits and charge one
+ * huge page worth of base pages to @mm's rss.  With @write_access the entry
+ * is made writable and dirty, otherwise it is write-protected; it is always
+ * marked young and passed through mk_pte_huge() before being stored.
+ */
+static void
+set_huge_pte (struct mm_struct *mm, struct vm_area_struct *vma,
+	      struct page *page, pte_t * page_table, int write_access)
+{
+	pte_t new_pte;
+
+	add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE);
+
+	new_pte = mk_pte(page, vma->vm_page_prot);
+	if (write_access)
+		new_pte = pte_mkwrite(pte_mkdirty(new_pte));
+	else
+		new_pte = pte_wrprotect(new_pte);
+	new_pte = pte_mkyoung(new_pte);
+	mk_pte_huge(new_pte);
+
+	set_pte(page_table, new_pte);
+}
+/*
+ * Validate a candidate huge page range: both @addr and @len must be multiples
+ * of the huge page size and @addr must lie in REGION_HPAGE.  Returns 0 when
+ * the range is acceptable and -EINVAL otherwise.
+ */
+int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
+{
+	/* Any low bit set in either value means it is not huge-page aligned. */
+	if ((addr | len) & ~HPAGE_MASK)
+		return -EINVAL;
+
+	return (REGION_NUMBER(addr) == REGION_HPAGE) ? 0 : -EINVAL;
+}
+
+/*
+ * Duplicate every huge page mapping of @vma from @src into @dst.  Each source
+ * entry is copied verbatim, the backing page gains a reference, and @dst's
+ * rss is charged for it.  Returns 0, or -ENOMEM if a destination pte slot
+ * cannot be allocated (entries copied so far are left in place).
+ */
+int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
+			struct vm_area_struct *vma)
+{
+	const unsigned long limit = vma->vm_end;
+	unsigned long cur;
+
+	for (cur = vma->vm_start; cur < limit; cur += HPAGE_SIZE) {
+		pte_t *to, *from, val;
+		struct page *backing;
+
+		to = huge_pte_alloc(dst, cur);
+		if (!to)
+			return -ENOMEM;
+
+		from = huge_pte_offset(src, cur);
+		val = *from;
+		backing = pte_page(val);
+		get_page(backing);
+		set_pte(to, val);
+		add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
+	}
+	return 0;
+}
+
+/*
+ * Collect base-page sized pieces of the huge pages mapped in @vma, starting
+ * at *@st, until *@length pieces have been produced or the end of @vma is
+ * reached.  For every piece, pages[i] (if @pages is given) receives the
+ * matching sub-page with an extra reference and vmas[i] (if @vmas is given)
+ * receives @vma.  On return *@st and *@length hold the next address and the
+ * remaining count; the return value is the updated index @i.
+ */
+int
+follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
+		    struct page **pages, struct vm_area_struct **vmas,
+		    unsigned long *st, int *length, int i)
+{
+	unsigned long cur = *st;
+	int remaining = *length;
+
+	do {
+		/* One pte lookup per huge page ... */
+		unsigned long hpage_base = cur & HPAGE_MASK;
+		pte_t entry = *huge_pte_offset(mm, cur);
+
+		/* ... then step through it one base page at a time. */
+		do {
+			struct page *pg = pte_page(entry);
+
+			if (pages) {
+				pg += ((cur & ~HPAGE_MASK) >> PAGE_SHIFT);
+				get_page(pg);
+				pages[i] = pg;
+			}
+			if (vmas)
+				vmas[i] = vma;
+			i++;
+			remaining--;
+			cur += PAGE_SIZE;
+		} while (((cur & HPAGE_MASK) == hpage_base) && remaining &&
+			 (cur < vma->vm_end));
+	} while (remaining && cur < vma->vm_end);
+
+	*length = remaining;
+	*st = cur;
+	return i;
+}
+
+/*
+ * Translate @addr in @mm to the base page backing it inside a huge page.
+ * Returns ERR_PTR(-EINVAL) when @addr is outside REGION_HPAGE, NULL when no
+ * huge page is mapped there, and otherwise the struct page of the sub-page.
+ * @write is not consulted.
+ */
+struct page *follow_huge_addr(struct mm_struct *mm, unsigned long addr, int write)
+{
+	pte_t *entry;
+
+	if (REGION_NUMBER(addr) != REGION_HPAGE)
+		return ERR_PTR(-EINVAL);
+
+	entry = huge_pte_offset(mm, addr);
+	if (!entry)
+		return NULL;
+	if (pte_none(*entry))
+		return NULL;
+
+	/* Offset from the head page by the index of the base page within it. */
+	return pte_page(*entry) + ((addr & ~HPAGE_MASK) >> PAGE_SHIFT);
+}
+/* Always returns 0: this implementation never reports a pmd entry as huge. */
+int pmd_huge(pmd_t pmd)
+{
+ return 0;
+}
+/*
+ * Stub that always returns NULL and ignores all of its arguments.
+ * NOTE(review): presumably never reached because pmd_huge() always returns 0;
+ * confirm against the callers.
+ */
+struct page *
+follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write)
+{
+ return NULL;
+}
+
+/*
+ * Same as generic free_pgtables(), except constant PGDIR_* and pgd_offset
+ * are hugetlb region specific.
+ *
+ * Builds a window [first, last) around [start, end): "first" begins at @start
+ * masked with HUGETLB_PGDIR_MASK and "last" at @end + HUGETLB_PGDIR_SIZE - 1.
+ * The window is then clipped against the neighbouring VMAs and, unless it
+ * ended up empty or inverted, passed to clear_page_range().
+ *
+ * @prev: VMA to start the neighbour search from; when NULL the search starts
+ *        at the head of tlb->mm's VMA list.
+ */
+void hugetlb_free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *prev,
+ unsigned long start, unsigned long end)
+{
+ unsigned long first = start & HUGETLB_PGDIR_MASK;
+ unsigned long last = end + HUGETLB_PGDIR_SIZE - 1;
+ struct mm_struct *mm = tlb->mm;
+
+ if (!prev) {
+ prev = mm->mmap;
+ if (!prev)
+ goto no_mmaps;
+ /* The first VMA ends above start: only clip the upper bound against it. */
+ if (prev->vm_end > start) {
+ if (last > prev->vm_start)
+ last = prev->vm_start;
+ goto no_mmaps;
+ }
+ }
+ /* Advance prev until its successor no longer starts below start. */
+ for (;;) {
+ struct vm_area_struct *next = prev->vm_next;
+
+ if (next) {
+ if (next->vm_start < start) {
+ prev = next;
+ continue;
+ }
+ /* Do not free tables that the following VMA still needs. */
+ if (last > next->vm_start)
+ last = next->vm_start;
+ }
+ /* Likewise keep everything below the end of the preceding VMA. */
+ if (prev->vm_end > first)
+ first = prev->vm_end;
+ break;
+ }
+no_mmaps:
+ if (last < first) /* for arches with discontiguous pgd indices */
+ return;
+ clear_page_range(tlb, first, last);
+}
+
+/*
+ * Tear down the huge page mappings of @vma in [@start, @end).  Both bounds
+ * must be huge-page aligned (enforced with BUG_ON).  For every populated
+ * entry the page reference is dropped, the pte is cleared and one huge page
+ * worth of base pages is removed from the mm's rss; finally the TLB is
+ * flushed for the whole range.
+ *
+ * Slots that were never populated are skipped.  That includes addresses whose
+ * page-table levels do not exist at all: huge_pte_offset() returns NULL for
+ * those (for example when hugetlb_prefault() bailed out part way through the
+ * VMA), so the pointer is checked before it is dereferenced.  rss is adjusted
+ * per unmapped page rather than for the whole range, so skipped slots - which
+ * set_huge_pte() never charged - are not subtracted.
+ */
+void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long address;
+	pte_t *pte;
+	struct page *page;
+
+	BUG_ON(start & (HPAGE_SIZE - 1));
+	BUG_ON(end & (HPAGE_SIZE - 1));
+
+	for (address = start; address < end; address += HPAGE_SIZE) {
+		pte = huge_pte_offset(mm, address);
+		if (!pte || pte_none(*pte))
+			continue;
+		page = pte_page(*pte);
+		put_page(page);
+		pte_clear(mm, address, pte);
+		/* Mirror the per-page charge made by set_huge_pte(). */
+		add_mm_counter(mm, rss, - (HPAGE_SIZE / PAGE_SIZE));
+	}
+	flush_tlb_range(vma, start, end);
+}
+
+/*
+ * Populate every huge page slot of @vma up front, backing it with pages from
+ * @mapping's page cache.  The whole walk runs with current->mm's
+ * page_table_lock held.
+ *
+ * Returns 0 on success, -ENOMEM when a pte slot, the quota charge or a huge
+ * page cannot be obtained, or the error returned by add_to_page_cache().
+ * Slots populated before a failure are left in place.
+ */
+int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
+{
+ struct mm_struct *mm = current->mm;
+ unsigned long addr;
+ int ret = 0;
+
+ BUG_ON(vma->vm_start & ~HPAGE_MASK);
+ BUG_ON(vma->vm_end & ~HPAGE_MASK);
+
+ spin_lock(&mm->page_table_lock);
+ for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
+ unsigned long idx;
+ pte_t *pte = huge_pte_alloc(mm, addr);
+ struct page *page;
+
+ if (!pte) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ /* Slot already populated: nothing to do for this huge page. */
+ if (!pte_none(*pte))
+ continue;
+
+ /* Page cache index in huge page units: offset within the VMA plus vm_pgoff. */
+ idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
+ + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
+ page = find_get_page(mapping, idx);
+ if (!page) {
+ /* charge the fs quota first */
+ if (hugetlb_get_quota(mapping)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ page = alloc_huge_page();
+ if (!page) {
+ hugetlb_put_quota(mapping);
+ ret = -ENOMEM;
+ goto out;
+ }
+ /* NOTE(review): GFP_ATOMIC presumably because page_table_lock is held here - confirm. */
+ ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
+ if (! ret) {
+ unlock_page(page);
+ } else {
+ /* Undo the quota charge and drop the page that could not be inserted. */
+ hugetlb_put_quota(mapping);
+ page_cache_release(page);
+ goto out;
+ }
+ }
+ /* Writable entry only if the VMA itself allows writes. */
+ set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
+ }
+out:
+ spin_unlock(&mm->page_table_lock);
+ return ret;
+}
+
+/*
+ * Find a free, huge-page aligned range of @len bytes inside the huge page
+ * region of current->mm.  @addr is used as the starting hint only if it is in
+ * REGION_HPAGE and huge-page aligned; otherwise the search starts at
+ * HPAGE_REGION_BASE.  @file, @pgoff and @flags are not consulted.
+ *
+ * Returns the chosen address, -EINVAL if @len is not a multiple of the huge
+ * page size, or -ENOMEM if @len exceeds RGN_MAP_LIMIT or no gap fits below it.
+ */
+unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
+ unsigned long pgoff, unsigned long flags)
+{
+ struct vm_area_struct *vmm;
+
+ if (len > RGN_MAP_LIMIT)
+ return -ENOMEM;
+ if (len & ~HPAGE_MASK)
+ return -EINVAL;
+ /* This code assumes that REGION_HPAGE != 0. */
+ if ((REGION_NUMBER(addr) != REGION_HPAGE) || (addr & (HPAGE_SIZE - 1)))
+ addr = HPAGE_REGION_BASE;
+ else
+ addr = ALIGN(addr, HPAGE_SIZE);
+ /* First-fit scan: try the gap in front of each VMA, then skip past that VMA. */
+ for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) {
+ /* At this point: (!vmm || addr < vmm->vm_end). */
+ if (REGION_OFFSET(addr) + len > RGN_MAP_LIMIT)
+ return -ENOMEM;
+ if (!vmm || (addr + len) <= vmm->vm_start)
+ return addr;
+ addr = ALIGN(vmm->vm_end, HPAGE_SIZE);
+ }
+}
+
+/*
+ * Handler for the "hugepagesz=" boot option.  Parses the size with
+ * memparse() and rejects it (with a warning) when there is trailing text,
+ * when it is not a power of two, when its bit is not set in the page-size
+ * mask obtained from ia64_pal_vm_page_size(), when it is <= PAGE_SIZE, or
+ * when it is >= (1UL << PAGE_SHIFT << MAX_ORDER).  On success hpage_shift is
+ * updated and the region register for HPAGE_REGION_BASE is reprogrammed.
+ * Returns 1 in both cases.
+ */
+static int __init hugetlb_setup_sz(char *str)
+{
+ u64 tr_pages;
+ unsigned long long size;
+
+ if (ia64_pal_vm_page_size(&tr_pages, NULL) != 0)
+ /*
+ * shouldn't happen, but just in case.
+ */
+ tr_pages = 0x15557000UL;
+
+ size = memparse(str, &str);
+ if (*str || (size & (size-1)) || !(tr_pages & size) ||
+ size <= PAGE_SIZE ||
+ size >= (1UL << PAGE_SHIFT << MAX_ORDER)) {
+ printk(KERN_WARNING "Invalid huge page size specified\n");
+ return 1;
+ }
+
+ /* size is a single set bit here, so its bit index is the page shift. */
+ hpage_shift = __ffs(size);
+ /*
+ * boot cpu already executed ia64_mmu_init, and has HPAGE_SHIFT_DEFAULT
+ * override here with new page shift.
+ */
+ ia64_set_rr(HPAGE_REGION_BASE, hpage_shift << 2);
+ return 1;
+}
+__setup("hugepagesz=", hugetlb_setup_sz);
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
new file mode 100644
index 00000000000..65cf839573e
--- /dev/null
+++ b/arch/ia64/mm/init.c
@@ -0,0 +1,597 @@
+/*
+ * Initialize MMU support.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+
+#include <linux/bootmem.h>
+#include <linux/efi.h>
+#include <linux/elf.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/module.h>
+#include <linux/personality.h>
+#include <linux/reboot.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/proc_fs.h>
+#include <linux/bitops.h>
+
+#include <asm/a.out.h>
+#include <asm/dma.h>
+#include <asm/ia32.h>
+#include <asm/io.h>
+#include <asm/machvec.h>
+#include <asm/numa.h>
+#include <asm/patch.h>
+#include <asm/pgalloc.h>
+#include <asm/sal.h>
+#include <asm/sections.h>
+#include <asm/system.h>
+#include <asm/tlb.h>
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+#include <asm/mca.h>
+
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+
+extern void ia64_tlb_init (void);
+
+unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL;
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+unsigned long vmalloc_end = VMALLOC_END_INIT;
+EXPORT_SYMBOL(vmalloc_end);
+struct page *vmem_map;
+EXPORT_SYMBOL(vmem_map);
+#endif
+
+/*
+ * { low, high } water marks for the page-table quicklist cache, as used by
+ * check_pgt_cache(): trimming starts above [1] and continues down to [0].
+ * mem_init() may raise [1].
+ */
+static int pgt_cache_water[2] = { 25, 50 };
+
+struct page *zero_page_memmap_ptr; /* map entry for zero page */
+EXPORT_SYMBOL(zero_page_memmap_ptr);
+
+/*
+ * Trim the page-table quicklists.  Nothing happens until pgtable_cache_size
+ * exceeds the high water mark; then one pgd and one pmd page (when available)
+ * are released per round until the size is no longer above the low water
+ * mark.  Runs with preemption disabled.
+ */
+void
+check_pgt_cache (void)
+{
+	const int low_mark = pgt_cache_water[0];
+	const int high_mark = pgt_cache_water[1];
+
+	preempt_disable();
+	if (pgtable_cache_size <= (u64) high_mark)
+		goto done;
+
+	do {
+		if (pgd_quicklist)
+			free_page((unsigned long)pgd_alloc_one_fast(NULL));
+		if (pmd_quicklist)
+			free_page((unsigned long)pmd_alloc_one_fast(NULL, 0));
+	} while (pgtable_cache_size > (u64) low_mark);
+done:
+	preempt_enable();
+}
+
+/*
+ * For an executable @pte, flush the i-cache over the page it maps unless the
+ * page is already flagged PG_arch_1, and set that flag afterwards.
+ * Non-executable ptes are ignored.
+ */
+void
+lazy_mmu_prot_update (pte_t pte)
+{
+	struct page *pg;
+	unsigned long kaddr;
+
+	if (!pte_exec(pte))
+		return;				/* not an executable page... */
+
+	pg = pte_page(pte);
+	kaddr = (unsigned long) page_address(pg);
+
+	if (!test_bit(PG_arch_1, &pg->flags)) {
+		/* i-cache not yet coherent with d-cache for this page */
+		flush_icache_range(kaddr, kaddr + PAGE_SIZE);
+		set_bit(PG_arch_1, &pg->flags);	/* mark page as clean */
+	}
+}
+
+/*
+ * Set current->thread.rbs_bot to STACK_TOP minus the stack size limit, where
+ * the limit is the hard RLIMIT_STACK value rounded down to a multiple of 16
+ * and capped at MAX_USER_STACK_SIZE.
+ */
+inline void
+ia64_set_rbs_bot (void)
+{
+	unsigned long limit;
+
+	limit = current->signal->rlim[RLIMIT_STACK].rlim_max & -16;
+	if (limit > MAX_USER_STACK_SIZE)
+		limit = MAX_USER_STACK_SIZE;
+
+	current->thread.rbs_bot = STACK_TOP - limit;
+}
+
+/*
+ * Insert @vma into current->mm with mmap_sem held for writing.  When the
+ * insert fails, @vma is handed back to vm_area_cachep.  Returns the
+ * insert_vm_struct() result.
+ */
+static int
+ia64_insert_vma_or_free (struct vm_area_struct *vma)
+{
+	struct mm_struct *mm = current->mm;
+	int err;
+
+	down_write(&mm->mmap_sem);
+	err = insert_vm_struct(mm, vma);
+	up_write(&mm->mmap_sem);
+
+	if (err)
+		kmem_cache_free(vm_area_cachep, vma);
+	return err;
+}
+
+/*
+ * This performs some platform-dependent address space initialization.
+ * On IA-64, we want to setup the VM area for the register backing
+ * store (which grows upwards) and install the gateway page which is
+ * used for signal trampolines, etc.
+ */
+void
+ia64_init_addr_space (void)
+{
+	struct vm_area_struct *rbs_vma, *nat_vma;
+
+	ia64_set_rbs_bot();
+
+	/*
+	 * If we're out of memory and kmem_cache_alloc() returns NULL, we simply ignore
+	 * the problem.  When the process attempts to write to the register backing store
+	 * for the first time, it will get a SEGFAULT in this case.
+	 */
+	rbs_vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	if (rbs_vma) {
+		memset(rbs_vma, 0, sizeof(*rbs_vma));
+		rbs_vma->vm_mm = current->mm;
+		rbs_vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
+		rbs_vma->vm_end = rbs_vma->vm_start + PAGE_SIZE;
+		rbs_vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7];
+		rbs_vma->vm_flags = VM_DATA_DEFAULT_FLAGS | VM_GROWSUP;
+		if (ia64_insert_vma_or_free(rbs_vma))
+			return;
+	}
+
+	/* map NaT-page at address zero to speed up speculative dereferencing of NULL: */
+	if (current->personality & MMAP_PAGE_ZERO)
+		return;
+
+	nat_vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	if (!nat_vma)
+		return;
+
+	/* vm_start stays 0 from the memset, so the VMA covers [0, PAGE_SIZE). */
+	memset(nat_vma, 0, sizeof(*nat_vma));
+	nat_vma->vm_mm = current->mm;
+	nat_vma->vm_end = PAGE_SIZE;
+	nat_vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT);
+	nat_vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | VM_RESERVED;
+	ia64_insert_vma_or_free(nat_vma);
+}
+
+/*
+ * Hand every page between __init_begin and __init_end back to the page
+ * allocator: clear its reserved flag, reset its count to 1, free it and bump
+ * totalram_pages.  Logs the amount released.
+ */
+void
+free_initmem (void)
+{
+	unsigned long cur = (unsigned long) ia64_imva(__init_begin);
+	const unsigned long stop = (unsigned long) ia64_imva(__init_end);
+
+	for (; cur < stop; cur += PAGE_SIZE) {
+		struct page *pg = virt_to_page(cur);
+
+		ClearPageReserved(pg);
+		set_page_count(pg, 1);
+		free_page(cur);
+		++totalram_pages;
+	}
+	printk(KERN_INFO "Freeing unused kernel memory: %ldkB freed\n",
+	       (__init_end - __init_begin) >> 10);
+}
+
+/*
+ * Release the whole kernel pages that lie entirely inside [start, end).
+ * Partial pages at either edge are kept, and addresses for which
+ * virt_addr_valid() fails are skipped.
+ */
+void
+free_initrd_mem (unsigned long start, unsigned long end)
+{
+ struct page *page;
+ /*
+ * EFI uses 4KB pages while the kernel can use 4KB or bigger.
+ * Thus EFI and the kernel may have different page sizes. It is
+ * therefore possible to have the initrd share the same page as
+ * the end of the kernel (given current setup).
+ *
+ * To avoid freeing/using the wrong page (kernel sized) we:
+ * - align up the beginning of initrd
+ * - align down the end of initrd
+ *
+ * | |
+ * |=============| a000
+ * | |
+ * | |
+ * | | 9000
+ * |/////////////|
+ * |/////////////|
+ * |=============| 8000
+ * |///INITRD////|
+ * |/////////////|
+ * |/////////////| 7000
+ * | |
+ * |KKKKKKKKKKKKK|
+ * |=============| 6000
+ * |KKKKKKKKKKKKK|
+ * |KKKKKKKKKKKKK|
+ * K=kernel using 8KB pages
+ *
+ * In this example, we must free page 8000 ONLY. So we must align up
+ * initrd_start and keep initrd_end as is.
+ */
+ start = PAGE_ALIGN(start);
+ end = end & PAGE_MASK;
+
+ /* Only report when at least one whole page remains after alignment. */
+ if (start < end)
+ printk(KERN_INFO "Freeing initrd memory: %ldkB freed\n", (end - start) >> 10);
+
+ for (; start < end; start += PAGE_SIZE) {
+ if (!virt_addr_valid(start))
+ continue;
+ page = virt_to_page(start);
+ ClearPageReserved(page);
+ set_page_count(page, 1);
+ free_page(start);
+ ++totalram_pages;
+ }
+}
+
+/*
+ * This installs a clean page in the kernel's page table.
+ *
+ * Maps @page at @address in init_mm with protection @pgprot, allocating the
+ * intermediate page-table levels as needed under init_mm.page_table_lock.
+ * A page that is not marked reserved only triggers an error message.
+ *
+ * Returns @page unconditionally: the caller cannot tell from the return value
+ * whether the mapping was installed, whether an allocation failed, or whether
+ * the pte was already populated (in which case it is left untouched).
+ */
+struct page *
+put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ if (!PageReserved(page))
+ printk(KERN_ERR "put_kernel_page: page at 0x%p not in reserved memory\n",
+ page_address(page));
+
+ pgd = pgd_offset_k(address); /* note: this is NOT pgd_offset()! */
+
+ spin_lock(&init_mm.page_table_lock);
+ {
+ pud = pud_alloc(&init_mm, pgd, address);
+ if (!pud)
+ goto out;
+
+ pmd = pmd_alloc(&init_mm, pud, address);
+ if (!pmd)
+ goto out;
+ pte = pte_alloc_map(&init_mm, pmd, address);
+ if (!pte)
+ goto out;
+ /* Never overwrite an existing mapping. */
+ if (!pte_none(*pte)) {
+ pte_unmap(pte);
+ goto out;
+ }
+ set_pte(pte, mk_pte(page, pgprot));
+ pte_unmap(pte);
+ }
+ out: spin_unlock(&init_mm.page_table_lock);
+ /* no need for flush_tlb */
+ return page;
+}
+
+/*
+ * Install the gate page mappings in the kernel page table via
+ * put_kernel_page() and then run ia64_patch_gate().
+ */
+static void
+setup_gate (void)
+{
+ struct page *page;
+
+ /*
+ * Map the gate page twice: once read-only to export the ELF headers etc. and once
+ * execute-only page to enable privilege-promotion via "epc":
+ */
+ page = virt_to_page(ia64_imva(__start_gate_section));
+ put_kernel_page(page, GATE_ADDR, PAGE_READONLY);
+#ifdef HAVE_BUGGY_SEGREL
+ /* Here the PAGE_GATE mapping uses the second page of the gate section, at GATE_ADDR + PAGE_SIZE. */
+ page = virt_to_page(ia64_imva(__start_gate_section + PAGE_SIZE));
+ put_kernel_page(page, GATE_ADDR + PAGE_SIZE, PAGE_GATE);
+#else
+ /* Here the same page is mapped a second time, PERCPU_PAGE_SIZE above GATE_ADDR. */
+ put_kernel_page(page, GATE_ADDR + PERCPU_PAGE_SIZE, PAGE_GATE);
+#endif
+ ia64_patch_gate();
+}
+
+/*
+ * Per-CPU MMU setup: pin a translation for the per-CPU data area at
+ * PERCPU_ADDR (backed by @my_cpu_data), validate the number of implemented
+ * virtual address bits, program the page table address register for the
+ * virtually mapped linear page table, initialise the TLB purge parameters via
+ * ia64_tlb_init() and, with CONFIG_HUGETLB_PAGE, set the page size of the
+ * huge page region.  Panics when the implemented VA size is out of range or
+ * when the linear page table would overlap the mapped address space.
+ */
+void __devinit
+ia64_mmu_init (void *my_cpu_data)
+{
+ unsigned long psr, pta, impl_va_bits;
+ extern void __devinit tlb_init (void);
+
+#ifdef CONFIG_DISABLE_VHPT
+# define VHPT_ENABLE_BIT 0
+#else
+# define VHPT_ENABLE_BIT 1
+#endif
+
+ /* Pin mapping for percpu area into TLB */
+ psr = ia64_clear_ic();
+ ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR,
+ pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL)),
+ PERCPU_PAGE_SHIFT);
+
+ /* Restore the psr value saved by ia64_clear_ic() and serialize. */
+ ia64_set_psr(psr);
+ ia64_srlz_i();
+
+ /*
+ * Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped
+ * address space. The IA-64 architecture guarantees that at least 50 bits of
+ * virtual address space are implemented but if we pick a large enough page size
+ * (e.g., 64KB), the mapped address space is big enough that it will overlap with
+ * VMLPT. I assume that once we run on machines big enough to warrant 64KB pages,
+ * IMPL_VA_MSB will be significantly bigger, so this is unlikely to become a
+ * problem in practice. Alternatively, we could truncate the top of the mapped
+ * address space to not permit mappings that would overlap with the VMLPT.
+ * --davidm 00/12/06
+ */
+# define pte_bits 3
+# define mapped_space_bits (3*(PAGE_SHIFT - pte_bits) + PAGE_SHIFT)
+ /*
+ * The virtual page table has to cover the entire implemented address space within
+ * a region even though not all of this space may be mappable. The reason for
+ * this is that the Access bit and Dirty bit fault handlers perform
+ * non-speculative accesses to the virtual page table, so the address range of the
+ * virtual page table itself needs to be covered by virtual page table.
+ */
+# define vmlpt_bits (impl_va_bits - PAGE_SHIFT + pte_bits)
+# define POW2(n) (1ULL << (n))
+
+ /* Index of the lowest set bit in the unimplemented-VA mask (bits 61-63 forced on). */
+ impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61)));
+
+ if (impl_va_bits < 51 || impl_va_bits > 61)
+ panic("CPU has bogus IMPL_VA_MSB value of %lu!\n", impl_va_bits - 1);
+
+ /* place the VMLPT at the end of each page-table mapped region: */
+ pta = POW2(61) - POW2(vmlpt_bits);
+
+ if (POW2(mapped_space_bits) >= pta)
+ panic("mm/init: overlap between virtually mapped linear page table and "
+ "mapped kernel space!");
+ /*
+ * Set the (virtually mapped linear) page table address. Bit
+ * 8 selects between the short and long format, bits 2-7 the
+ * size of the table, and bit 0 whether the VHPT walker is
+ * enabled.
+ */
+ ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | VHPT_ENABLE_BIT);
+
+ ia64_tlb_init();
+
+#ifdef CONFIG_HUGETLB_PAGE
+ /* The page-size value is written shifted left by two, as in hugetlb_setup_sz(). */
+ ia64_set_rr(HPAGE_REGION_BASE, HPAGE_SHIFT << 2);
+ ia64_srlz_d();
+#endif
+}
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+
+/*
+ * Make sure the part of vmem_map that describes physical memory
+ * [__pa(start), __pa(end)) is itself backed by memory: for every page of the
+ * virtual mem_map covering that range, any missing pgd/pud/pmd/pte level is
+ * filled in from bootmem on the node that paddr_to_nid() reports for @start.
+ * @arg is not used.  Always returns 0.
+ * NOTE(review): the (start, end, arg) signature matches the callbacks passed
+ * to efi_memmap_walk() in this file - presumably it is used the same way.
+ */
+int
+create_mem_map_page_table (u64 start, u64 end, void *arg)
+{
+ unsigned long address, start_page, end_page;
+ struct page *map_start, *map_end;
+ int node;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ map_start = vmem_map + (__pa(start) >> PAGE_SHIFT);
+ map_end = vmem_map + (__pa(end) >> PAGE_SHIFT);
+
+ /* Widen to whole pages of the mem_map array itself. */
+ start_page = (unsigned long) map_start & PAGE_MASK;
+ end_page = PAGE_ALIGN((unsigned long) map_end);
+ node = paddr_to_nid(__pa(start));
+
+ for (address = start_page; address < end_page; address += PAGE_SIZE) {
+ pgd = pgd_offset_k(address);
+ if (pgd_none(*pgd))
+ pgd_populate(&init_mm, pgd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
+ pud = pud_offset(pgd, address);
+
+ if (pud_none(*pud))
+ pud_populate(&init_mm, pud, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
+ pmd = pmd_offset(pud, address);
+
+ if (pmd_none(*pmd))
+ pmd_populate_kernel(&init_mm, pmd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
+ pte = pte_offset_kernel(pmd, address);
+
+ /* Leaf level: back this page of the mem_map with a fresh bootmem page. */
+ if (pte_none(*pte))
+ set_pte(pte, pfn_pte(__pa(alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE)) >> PAGE_SHIFT,
+ PAGE_KERNEL));
+ }
+ return 0;
+}
+
+/* Arguments handed from memmap_init() to virtual_memmap_init() through efi_memmap_walk(). */
+struct memmap_init_callback_data {
+ struct page *start; /* first struct page of the range being initialised */
+ struct page *end; /* one past the last struct page of that range */
+ int nid; /* node id forwarded to memmap_init_zone() */
+ unsigned long zone; /* zone forwarded to memmap_init_zone() */
+};
+
+/*
+ * efi_memmap_walk() callback used by memmap_init().  Initialises the struct
+ * page entries of vmem_map that correspond to [start, end), clamped to the
+ * range in the memmap_init_callback_data passed via @arg and then widened to
+ * whole pages of the mem_map array.  Always returns 0.
+ */
+static int
+virtual_memmap_init (u64 start, u64 end, void *arg)
+{
+ struct memmap_init_callback_data *args;
+ struct page *map_start, *map_end;
+
+ args = (struct memmap_init_callback_data *) arg;
+ map_start = vmem_map + (__pa(start) >> PAGE_SHIFT);
+ map_end = vmem_map + (__pa(end) >> PAGE_SHIFT);
+
+ /* Clamp to the range requested by memmap_init(). */
+ if (map_start < args->start)
+ map_start = args->start;
+ if (map_end > args->end)
+ map_end = args->end;
+
+ /*
+ * We have to initialize "out of bounds" struct page elements that fit completely
+ * on the same pages that were allocated for the "in bounds" elements because they
+ * may be referenced later (and found to be "reserved").
+ */
+ map_start -= ((unsigned long) map_start & (PAGE_SIZE - 1)) / sizeof(struct page);
+ map_end += ((PAGE_ALIGN((unsigned long) map_end) - (unsigned long) map_end)
+ / sizeof(struct page));
+
+ /* The clamping above can leave an empty range for this memory chunk. */
+ if (map_start < map_end)
+ memmap_init_zone((unsigned long)(map_end - map_start),
+ args->nid, args->zone, page_to_pfn(map_start));
+ return 0;
+}
+
+/*
+ * Initialise @size struct page entries starting at @start_pfn for zone @zone
+ * on node @nid.  Without a virtual mem_map this is a plain
+ * memmap_init_zone() call; with one, the EFI memory map is walked and only
+ * the parts of the range reported there are initialised, through
+ * virtual_memmap_init().
+ */
+void
+memmap_init (unsigned long size, int nid, unsigned long zone,
+	     unsigned long start_pfn)
+{
+	struct memmap_init_callback_data walk;
+
+	if (!vmem_map) {
+		memmap_init_zone(size, nid, zone, start_pfn);
+		return;
+	}
+
+	walk.start = pfn_to_page(start_pfn);
+	walk.end = walk.start + size;
+	walk.nid = nid;
+	walk.zone = zone;
+
+	efi_memmap_walk(virtual_memmap_init, &walk);
+}
+
+/*
+ * Report whether the struct page for @pfn can be read: the first byte of the
+ * entry is probed with __get_user() and, only if the entry straddles a page
+ * boundary, its last byte as well.  Returns non-zero when every probe
+ * succeeds.
+ */
+int
+ia64_pfn_valid (unsigned long pfn)
+{
+ char byte;
+ struct page *pg = pfn_to_page(pfn);
+
+ return (__get_user(byte, (char __user *) pg) == 0)
+ && ((((u64)pg & PAGE_MASK) == (((u64)(pg + 1) - 1) & PAGE_MASK))
+ || (__get_user(byte, (char __user *) (pg + 1) - 1) == 0));
+}
+EXPORT_SYMBOL(ia64_pfn_valid);
+
+/*
+ * Memory-map walk callback that tracks the widest gap between consecutive
+ * ranges.  @arg points to a u64 holding the largest gap seen so far.  The end
+ * of the previous range is remembered in a function-local static that starts
+ * at PAGE_OFFSET, so the result is only meaningful for a single ordered walk.
+ * Always returns 0.
+ */
+int
+find_largest_hole (u64 start, u64 end, void *arg)
+{
+	static u64 last_end = PAGE_OFFSET;
+	u64 *widest = arg;
+	u64 gap;
+
+	/* NOTE: this algorithm assumes efi memmap table is ordered */
+
+	gap = start - last_end;
+	if (*widest < gap)
+		*widest = gap;
+
+	last_end = end;
+	return 0;
+}
+#endif /* CONFIG_VIRTUAL_MEM_MAP */
+
+/*
+ * Memory-map walk callback: add the number of pages in [start, end) whose
+ * struct page is marked reserved to the unsigned long counter that @arg
+ * points to.  Always returns 0.
+ */
+static int
+count_reserved_pages (u64 start, u64 end, void *arg)
+{
+	unsigned long *total = arg;
+	unsigned long found = 0;
+	u64 va = start;
+
+	while (va < end) {
+		if (PageReserved(virt_to_page(va)))
+			found++;
+		va += PAGE_SIZE;
+	}
+
+	*total += found;
+	return 0;
+}
+
+/*
+ * Boot command-line option "nolwsys" can be used to disable the use of any light-weight
+ * system call handler. When this option is in effect, all fsyscalls will end up bubbling
+ * down into the kernel and calling the normal (heavy-weight) syscall handler. This is
+ * useful for performance testing, but conceivably could also come in handy for debugging
+ * purposes.
+ */
+
+/* Non-zero once "nolwsys" was seen on the command line; consulted by mem_init(). */
+static int nolwsys;
+
+/* Handler for the "nolwsys" boot option: records the request and returns 1; @s is not examined. */
+static int __init
+nolwsys_setup (char *s)
+{
+ nolwsys = 1;
+ return 1;
+}
+
+__setup("nolwsys", nolwsys_setup);
+
+/*
+ * Late memory initialisation: initialise platform DMA (with CONFIG_PCI),
+ * register the /proc/kcore ranges, release all bootmem to the page allocator,
+ * print the memory summary, size the page-table cache high water mark, fill
+ * in the fsyscall table and install the gate page (plus IA-32 support when
+ * configured).
+ */
+void
+mem_init (void)
+{
+ long reserved_pages, codesize, datasize, initsize;
+ unsigned long num_pgt_pages;
+ pg_data_t *pgdat;
+ int i;
+ static struct kcore_list kcore_mem, kcore_vmem, kcore_kernel;
+
+#ifdef CONFIG_PCI
+ /*
+ * This needs to be called _after_ the command line has been parsed but _before_
+ * any drivers that may need the PCI DMA interface are initialized or bootmem has
+ * been freed.
+ */
+ platform_dma_init();
+#endif
+
+#ifndef CONFIG_DISCONTIGMEM
+ if (!mem_map)
+ BUG();
+ max_mapnr = max_low_pfn;
+#endif
+
+ high_memory = __va(max_low_pfn * PAGE_SIZE);
+
+ kclist_add(&kcore_mem, __va(0), max_low_pfn * PAGE_SIZE);
+ kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START);
+ kclist_add(&kcore_kernel, _stext, _end - _stext);
+
+ /* Hand all bootmem pages of every node over to the page allocator. */
+ for_each_pgdat(pgdat)
+ totalram_pages += free_all_bootmem_node(pgdat);
+
+ reserved_pages = 0;
+ efi_memmap_walk(count_reserved_pages, &reserved_pages);
+
+ codesize = (unsigned long) _etext - (unsigned long) _stext;
+ datasize = (unsigned long) _edata - (unsigned long) _etext;
+ initsize = (unsigned long) __init_end - (unsigned long) __init_begin;
+
+ printk(KERN_INFO "Memory: %luk/%luk available (%luk code, %luk reserved, "
+ "%luk data, %luk init)\n", (unsigned long) nr_free_pages() << (PAGE_SHIFT - 10),
+ num_physpages << (PAGE_SHIFT - 10), codesize >> 10,
+ reserved_pages << (PAGE_SHIFT - 10), datasize >> 10, initsize >> 10);
+
+ /*
+ * Allow for enough (cached) page table pages so that we can map the entire memory
+ * at least once. Each task also needs a couple of page tables pages, so add in a
+ * fudge factor for that (don't use "threads-max" here; that would be wrong!).
+ * Don't allow the cache to be more than 10% of total memory, though.
+ */
+# define NUM_TASKS 500 /* typical number of tasks */
+ num_pgt_pages = nr_free_pages() / PTRS_PER_PGD + NUM_TASKS;
+ if (num_pgt_pages > nr_free_pages() / 10)
+ num_pgt_pages = nr_free_pages() / 10;
+ /* Only ever raise the high water mark, never lower it. */
+ if (num_pgt_pages > (u64) pgt_cache_water[1])
+ pgt_cache_water[1] = num_pgt_pages;
+
+ /*
+ * For fsyscall entry points with no light-weight handler, use the ordinary
+ * (heavy-weight) handler, but mark it by setting bit 0, so the fsyscall entry
+ * code can tell them apart.
+ */
+ for (i = 0; i < NR_syscalls; ++i) {
+ extern unsigned long fsyscall_table[NR_syscalls];
+ extern unsigned long sys_call_table[NR_syscalls];
+
+ /* With "nolwsys" every entry is redirected, not just the empty ones. */
+ if (!fsyscall_table[i] || nolwsys)
+ fsyscall_table[i] = sys_call_table[i] | 1;
+ }
+ setup_gate();
+
+#ifdef CONFIG_IA32_SUPPORT
+ ia32_mem_init();
+#endif
+}
diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c
new file mode 100644
index 00000000000..77118bbf3d8
--- /dev/null
+++ b/arch/ia64/mm/numa.c
@@ -0,0 +1,49 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * This file contains NUMA specific variables and functions which can
+ * be split away from DISCONTIGMEM and are used on NUMA machines with
+ * contiguous memory.
+ *
+ * 2002/08/07 Erich Focht <efocht@ess.nec.de>
+ */
+
+#include <linux/config.h>
+#include <linux/cpu.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/node.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <asm/mmzone.h>
+#include <asm/numa.h>
+
+
+/*
+ * The following structures are usually initialized by ACPI or
+ * similar mechanisms and describe the NUMA characteristics of the machine.
+ */
+int num_node_memblks;
+struct node_memblk_s node_memblk[NR_NODE_MEMBLKS];
+struct node_cpuid_s node_cpuid[NR_CPUS];
+/*
+ * This is a matrix with "distances" between nodes, they should be
+ * proportional to the memory access latency ratios.
+ */
+u8 numa_slit[MAX_NUMNODES * MAX_NUMNODES];
+
+/*
+ * Identify which cnode a physical address resides on.
+ *
+ * Returns the nid of the first node_memblk[] entry whose
+ * [start_paddr, start_paddr + size) range contains @paddr.  When nothing
+ * matches, returns -1 if the table has entries and 0 if it is empty.
+ */
+int
+paddr_to_nid(unsigned long paddr)
+{
+	int blk;
+
+	for (blk = 0; blk < num_node_memblks; blk++) {
+		if (paddr < node_memblk[blk].start_paddr)
+			continue;
+		if (paddr < node_memblk[blk].start_paddr + node_memblk[blk].size)
+			return node_memblk[blk].nid;
+	}
+
+	return num_node_memblks ? -1 : 0;
+}
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
new file mode 100644
index 00000000000..464557e4ed8
--- /dev/null
+++ b/arch/ia64/mm/tlb.c
@@ -0,0 +1,190 @@
+/*
+ * TLB support routines.
+ *
+ * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 08/02/00 A. Mallick <asit.k.mallick@intel.com>
+ * Modified RID allocation for SMP
+ * Goutham Rao <goutham.rao@intel.com>
+ * IPI based ptc implementation and A-step IPI implementation.
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/mm.h>
+
+#include <asm/delay.h>
+#include <asm/mmu_context.h>
+#include <asm/pgalloc.h>
+#include <asm/pal.h>
+#include <asm/tlbflush.h>
+
+static struct {
+ unsigned long mask; /* mask of supported purge page-sizes */
+ unsigned long max_bits; /* log2() of largest supported purge page-size */
+} purge;
+
+/*
+ * Global state for handing out mm context numbers.  wrap_mmu_context()
+ * recomputes .next and .limit, bounded by .max_ctx, and must be called with
+ * .lock held.
+ */
+struct ia64_ctx ia64_ctx = {
+ .lock = SPIN_LOCK_UNLOCKED,
+ .next = 1,
+ .limit = (1 << 15) - 1, /* start out with the safe (architected) limit */
+ .max_ctx = ~0U
+};
+
+DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
+
+/*
+ * Acquire the ia64_ctx.lock before calling this function!
+ *
+ * Recomputes ia64_ctx.next and ia64_ctx.limit by scanning every process, so
+ * that no mm context in use lies in [next, limit).  Afterwards every other
+ * online CPU is flagged through ia64_need_tlb_flush and the local TLB is
+ * flushed.  @mm is not used.
+ */
+void
+wrap_mmu_context (struct mm_struct *mm)
+{
+ unsigned long tsk_context, max_ctx = ia64_ctx.max_ctx;
+ struct task_struct *tsk;
+ int i;
+
+ if (ia64_ctx.next > max_ctx)
+ ia64_ctx.next = 300; /* skip daemons */
+ ia64_ctx.limit = max_ctx + 1;
+
+ /*
+ * Scan all the task's mm->context and set proper safe range
+ */
+
+ read_lock(&tasklist_lock);
+ repeat:
+ for_each_process(tsk) {
+ if (!tsk->mm)
+ continue;
+ tsk_context = tsk->mm->context;
+ /* Candidate is taken: try the next number, restarting the scan if the range is used up. */
+ if (tsk_context == ia64_ctx.next) {
+ if (++ia64_ctx.next >= ia64_ctx.limit) {
+ /* empty range: reset the range limit and start over */
+ if (ia64_ctx.next > max_ctx)
+ ia64_ctx.next = 300;
+ ia64_ctx.limit = max_ctx + 1;
+ goto repeat;
+ }
+ }
+ /* A context in use inside the candidate range becomes the new upper limit. */
+ if ((tsk_context > ia64_ctx.next) && (tsk_context < ia64_ctx.limit))
+ ia64_ctx.limit = tsk_context;
+ }
+ read_unlock(&tasklist_lock);
+ /* can't call flush_tlb_all() here because of race condition with O(1) scheduler [EF] */
+ {
+ int cpu = get_cpu(); /* prevent preemption/migration */
+ for (i = 0; i < NR_CPUS; ++i)
+ if (cpu_online(i) && (i != cpu))
+ per_cpu(ia64_need_tlb_flush, i) = 1;
+ put_cpu();
+ }
+ local_flush_tlb_all();
+}
+
+/*
+ * Purge the translations for [start, end) with ia64_ptcga(), stepping by
+ * (1UL << nbits) bytes per call.  All callers are serialised on a
+ * function-local spinlock.  At least one purge is always issued, even when
+ * start >= end.
+ */
+void
+ia64_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits)
+{
+ static DEFINE_SPINLOCK(ptcg_lock);
+
+ /* HW requires global serialization of ptc.ga. */
+ spin_lock(&ptcg_lock);
+ {
+ do {
+ /*
+ * Flush ALAT entries also.
+ */
+ ia64_ptcga(start, (nbits<<2));
+ ia64_srlz_i();
+ start += (1UL << nbits);
+ } while (start < end);
+ }
+ spin_unlock(&ptcg_lock);
+}
+
+/*
+ * Flush the entire TLB of the local CPU by issuing ia64_ptce() over the
+ * two-level address pattern described by the ptce_base, ptce_count[] and
+ * ptce_stride[] values in local_cpu_data (filled in by ia64_tlb_init()).
+ * The loop runs with local interrupts disabled.
+ */
+void
+local_flush_tlb_all (void)
+{
+ unsigned long i, j, flags, count0, count1, stride0, stride1, addr;
+
+ addr = local_cpu_data->ptce_base;
+ count0 = local_cpu_data->ptce_count[0];
+ count1 = local_cpu_data->ptce_count[1];
+ stride0 = local_cpu_data->ptce_stride[0];
+ stride1 = local_cpu_data->ptce_stride[1];
+
+ local_irq_save(flags);
+ /* count0 outer steps of stride0, each made of count1 inner steps of stride1. */
+ for (i = 0; i < count0; ++i) {
+ for (j = 0; j < count1; ++j) {
+ ia64_ptce(addr);
+ addr += stride1;
+ }
+ addr += stride0;
+ }
+ local_irq_restore(flags);
+ ia64_srlz_i(); /* srlz.i implies srlz.d */
+}
+
+/*
+ * Purge the TLB entries covering [start, end) of @vma's address space.
+ *
+ * When the mm is not the one currently active, no ranged purge is attempted:
+ * SMP kernels flush everything and UP kernels reset mm->context to 0.
+ * Otherwise a purge page size supported according to purge.mask (at most
+ * purge.max_bits) is chosen, start is rounded down to it, and the range is
+ * purged either via platform_global_tlb_purge() (SMP) or locally with
+ * ia64_ptcl() (UP).
+ */
+void
+flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long size = end - start;
+ unsigned long nbits;
+
+ if (mm != current->active_mm) {
+ /* this does happen, but perhaps it's not worth optimizing for? */
+#ifdef CONFIG_SMP
+ flush_tlb_all();
+#else
+ /* NOTE(review): presumably forces a fresh context on next activation - confirm. */
+ mm->context = 0;
+#endif
+ return;
+ }
+
+ /* Start from the highest set bit of (size + 0xfff), then move up to a supported size. */
+ nbits = ia64_fls(size + 0xfff);
+ while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits))
+ ++nbits;
+ if (nbits > purge.max_bits)
+ nbits = purge.max_bits;
+ start &= ~((1UL << nbits) - 1);
+
+# ifdef CONFIG_SMP
+ platform_global_tlb_purge(start, end, nbits);
+# else
+ do {
+ ia64_ptcl(start, (nbits<<2));
+ start += (1UL << nbits);
+ } while (start < end);
+# endif
+
+ ia64_srlz_i(); /* srlz.i implies srlz.d */
+}
+EXPORT_SYMBOL(flush_tlb_range);
+
+/*
+ * Initialise the TLB purge parameters and flush the local TLB.
+ *
+ * The mask of supported purge page sizes comes from ia64_pal_vm_page_size();
+ * if that call fails, an error is logged and a built-in default mask is used
+ * instead.  purge.max_bits becomes the index of the highest bit set in the
+ * mask.  The ptc.e loop parameters reported by ia64_get_ptce() are copied
+ * into local_cpu_data for use by local_flush_tlb_all(), which is then called.
+ */
+void __devinit
+ia64_tlb_init (void)
+{
+	ia64_ptce_info_t ptce_info;
+	unsigned long tr_pgbits;
+	long status;
+
+	status = ia64_pal_vm_page_size(&tr_pgbits, &purge.mask);
+	if (status != 0) {
+		/*
+		 * The two literals are concatenated by the compiler, so the
+		 * first one has to end in a space to keep the words apart.
+		 */
+		printk(KERN_ERR "PAL_VM_PAGE_SIZE failed with status=%ld; "
+		       "defaulting to architected purge page-sizes.\n", status);
+		purge.mask = 0x115557000UL;
+	}
+	purge.max_bits = ia64_fls(purge.mask);
+
+	ia64_get_ptce(&ptce_info);
+	local_cpu_data->ptce_base = ptce_info.base;
+	local_cpu_data->ptce_count[0] = ptce_info.count[0];
+	local_cpu_data->ptce_count[1] = ptce_info.count[1];
+	local_cpu_data->ptce_stride[0] = ptce_info.stride[0];
+	local_cpu_data->ptce_stride[1] = ptce_info.stride[1];
+
+	local_flush_tlb_all();		/* nuke left overs from bootstrapping... */
+}
diff --git a/arch/ia64/module.lds b/arch/ia64/module.lds
new file mode 100644
index 00000000000..6481f42fbd8
--- /dev/null
+++ b/arch/ia64/module.lds
@@ -0,0 +1,13 @@
+SECTIONS {
+ /* Group unwind sections into a single section: */
+ .IA_64.unwind_info : { *(.IA_64.unwind_info*) }
+ .IA_64.unwind : { *(.IA_64.unwind*) }
+ /*
+ * Create place-holder sections to hold the PLTs, GOT, and
+ * official procedure-descriptors (.opd).
+ */
+ .core.plt : { BYTE(0) }
+ .init.plt : { BYTE(0) }
+ .got : { BYTE(0) }
+ .opd : { BYTE(0) }
+}
diff --git a/arch/ia64/oprofile/Kconfig b/arch/ia64/oprofile/Kconfig
new file mode 100644
index 00000000000..56e6f614b04
--- /dev/null
+++ b/arch/ia64/oprofile/Kconfig
@@ -0,0 +1,26 @@
+
+menu "Profiling support"
+ depends on EXPERIMENTAL
+
+config PROFILING
+ bool "Profiling support (EXPERIMENTAL)"
+ help
+ Say Y here to enable the extended profiling support mechanisms used
+ by profilers such as OProfile.
+
+config OPROFILE
+ tristate "OProfile system profiling (EXPERIMENTAL)"
+ depends on PROFILING
+ help
+ OProfile is a profiling system capable of profiling the
+ whole system, including the kernel, kernel modules, libraries,
+ and applications.
+
+ Due to firmware bugs, you may need to use the "nohalt" boot
+ option if you're using OProfile with the hardware performance
+ counters.
+
+ If unsure, say N.
+
+endmenu
+
diff --git a/arch/ia64/oprofile/Makefile b/arch/ia64/oprofile/Makefile
new file mode 100644
index 00000000000..aad27a718ee
--- /dev/null
+++ b/arch/ia64/oprofile/Makefile
@@ -0,0 +1,10 @@
+obj-$(CONFIG_OPROFILE) += oprofile.o
+
+DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \
+ oprof.o cpu_buffer.o buffer_sync.o \
+ event_buffer.o oprofile_files.o \
+ oprofilefs.o oprofile_stats.o \
+ timer_int.o )
+
+oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
+oprofile-$(CONFIG_PERFMON) += perfmon.o
diff --git a/arch/ia64/oprofile/backtrace.c b/arch/ia64/oprofile/backtrace.c
new file mode 100644
index 00000000000..b7dabbfb0d6
--- /dev/null
+++ b/arch/ia64/oprofile/backtrace.c
@@ -0,0 +1,150 @@
+/**
+ * @file backtrace.c
+ *
+ * @remark Copyright 2004 Silicon Graphics Inc. All Rights Reserved.
+ * @remark Read the file COPYING
+ *
+ * @author Greg Banks <gnb@melbourne.sgi.com>
+ * @author Keith Owens <kaos@melbourne.sgi.com>
+ * Based on work done for the ia64 port of the SGI kernprof patch, which is
+ * Copyright (c) 2003-2004 Silicon Graphics Inc. All Rights Reserved.
+ */
+
+#include <linux/oprofile.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+
+/*
+ * For IA64 we need to perform a complex little dance to get both
+ * the struct pt_regs and a synthetic struct switch_stack in place
+ * to allow the unwind code to work. This dance requires our unwind
+ * using code to be called from a function called from unw_init_running().
+ * There we only get a single void* data pointer, so use this struct
+ * to hold all the data we need during the unwind.
+ */
+typedef struct
+{
+ unsigned int depth;
+ struct pt_regs *regs;
+ struct unw_frame_info frame;
+ u64 *prev_pfs_loc; /* state for WAR for old spinlock ool code */
+} ia64_backtrace_t;
+
+#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
+/*
+ * Returns non-zero if the PC is in the spinlock contention out-of-line code
+ * with non-standard calling sequence (on older compilers).
+ */
+static __inline__ int in_old_ool_spinlock_code(unsigned long pc)
+{
+ extern const char ia64_spinlock_contention_pre3_4[] __attribute__ ((weak));
+ extern const char ia64_spinlock_contention_pre3_4_end[] __attribute__ ((weak));
+ unsigned long sc_start = (unsigned long)ia64_spinlock_contention_pre3_4;
+ unsigned long sc_end = (unsigned long)ia64_spinlock_contention_pre3_4_end;
+ return (sc_start && sc_end && pc >= sc_start && pc < sc_end);
+}
+#else
+/* Newer spinlock code does a proper br.call and works fine with the unwinder */
+#define in_old_ool_spinlock_code(pc) 0
+#endif
+
+/*
+ * Returns non-zero if the PC is in the Interrupt Vector Table, i.e. within
+ * the 32768 bytes starting at ia64_ivt.
+ */
+static __inline__ int in_ivt_code(unsigned long pc)
+{
+ extern char ia64_ivt[];
+ return (pc >= (u_long)ia64_ivt && pc < (u_long)ia64_ivt+32768);
+}
+
+/*
+ * Unwind to next stack frame.
+ */
+static __inline__ int next_frame(ia64_backtrace_t *bt)
+{
+ /*
+ * Avoid unsightly console message from unw_unwind() when attempting
+ * to unwind through the Interrupt Vector Table which has no unwind
+ * information.
+ */
+ if (in_ivt_code(bt->frame.ip))
+ return 0;
+
+ /*
+ * WAR for spinlock contention from leaf functions. ia64_spinlock_contention_pre3_4
+ * has ar.pfs == r0. Leaf functions do not modify ar.pfs so ar.pfs remains
+ * as 0, stopping the backtrace. Record the previous ar.pfs when the current
+ * IP is in ia64_spinlock_contention_pre3_4 then unwind, if pfs_loc has not changed
+ * after unwind then use pt_regs.ar_pfs which is where the real ar.pfs is for
+ * leaf functions.
+ */
+ if (bt->prev_pfs_loc && bt->regs && bt->frame.pfs_loc == bt->prev_pfs_loc)
+ bt->frame.pfs_loc = &bt->regs->ar_pfs;
+ bt->prev_pfs_loc = (in_old_ool_spinlock_code(bt->frame.ip) ? bt->frame.pfs_loc : NULL);
+
+ return unw_unwind(&bt->frame) == 0;
+}
+
+
+/*
+ * Unwind callback, invoked through unw_init_running() by ia64_backtrace().
+ *
+ * @info: frame info handed in by unw_init_running(); the switch_stack is
+ * located right behind it, rounded up to a 16-byte boundary.
+ * @vdata: the ia64_backtrace_t prepared by the caller.
+ *
+ * First unwinds past the frames whose stack pointer is still below
+ * bt->regs, then reports up to bt->depth program counters through
+ * oprofile_add_trace().
+ */
+static void do_ia64_backtrace(struct unw_frame_info *info, void *vdata)
+{
+ ia64_backtrace_t *bt = vdata;
+ struct switch_stack *sw;
+ int count = 0;
+ u_long pc, sp;
+
+ sw = (struct switch_stack *)(info+1);
+ /* padding from unw_init_running */
+ sw = (struct switch_stack *)(((unsigned long)sw + 15) & ~15);
+
+ unw_init_frame_info(&bt->frame, current, sw);
+
+ /* skip over interrupt frame and oprofile calls */
+ do {
+ unw_get_sp(&bt->frame, &sp);
+ if (sp >= (u_long)bt->regs)
+ break;
+ /* nothing to report if the unwind stops before reaching bt->regs */
+ if (!next_frame(bt))
+ return;
+ } while (count++ < 200); /* bound the number of skipped frames */
+
+ /* finally, grab the actual sample */
+ while (bt->depth-- && next_frame(bt)) {
+ unw_get_ip(&bt->frame, &pc);
+ oprofile_add_trace(pc);
+ if (unw_is_intr_frame(&bt->frame)) {
+ /*
+ * Interrupt received on kernel stack; this can
+ * happen when timer interrupt fires while processing
+ * a softirq from the tail end of a hardware interrupt
+ * which interrupted a system call. Don't laugh, it
+ * happens! Splice the backtrace into two parts to
+ * avoid spurious cycles in the gprof output.
+ */
+ /* TODO: split rather than drop the 2nd half */
+ break;
+ }
+ }
+}
+
+/*
+ * Record a kernel backtrace of at most @depth entries for the context
+ * described by @regs. Nothing is recorded for user-mode samples.
+ *
+ * The unwind itself is done by do_ia64_backtrace(), which is run through
+ * unw_init_running() with local interrupts disabled.
+ */
+void
+ia64_backtrace(struct pt_regs * const regs, unsigned int depth)
+{
+ ia64_backtrace_t bt;
+ unsigned long flags;
+
+ /*
+ * On IA64 there is little hope of getting backtraces from
+ * user space programs -- the problems of getting the unwind
+ * information from arbitrary user programs are extreme.
+ */
+ if (user_mode(regs))
+ return;
+
+ bt.depth = depth;
+ bt.regs = regs;
+ bt.prev_pfs_loc = NULL;
+ local_irq_save(flags);
+ unw_init_running(do_ia64_backtrace, &bt);
+ local_irq_restore(flags);
+}
diff --git a/arch/ia64/oprofile/init.c b/arch/ia64/oprofile/init.c
new file mode 100644
index 00000000000..125a602a660
--- /dev/null
+++ b/arch/ia64/oprofile/init.c
@@ -0,0 +1,38 @@
+/**
+ * @file init.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/oprofile.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+
+extern int perfmon_init(struct oprofile_operations * ops);
+extern void perfmon_exit(void);
+extern void ia64_backtrace(struct pt_regs * const regs, unsigned int depth);
+
+/*
+ * Architecture hook for OProfile initialisation.
+ *
+ * Always installs ia64_backtrace() as the backtrace operation. With
+ * CONFIG_PERFMON the return value is whatever perfmon_init() reports;
+ * without it the function returns -ENODEV.
+ */
+int __init oprofile_arch_init(struct oprofile_operations * ops)
+{
+ int ret = -ENODEV;
+
+#ifdef CONFIG_PERFMON
+ /* perfmon_init() can fail, but we have no way to report it */
+ ret = perfmon_init(ops);
+#endif
+ ops->backtrace = ia64_backtrace;
+
+ return ret;
+}
+
+
+void oprofile_arch_exit(void)
+{
+#ifdef CONFIG_PERFMON
+ perfmon_exit();
+#endif
+}
diff --git a/arch/ia64/oprofile/perfmon.c b/arch/ia64/oprofile/perfmon.c
new file mode 100644
index 00000000000..b7975a469fb
--- /dev/null
+++ b/arch/ia64/oprofile/perfmon.c
@@ -0,0 +1,100 @@
+/**
+ * @file perfmon.c
+ *
+ * @remark Copyright 2003 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/config.h>
+#include <linux/oprofile.h>
+#include <linux/sched.h>
+#include <asm/perfmon.h>
+#include <asm/ptrace.h>
+#include <asm/errno.h>
+
+static int allow_ints;
+
+/*
+ * Overflow handler registered with perfmon through oprofile_fmt.
+ *
+ * Requests a reset of the overflowed PMDs and, while sampling is enabled
+ * (allow_ints), forwards the sample to OProfile using the overflowed
+ * counter's event id. @task, @buf and @stamp are not used. Always
+ * returns 0.
+ */
+static int
+perfmon_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg,
+ struct pt_regs *regs, unsigned long stamp)
+{
+ int event = arg->pmd_eventid;
+
+ arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1;
+
+ /* the owner of the oprofile event buffer may have exited
+ * without perfmon being shutdown (e.g. SIGSEGV)
+ */
+ if (allow_ints)
+ oprofile_add_sample(regs, event);
+ return 0;
+}
+
+
+static int perfmon_start(void)
+{
+ allow_ints = 1;
+ return 0;
+}
+
+
+static void perfmon_stop(void)
+{
+ allow_ints = 0;
+}
+
+
+#define OPROFILE_FMT_UUID { \
+ 0x77, 0x7a, 0x6e, 0x61, 0x20, 0x65, 0x73, 0x69, 0x74, 0x6e, 0x72, 0x20, 0x61, 0x65, 0x0a, 0x6c }
+
+static pfm_buffer_fmt_t oprofile_fmt = {
+ .fmt_name = "oprofile_format",
+ .fmt_uuid = OPROFILE_FMT_UUID,
+ .fmt_handler = perfmon_handler,
+};
+
+
+/*
+ * Map the family number of the local CPU to the cpu_type string handed to
+ * OProfile; unknown families get the generic "ia64/ia64".
+ */
+static char * get_cpu_type(void)
+{
+ const __u8 cpu_family = local_cpu_data->family;
+
+ if (cpu_family == 0x07)
+ return "ia64/itanium";
+ if (cpu_family == 0x1f)
+ return "ia64/itanium2";
+ return "ia64/ia64";
+}
+
+
+/* all the ops are handled via userspace for IA64 perfmon */
+
+static int using_perfmon;
+
+/*
+ * Register the OProfile buffer format with perfmon and fill in @ops.
+ *
+ * Returns 0 on success. Any failure of pfm_register_buffer_fmt() is
+ * reported as -ENODEV (the original error code is not passed on) and
+ * @ops is left untouched in that case.
+ */
+int perfmon_init(struct oprofile_operations * ops)
+{
+ int ret = pfm_register_buffer_fmt(&oprofile_fmt);
+ if (ret)
+ return -ENODEV;
+
+ ops->cpu_type = get_cpu_type();
+ ops->start = perfmon_start;
+ ops->stop = perfmon_stop;
+ /* remembered so that perfmon_exit() only unregisters what was registered */
+ using_perfmon = 1;
+ printk(KERN_INFO "oprofile: using perfmon.\n");
+ return 0;
+}
+
+
+void perfmon_exit(void)
+{
+ if (!using_perfmon)
+ return;
+
+ pfm_unregister_buffer_fmt(oprofile_fmt.fmt_uuid);
+}
diff --git a/arch/ia64/pci/Makefile b/arch/ia64/pci/Makefile
new file mode 100644
index 00000000000..e66889e6922
--- /dev/null
+++ b/arch/ia64/pci/Makefile
@@ -0,0 +1,4 @@
+#
+# Makefile for the ia64-specific parts of the pci bus
+#
+obj-y := pci.o
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
new file mode 100644
index 00000000000..88641e5095b
--- /dev/null
+++ b/arch/ia64/pci/pci.c
@@ -0,0 +1,735 @@
+/*
+ * pci.c - Low-Level PCI Access in IA-64
+ *
+ * Derived from bios32.c of i386 tree.
+ *
+ * (c) Copyright 2002, 2005 Hewlett-Packard Development Company, L.P.
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ * Bjorn Helgaas <bjorn.helgaas@hp.com>
+ * Copyright (C) 2004 Silicon Graphics, Inc.
+ *
+ * Note: Above list of copyright holders is incomplete...
+ */
+#include <linux/config.h>
+
+#include <linux/acpi.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+
+#include <asm/machvec.h>
+#include <asm/page.h>
+#include <asm/segment.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/sal.h>
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/hw_irq.h>
+
+
+static int pci_routeirq;
+
+/*
+ * Low-level SAL-based PCI configuration access functions. Note that SAL
+ * calls are already serialized (via sal_lock), so we don't need another
+ * synchronization mechanism here.
+ */
+
+/*
+ * Compose a SAL config-space address: segment from bit 24, bus from bit 16,
+ * devfn from bit 8, register in the low bits. Every argument is
+ * parenthesized so that expression arguments are cast/shifted as a whole.
+ */
+#define PCI_SAL_ADDRESS(seg, bus, devfn, reg) \
+ (((u64) (seg) << 24) | ((bus) << 16) | ((devfn) << 8) | (reg))
+
+/* SAL 3.2 adds support for extended config space. */
+
+/*
+ * Extended layout: segment from bit 28, bus from bit 20, devfn from bit 12,
+ * register in the low bits.
+ */
+#define PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg) \
+ (((u64) (seg) << 28) | ((bus) << 20) | ((devfn) << 12) | (reg))
+
+/*
+ * Read PCI config-space register @reg of device @seg:@bus:@devfn through
+ * SAL and store the result, truncated to 32 bits, in *@value. @len is
+ * handed to SAL unchanged as the access size.
+ *
+ * The legacy address format is used when both @seg and @reg fit in 8 bits,
+ * the extended format otherwise.
+ *
+ * Returns 0 on success, -EINVAL for a NULL @value, out-of-range arguments
+ * or a failing SAL call.
+ */
+static int
+pci_sal_read (unsigned int seg, unsigned int bus, unsigned int devfn,
+ int reg, int len, u32 *value)
+{
+ u64 addr, data = 0;
+ int mode, result;
+
+ /* reg is signed: reject negative offsets as well as too large ones */
+ if (!value || (seg > 65535) || (bus > 255) || (devfn > 255) ||
+ (reg < 0) || (reg > 4095))
+ return -EINVAL;
+
+ if ((seg | reg) <= 255) {
+ addr = PCI_SAL_ADDRESS(seg, bus, devfn, reg);
+ mode = 0;
+ } else {
+ addr = PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg);
+ mode = 1;
+ }
+ result = ia64_sal_pci_config_read(addr, mode, len, &data);
+ if (result != 0)
+ return -EINVAL;
+
+ *value = (u32) data;
+ return 0;
+}
+
+/*
+ * Write @value to PCI config-space register @reg of device
+ * @seg:@bus:@devfn through SAL. @len is handed to SAL unchanged as the
+ * access size.
+ *
+ * The legacy address format is used when both @seg and @reg fit in 8 bits,
+ * the extended format otherwise.
+ *
+ * Returns 0 on success, -EINVAL for out-of-range arguments or a failing
+ * SAL call.
+ */
+static int
+pci_sal_write (unsigned int seg, unsigned int bus, unsigned int devfn,
+ int reg, int len, u32 value)
+{
+ u64 addr;
+ int mode, result;
+
+ /* reg is signed: reject negative offsets as well as too large ones */
+ if ((seg > 65535) || (bus > 255) || (devfn > 255) ||
+ (reg < 0) || (reg > 4095))
+ return -EINVAL;
+
+ if ((seg | reg) <= 255) {
+ addr = PCI_SAL_ADDRESS(seg, bus, devfn, reg);
+ mode = 0;
+ } else {
+ addr = PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg);
+ mode = 1;
+ }
+ result = ia64_sal_pci_config_write(addr, mode, len, value);
+ if (result != 0)
+ return -EINVAL;
+ return 0;
+}
+
+static struct pci_raw_ops pci_sal_ops = {
+ .read = pci_sal_read,
+ .write = pci_sal_write
+};
+
+struct pci_raw_ops *raw_pci_ops = &pci_sal_ops;
+
+static int
+pci_read (struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value)
+{
+ return raw_pci_ops->read(pci_domain_nr(bus), bus->number,
+ devfn, where, size, value);
+}
+
+static int
+pci_write (struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value)
+{
+ return raw_pci_ops->write(pci_domain_nr(bus), bus->number,
+ devfn, where, size, value);
+}
+
+struct pci_ops pci_root_ops = {
+ .read = pci_read,
+ .write = pci_write,
+};
+
+#ifdef CONFIG_NUMA
+extern acpi_status acpi_map_iosapic(acpi_handle, u32, void *, void **);
+static void acpi_map_iosapics(void)
+{
+ acpi_get_devices(NULL, acpi_map_iosapic, NULL, NULL);
+}
+#else
+static void acpi_map_iosapics(void)
+{
+ return;
+}
+#endif /* CONFIG_NUMA */
+
+static int __init
+pci_acpi_init (void)
+{
+ struct pci_dev *dev = NULL;
+
+ printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n");
+
+ acpi_map_iosapics();
+
+ if (pci_routeirq) {
+ /*
+ * PCI IRQ routing is set up by pci_enable_device(), but we
+ * also do it here in case there are still broken drivers that
+ * don't use pci_enable_device().
+ */
+ printk(KERN_INFO "PCI: Routing interrupts for all devices because \"pci=routeirq\" specified\n");
+ for_each_pci_dev(dev)
+ acpi_pci_irq_enable(dev);
+ } else
+ printk(KERN_INFO "PCI: If a device doesn't work, try \"pci=routeirq\". If it helps, post a report\n");
+
+ return 0;
+}
+
+subsys_initcall(pci_acpi_init);
+
+/* Called by ACPI when it finds a new root bus. */
+
+static struct pci_controller * __devinit
+alloc_pci_controller (int seg)
+{
+ struct pci_controller *controller;
+
+ controller = kmalloc(sizeof(*controller), GFP_KERNEL);
+ if (!controller)
+ return NULL;
+
+ memset(controller, 0, sizeof(*controller));
+ controller->segment = seg;
+ return controller;
+}
+
+/*
+ * Find or create the io_space[] entry for the I/O port range described by
+ * @addr and return its IO_SPACE_BASE().
+ *
+ * A zero translation offset means the range belongs to the legacy I/O
+ * space (entry 0). Otherwise the translation offset is ioremap()ed and an
+ * existing entry with the same mmio_base and sparse setting is reused; if
+ * there is none, a new entry is appended.
+ *
+ * Returns ~0 when all MAX_IO_SPACES entries are already in use.
+ */
+static u64 __devinit
+add_io_space (struct acpi_resource_address64 *addr)
+{
+ u64 offset;
+ int sparse = 0;
+ int i;
+
+ if (addr->address_translation_offset == 0)
+ return IO_SPACE_BASE(0); /* part of legacy IO space */
+
+ if (addr->attribute.io.translation_attribute == ACPI_SPARSE_TRANSLATION)
+ sparse = 1;
+
+ offset = (u64) ioremap(addr->address_translation_offset, 0);
+ /* reuse an already registered space with the same base and sparseness */
+ for (i = 0; i < num_io_spaces; i++)
+ if (io_space[i].mmio_base == offset &&
+ io_space[i].sparse == sparse)
+ return IO_SPACE_BASE(i);
+
+ if (num_io_spaces == MAX_IO_SPACES) {
+ printk("Too many IO port spaces\n");
+ return ~0;
+ }
+
+ /* claim the next free slot */
+ i = num_io_spaces++;
+ io_space[i].mmio_base = offset;
+ io_space[i].sparse = sparse;
+
+ return IO_SPACE_BASE(i);
+}
+
+static acpi_status __devinit
+count_window (struct acpi_resource *resource, void *data)
+{
+ unsigned int *windows = (unsigned int *) data;
+ struct acpi_resource_address64 addr;
+ acpi_status status;
+
+ status = acpi_resource_to_address64(resource, &addr);
+ if (ACPI_SUCCESS(status))
+ if (addr.resource_type == ACPI_MEMORY_RANGE ||
+ addr.resource_type == ACPI_IO_RANGE)
+ (*windows)++;
+
+ return AE_OK;
+}
+
+struct pci_root_info {
+ struct pci_controller *controller;
+ char *name;
+};
+
+static __devinit acpi_status add_window(struct acpi_resource *res, void *data)
+{
+ struct pci_root_info *info = data;
+ struct pci_window *window;
+ struct acpi_resource_address64 addr;
+ acpi_status status;
+ unsigned long flags, offset = 0;
+ struct resource *root;
+
+ status = acpi_resource_to_address64(res, &addr);
+ if (!ACPI_SUCCESS(status))
+ return AE_OK;
+
+ if (!addr.address_length)
+ return AE_OK;
+
+ if (addr.resource_type == ACPI_MEMORY_RANGE) {
+ flags = IORESOURCE_MEM;
+ root = &iomem_resource;
+ offset = addr.address_translation_offset;
+ } else if (addr.resource_type == ACPI_IO_RANGE) {
+ flags = IORESOURCE_IO;
+ root = &ioport_resource;
+ offset = add_io_space(&addr);
+ if (offset == ~0)
+ return AE_OK;
+ } else
+ return AE_OK;
+
+ window = &info->controller->window[info->controller->windows++];
+ window->resource.name = info->name;
+ window->resource.flags = flags;
+ window->resource.start = addr.min_address_range + offset;
+ window->resource.end = addr.max_address_range + offset;
+ window->resource.child = NULL;
+ window->offset = offset;
+
+ if (insert_resource(root, &window->resource)) {
+ printk(KERN_ERR "alloc 0x%lx-0x%lx from %s for %s failed\n",
+ window->resource.start, window->resource.end,
+ root->name, info->name);
+ }
+
+ return AE_OK;
+}
+
+static void __devinit
+pcibios_setup_root_windows(struct pci_bus *bus, struct pci_controller *ctrl)
+{
+ int i, j;
+
+ j = 0;
+ for (i = 0; i < ctrl->windows; i++) {
+ struct resource *res = &ctrl->window[i].resource;
+ /* HP's firmware has a hack to work around a Windows bug.
+ * Ignore these tiny memory ranges */
+ if ((res->flags & IORESOURCE_MEM) &&
+ (res->end - res->start < 16))
+ continue;
+ if (j >= PCI_BUS_NUM_RESOURCES) {
+ printk("Ignoring range [%lx-%lx] (%lx)\n", res->start,
+ res->end, res->flags);
+ continue;
+ }
+ bus->resource[j++] = res;
+ }
+}
+
+/*
+ * Set up and scan the PCI root bus @bus in segment @domain for the ACPI
+ * device @device.
+ *
+ * Allocates a pci_controller, sizes its window array from a first walk
+ * over the device's _CRS resources (count_window), fills it in a second
+ * walk (add_window) and then scans the bus with pci_root_ops.
+ *
+ * Returns the scanned bus, or NULL if an allocation fails or
+ * pci_scan_bus() returns NULL.
+ */
+struct pci_bus * __devinit
+pci_acpi_scan_root(struct acpi_device *device, int domain, int bus)
+{
+ /* room for "PCI Bus ssss:bb" plus the terminating NUL */
+ const size_t name_size = sizeof("PCI Bus 0000:00");
+ struct pci_root_info info;
+ struct pci_controller *controller;
+ unsigned int windows = 0;
+ struct pci_bus *pbus;
+ char *name;
+
+ controller = alloc_pci_controller(domain);
+ if (!controller)
+ goto out1;
+
+ controller->acpi_handle = device->handle;
+
+ acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_window,
+ &windows);
+ controller->window = kmalloc(sizeof(*controller->window) * windows,
+ GFP_KERNEL);
+ if (!controller->window)
+ goto out2;
+
+ name = kmalloc(name_size, GFP_KERNEL);
+ if (!name)
+ goto out3;
+
+ /*
+ * Bounded: a domain above 0xffff or a bus above 0xff would otherwise
+ * write past the end of the buffer.
+ */
+ snprintf(name, name_size, "PCI Bus %04x:%02x", domain, bus);
+ info.controller = controller;
+ info.name = name;
+ acpi_walk_resources(device->handle, METHOD_NAME__CRS, add_window,
+ &info);
+
+ /*
+ * NOTE(review): when pci_scan_bus() fails, controller, its windows and
+ * name stay allocated; add_window() may already have inserted the
+ * windows into the resource tree, so they are not freed here.
+ */
+ pbus = pci_scan_bus(bus, &pci_root_ops, controller);
+ if (pbus)
+ pcibios_setup_root_windows(pbus, controller);
+
+ return pbus;
+
+out3:
+ kfree(controller->window);
+out2:
+ kfree(controller);
+out1:
+ return NULL;
+}
+
+/*
+ * Convert the resource @res of @dev into a bus region @region.
+ *
+ * Looks for the first window of the device's controller that has a flag in
+ * common with @res and fully contains it, and subtracts that window's
+ * offset from the start and end. If no window matches, the offset is 0 and
+ * the range is copied unchanged.
+ */
+void pcibios_resource_to_bus(struct pci_dev *dev,
+ struct pci_bus_region *region, struct resource *res)
+{
+ struct pci_controller *controller = PCI_CONTROLLER(dev);
+ unsigned long offset = 0;
+ int i;
+
+ for (i = 0; i < controller->windows; i++) {
+ struct pci_window *window = &controller->window[i];
+ /* skip windows of another type or not covering the whole resource */
+ if (!(window->resource.flags & res->flags))
+ continue;
+ if (window->resource.start > res->start)
+ continue;
+ if (window->resource.end < res->end)
+ continue;
+ offset = window->offset;
+ break;
+ }
+
+ region->start = res->start - offset;
+ region->end = res->end - offset;
+}
+EXPORT_SYMBOL(pcibios_resource_to_bus);
+
+void pcibios_bus_to_resource(struct pci_dev *dev,
+ struct resource *res, struct pci_bus_region *region)
+{
+ struct pci_controller *controller = PCI_CONTROLLER(dev);
+ unsigned long offset = 0;
+ int i;
+
+ for (i = 0; i < controller->windows; i++) {
+ struct pci_window *window = &controller->window[i];
+ if (!(window->resource.flags & res->flags))
+ continue;
+ if (window->resource.start - window->offset > region->start)
+ continue;
+ if (window->resource.end - window->offset < region->end)
+ continue;
+ offset = window->offset;
+ break;
+ }
+
+ res->start = region->start + offset;
+ res->end = region->end + offset;
+}
+
+static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev)
+{
+ struct pci_bus_region region;
+ int i;
+ int limit = (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) ? \
+ PCI_BRIDGE_RESOURCES : PCI_NUM_RESOURCES;
+
+ for (i = 0; i < limit; i++) {
+ if (!dev->resource[i].flags)
+ continue;
+ region.start = dev->resource[i].start;
+ region.end = dev->resource[i].end;
+ pcibios_bus_to_resource(dev, &dev->resource[i], &region);
+ pci_claim_resource(dev, i);
+ }
+}
+
+/*
+ * Called after each bus is probed, but before its children are examined.
+ */
+void __devinit
+pcibios_fixup_bus (struct pci_bus *b)
+{
+ struct pci_dev *dev;
+
+ list_for_each_entry(dev, &b->devices, bus_list)
+ pcibios_fixup_device_resources(dev);
+
+ return;
+}
+
+void __devinit
+pcibios_update_irq (struct pci_dev *dev, int irq)
+{
+ pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
+
+ /* ??? FIXME -- record old value for shutdown. */
+}
+
+static inline int
+pcibios_enable_resources (struct pci_dev *dev, int mask)
+{
+ u16 cmd, old_cmd;
+ int idx;
+ struct resource *r;
+
+ if (!dev)
+ return -EINVAL;
+
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+ old_cmd = cmd;
+ for (idx=0; idx<6; idx++) {
+ /* Only set up the desired resources. */
+ if (!(mask & (1 << idx)))
+ continue;
+
+ r = &dev->resource[idx];
+ if (!r->start && r->end) {
+ printk(KERN_ERR
+ "PCI: Device %s not available because of resource collisions\n",
+ pci_name(dev));
+ return -EINVAL;
+ }
+ if (r->flags & IORESOURCE_IO)
+ cmd |= PCI_COMMAND_IO;
+ if (r->flags & IORESOURCE_MEM)
+ cmd |= PCI_COMMAND_MEMORY;
+ }
+ if (dev->resource[PCI_ROM_RESOURCE].start)
+ cmd |= PCI_COMMAND_MEMORY;
+ if (cmd != old_cmd) {
+ printk("PCI: Enabling device %s (%04x -> %04x)\n", pci_name(dev), old_cmd, cmd);
+ pci_write_config_word(dev, PCI_COMMAND, cmd);
+ }
+ return 0;
+}
+
+int
+pcibios_enable_device (struct pci_dev *dev, int mask)
+{
+ int ret;
+
+ ret = pcibios_enable_resources(dev, mask);
+ if (ret < 0)
+ return ret;
+
+ return acpi_pci_irq_enable(dev);
+}
+
+#ifdef CONFIG_ACPI_DEALLOCATE_IRQ
+void
+pcibios_disable_device (struct pci_dev *dev)
+{
+ acpi_pci_irq_disable(dev);
+}
+#endif /* CONFIG_ACPI_DEALLOCATE_IRQ */
+
+void
+pcibios_align_resource (void *data, struct resource *res,
+ unsigned long size, unsigned long align)
+{
+}
+
+/*
+ * PCI BIOS setup, always defaults to SAL interface
+ */
+char * __init
+pcibios_setup (char *str)
+{
+ if (!strcmp(str, "routeirq"))
+ pci_routeirq = 1;
+ return NULL;
+}
+
+int
+pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_state, int write_combine)
+{
+ /*
+ * I/O space cannot be accessed via normal processor loads and
+ * stores on this platform.
+ */
+ if (mmap_state == pci_mmap_io)
+ /*
+ * XXX we could relax this for I/O spaces for which ACPI
+ * indicates that the space is 1-to-1 mapped. But at the
+ * moment, we don't support multiple PCI address spaces and
+ * the legacy I/O space is not 1-to-1 mapped, so this is moot.
+ */
+ return -EINVAL;
+
+ /*
+ * Leave vm_pgoff as-is, the PCI space address is the physical
+ * address on this platform.
+ */
+ vma->vm_flags |= (VM_SHM | VM_RESERVED | VM_IO);
+
+ if (write_combine && efi_range_is_wc(vma->vm_start,
+ vma->vm_end - vma->vm_start))
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+ else
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+ vma->vm_end - vma->vm_start, vma->vm_page_prot))
+ return -EAGAIN;
+
+ return 0;
+}
+
+/**
+ * ia64_pci_get_legacy_mem - generic legacy mem routine
+ * @bus: bus to get legacy memory base address for
+ *
+ * Find the base of legacy memory for @bus. This is typically the first
+ * megabyte of bus address space for @bus or is simply 0 on platforms whose
+ * chipsets support legacy I/O and memory routing. Returns the base address
+ * or an error pointer if an error occurred.
+ *
+ * This is the ia64 generic version of this routine. Other platforms
+ * are free to override it with a machine vector.
+ */
+char *ia64_pci_get_legacy_mem(struct pci_bus *bus)
+{
+ return (char *)__IA64_UNCACHED_OFFSET;
+}
+
+/**
+ * pci_mmap_legacy_page_range - map legacy memory space to userland
+ * @bus: bus whose legacy space we're mapping
+ * @vma: vma passed in by mmap
+ *
+ * Map legacy memory space for this device back to userspace using a machine
+ * vector to get the base address.
+ */
+int
+pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma)
+{
+ char *addr;
+
+ addr = pci_get_legacy_mem(bus);
+ if (IS_ERR(addr))
+ return PTR_ERR(addr);
+
+ vma->vm_pgoff += (unsigned long)addr >> PAGE_SHIFT;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ vma->vm_flags |= (VM_SHM | VM_RESERVED | VM_IO);
+
+ if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+ vma->vm_end - vma->vm_start, vma->vm_page_prot))
+ return -EAGAIN;
+
+ return 0;
+}
+
+/**
+ * ia64_pci_legacy_read - read from legacy I/O space
+ * @bus: bus to read
+ * @port: legacy port value
+ * @val: caller allocated storage for returned value
+ * @size: number of bytes to read
+ *
+ * Simply reads @size bytes from @port and puts the result in @val.
+ *
+ * Again, this (and the write routine) are generic versions that can be
+ * overridden by the platform. This is necessary on platforms that don't
+ * support legacy I/O routing or that hard fail on legacy I/O timeouts.
+ */
+int ia64_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size)
+{
+ int ret = size;
+
+ switch (size) {
+ case 1:
+ *val = inb(port);
+ break;
+ case 2:
+ *val = inw(port);
+ break;
+ case 4:
+ *val = inl(port);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+/**
+ * ia64_pci_legacy_write - perform a legacy I/O write
+ * @bus: bus pointer
+ * @port: port to write
+ * @val: value to write
+ * @size: number of bytes to write from @val
+ *
+ * Simply writes @size bytes of @val to @port.
+ *
+ * Returns @size on success, like ia64_pci_legacy_read(), so that a caller
+ * treating the result as a byte count does not see a zero-length write.
+ * Returns -EINVAL if @size is not 1, 2 or 4.
+ *
+ * NOTE(review): @bus is declared as struct pci_dev * here while the read
+ * routine takes struct pci_bus *; the argument is unused, but the
+ * prototype should be confirmed against the machine-vector declaration.
+ */
+int ia64_pci_legacy_write(struct pci_dev *bus, u16 port, u32 val, u8 size)
+{
+ int ret = size;
+
+ switch (size) {
+ case 1:
+ outb(val, port);
+ break;
+ case 2:
+ outw(val, port);
+ break;
+ case 4:
+ outl(val, port);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+/**
+ * pci_cacheline_size - determine cacheline size for PCI devices
+ *
+ * We want to use the line-size of the outer-most cache. We assume
+ * that this line-size is the same for all CPUs.
+ *
+ * Code mostly taken from arch/ia64/kernel/palinfo.c:cache_info().
+ *
+ * RETURNS: The cache line size in bytes as reported by PAL, or
+ * SMP_CACHE_BYTES if either PAL call fails. A successfully determined
+ * size is remembered and returned directly by later calls.
+ */
+static unsigned long
+pci_cacheline_size (void)
+{
+ u64 levels, unique_caches;
+ s64 status;
+ pal_cache_config_info_t cci;
+ /* same width as the return type; a u8 would wrap to 0 for a 256-byte line */
+ static unsigned long cacheline_size;
+
+ if (cacheline_size)
+ return cacheline_size;
+
+ status = ia64_pal_cache_summary(&levels, &unique_caches);
+ if (status != 0) {
+ printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n",
+ __FUNCTION__, status);
+ return SMP_CACHE_BYTES;
+ }
+
+ /* levels - 1 selects the outer-most cache level */
+ status = ia64_pal_cache_config_info(levels - 1, /* cache_type (data_or_unified)= */ 2,
+ &cci);
+ if (status != 0) {
+ printk(KERN_ERR "%s: ia64_pal_cache_config_info() failed (status=%ld)\n",
+ __FUNCTION__, status);
+ return SMP_CACHE_BYTES;
+ }
+ /* PAL reports the line size as a power-of-two exponent */
+ cacheline_size = 1UL << cci.pcci_line_size;
+ return cacheline_size;
+}
+
+/**
+ * pcibios_prep_mwi - helper function for drivers/pci/pci.c:pci_set_mwi()
+ * @dev: the PCI device for which MWI is enabled
+ *
+ * For ia64, we can get the cacheline sizes from PAL.
+ *
+ * If the device's PCI_CACHE_LINE_SIZE setting is smaller than the desired
+ * line size it is corrected; if it is larger, the device is left alone and
+ * an error is returned.
+ *
+ * RETURNS: -EINVAL if the device's cache line size is larger than the
+ * desired one, or zero for success.
+ */
+int
+pcibios_prep_mwi (struct pci_dev *dev)
+{
+ unsigned long desired_linesize, current_linesize;
+ int rc = 0;
+ u8 pci_linesize;
+
+ desired_linesize = pci_cacheline_size();
+
+ pci_read_config_byte(dev, PCI_CACHE_LINE_SIZE, &pci_linesize);
+ /* the register value is scaled by 4 to get bytes (and divided by 4 on write) */
+ current_linesize = 4 * pci_linesize;
+ if (desired_linesize != current_linesize) {
+ printk(KERN_WARNING "PCI: slot %s has incorrect PCI cache line size of %lu bytes,",
+ pci_name(dev), current_linesize);
+ if (current_linesize > desired_linesize) {
+ printk(" expected %lu bytes instead\n", desired_linesize);
+ rc = -EINVAL;
+ } else {
+ printk(" correcting to %lu\n", desired_linesize);
+ pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, desired_linesize / 4);
+ }
+ }
+ return rc;
+}
+
+/*
+ * Return @nr_released plus the distance from @last up to
+ * IA64_LAST_DEVICE_VECTOR.
+ */
+int pci_vector_resources(int last, int nr_released)
+{
+ return nr_released + (IA64_LAST_DEVICE_VECTOR - last);
+}
diff --git a/arch/ia64/scripts/check-gas b/arch/ia64/scripts/check-gas
new file mode 100755
index 00000000000..2499e0b2243
--- /dev/null
+++ b/arch/ia64/scripts/check-gas
@@ -0,0 +1,15 @@
+#!/bin/sh
+#
+# Usage: check-gas CC OBJDUMP
+#
+# Assembles check-gas-asm.S and inspects the relocation emitted into .data.
+# Prints "good" if that relocation is against .text, "buggy" otherwise.
+#
+dir=$(dirname $0)
+CC=$1
+OBJDUMP=$2
+tmp=${TMPDIR:-/tmp}
+out=$tmp/out$$.o
+$CC -c $dir/check-gas-asm.S -o $out
+res=$($OBJDUMP -r --section .data $out | fgrep 00004 | tr -s ' ' |cut -f3 -d' ')
+rm -f $out
+# Quote $res: if the probe produced no output, an unquoted empty expansion
+# makes the test itself fail and the script would wrongly print "good".
+if [ "$res" != ".text" ]; then
+ echo buggy
+else
+ echo good
+fi
+exit 0
diff --git a/arch/ia64/scripts/check-gas-asm.S b/arch/ia64/scripts/check-gas-asm.S
new file mode 100644
index 00000000000..010e1d227e5
--- /dev/null
+++ b/arch/ia64/scripts/check-gas-asm.S
@@ -0,0 +1,2 @@
+[1:] nop 0
+ .xdata4 ".data", 0, 1b-.
diff --git a/arch/ia64/scripts/check-model.c b/arch/ia64/scripts/check-model.c
new file mode 100644
index 00000000000..e1d4e86e3d6
--- /dev/null
+++ b/arch/ia64/scripts/check-model.c
@@ -0,0 +1 @@
+int __attribute__ ((__model__ (__small__))) x;
diff --git a/arch/ia64/scripts/check-segrel.S b/arch/ia64/scripts/check-segrel.S
new file mode 100644
index 00000000000..3be4e3dbeb8
--- /dev/null
+++ b/arch/ia64/scripts/check-segrel.S
@@ -0,0 +1,4 @@
+ .rodata
+ data4 @segrel(start)
+ .data
+start:
diff --git a/arch/ia64/scripts/check-segrel.lds b/arch/ia64/scripts/check-segrel.lds
new file mode 100644
index 00000000000..1c2f13e181d
--- /dev/null
+++ b/arch/ia64/scripts/check-segrel.lds
@@ -0,0 +1,11 @@
+SECTIONS {
+ . = SIZEOF_HEADERS;
+ .rodata : { *(.rodata) } :ro
+ . = 0xa0000;
+ .data : { *(.data) } :dat
+ /DISCARD/ : { *(*) }
+}
+PHDRS {
+ ro PT_LOAD FILEHDR PHDRS;
+ dat PT_LOAD;
+}
diff --git a/arch/ia64/scripts/check-serialize.S b/arch/ia64/scripts/check-serialize.S
new file mode 100644
index 00000000000..0400c106806
--- /dev/null
+++ b/arch/ia64/scripts/check-serialize.S
@@ -0,0 +1,2 @@
+ .serialize.data
+ .serialize.instruction
diff --git a/arch/ia64/scripts/check-text-align.S b/arch/ia64/scripts/check-text-align.S
new file mode 100644
index 00000000000..03f586abb73
--- /dev/null
+++ b/arch/ia64/scripts/check-text-align.S
@@ -0,0 +1,6 @@
+ .proc foo
+ .prologue
+foo: .save rp, r2
+ nop 0
+ .align 64
+ .endp foo
diff --git a/arch/ia64/scripts/toolchain-flags b/arch/ia64/scripts/toolchain-flags
new file mode 100755
index 00000000000..3f0c2adacb7
--- /dev/null
+++ b/arch/ia64/scripts/toolchain-flags
@@ -0,0 +1,53 @@
+#!/bin/sh
+#
+# Check whether linker can handle cross-segment @segrel():
+#
+CPPFLAGS=""
+CC=$1
+OBJDUMP=$2
+READELF=$3
+dir=$(dirname $0)
+tmp=${TMPDIR:-/tmp}
+out=$tmp/out$$
+
+# Check whether cross-segment segment-relative relocs work fine. We need
+# that for building the gate DSO:
+
+$CC -nostdlib -static -Wl,-T$dir/check-segrel.lds $dir/check-segrel.S -o $out
+res=$($OBJDUMP --full --section .rodata $out | fgrep 000 | cut -f3 -d' ')
+rm -f $out
+# Quote $res: if the link or the dump failed and left it empty, an unquoted
+# expansion breaks the test and the linker would silently count as working.
+if [ "$res" != 00000a00 ]; then
+ CPPFLAGS="$CPPFLAGS -DHAVE_BUGGY_SEGREL"
+ cat >&2 <<EOF
+warning: your linker cannot handle cross-segment segment-relative relocations.
+ please upgrade to a newer version (it is safe to use this linker, but
+ the kernel will be bigger than strictly necessary).
+EOF
+fi
+
+# Check whether .align inside a function works as expected.
+
+$CC -c $dir/check-text-align.S -o $out
+$READELF -u $out | fgrep -q 'prologue(rlen=12)'
+res=$?
+rm -f $out
+if [ $res -eq 0 ]; then
+ CPPFLAGS="$CPPFLAGS -DHAVE_WORKING_TEXT_ALIGN"
+fi
+
+if ! $CC -c $dir/check-model.c -o $out 2>&1 | grep __model__ | grep -q attrib
+then
+ CPPFLAGS="$CPPFLAGS -DHAVE_MODEL_SMALL_ATTRIBUTE"
+fi
+rm -f $out
+
+# Check whether assembler supports .serialize.{data,instruction} directive.
+
+$CC -c $dir/check-serialize.S -o $out 2>/dev/null
+res=$?
+rm -f $out
+if [ $res -eq 0 ]; then
+ CPPFLAGS="$CPPFLAGS -DHAVE_SERIALIZE_DIRECTIVE"
+fi
+
+echo $CPPFLAGS
diff --git a/arch/ia64/scripts/unwcheck.py b/arch/ia64/scripts/unwcheck.py
new file mode 100755
index 00000000000..c27849889e1
--- /dev/null
+++ b/arch/ia64/scripts/unwcheck.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+#
+# Usage: unwcheck.py FILE
+#
+# This script checks the unwind info of each function in file FILE
+# and verifies that the sum of the region-lengths matches the total
+# length of the function.
+#
+# Based on a shell/awk script originally written by Harish Patil,
+# which was converted to Perl by Matthew Chapman, which was converted
+# to Python by David Mosberger.
+#
+import os
+import re
+import sys
+
+if len(sys.argv) != 2:
+ print "Usage: %s FILE" % sys.argv[0]
+ sys.exit(2)
+
+readelf = os.getenv("READELF", "readelf")
+
+start_pattern = re.compile("<([^>]*)>: \[0x([0-9a-f]+)-0x([0-9a-f]+)\]")
+rlen_pattern = re.compile(".*rlen=([0-9]+)")
+
+# Report an error (and bump the global num_errors) when the slot count
+# computed from a function's address range differs from the sum of the
+# rlen= values collected for it.  A function without a name is reported
+# by the address range held in the globals start/end.
+def check_func (func, slots, rlen_sum):
+ if slots != rlen_sum:
+ global num_errors
+ num_errors += 1
+ if not func: func = "[%#x-%#x]" % (start, end) # unnamed: fall back to the address range
+ print "ERROR: %s: %lu slots, total region length = %lu" % (func, slots, rlen_sum)
+ return
+
+num_funcs = 0
+num_errors = 0
+func = False
+slots = 0
+rlen_sum = 0
+# Walk the output of "readelf -u FILE" line by line.
+for line in os.popen("%s -u %s" % (readelf, sys.argv[1])):
+ m = start_pattern.match(line)
+ if m:
+ # A new "<name>: [0xstart-0xend]" header: first check the function
+ # collected so far, then start accumulating for the new one.
+ check_func(func, slots, rlen_sum)
+
+ func = m.group(1)
+ start = long(m.group(2), 16)
+ end = long(m.group(3), 16)
+ slots = 3 * (end - start) / 16 # three slots per 16 bytes of address range
+ rlen_sum = 0L
+ num_funcs += 1
+ else:
+ m = rlen_pattern.match(line)
+ if m:
+ rlen_sum += long(m.group(1))
+# The last function has no following header, so check it explicitly.
+check_func(func, slots, rlen_sum)
+
+if num_errors == 0:
+ print "No errors detected in %u functions." % num_funcs
+else:
+ if num_errors > 1:
+ err="errors"
+ else:
+ err="error"
+ print "%u %s detected in %u functions." % (num_errors, err, num_funcs)
+ sys.exit(1)
diff --git a/arch/ia64/sn/Makefile b/arch/ia64/sn/Makefile
new file mode 100644
index 00000000000..a269f6d84c2
--- /dev/null
+++ b/arch/ia64/sn/Makefile
@@ -0,0 +1,14 @@
+# arch/ia64/sn/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2004 Silicon Graphics, Inc. All Rights Reserved.
+#
+# Makefile for the sn ia64 subplatform
+#
+
+CPPFLAGS += -I$(srctree)/arch/ia64/sn/include
+
+obj-y += kernel/ pci/
diff --git a/arch/ia64/sn/include/ioerror.h b/arch/ia64/sn/include/ioerror.h
new file mode 100644
index 00000000000..e68f2b0789a
--- /dev/null
+++ b/arch/ia64/sn/include/ioerror.h
@@ -0,0 +1,81 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2003 Silicon Graphics, Inc. All rights reserved.
+ */
+#ifndef _ASM_IA64_SN_IOERROR_H
+#define _ASM_IA64_SN_IOERROR_H
+
+/*
+ * IO error structure.
+ *
+ * This structure would expand to hold the information retrieved from
+ * all IO related error registers.
+ *
+ * This structure is defined to hold all system specific
+ * information related to a single error.
+ *
+ * This serves a couple of purposes.
+ * - Error handling often involves translating one form of address to other
+ * form. So, instead of having different data structures at each level,
+ * we have a single structure, and the appropriate fields get filled in
+ * at each layer.
+ * - This provides a way to dump all error related information in any layer
+ * of error handling (debugging aid).
+ *
+ * A second possibility is to allow each layer to define its own error
+ * data structure, and fill in the proper fields. This has the advantage
+ * of isolating the layers.
+ * A big concern is the potential stack usage (and overflow), if each layer
+ * defines these structures on stack (assuming we don't want to do kmalloc).
+ *
+ * Any layer wishing to pass extra information to a layer next to it in
+ * error handling hierarchy, can do so as a separate parameter.
+ */
+
+typedef struct io_error_s {
+ /* Bit fields indicating which structure fields are valid */
+ union {
+ struct {
+ unsigned ievb_errortype:1;
+ unsigned ievb_widgetnum:1;
+ unsigned ievb_widgetdev:1;
+ unsigned ievb_srccpu:1;
+ unsigned ievb_srcnode:1;
+ unsigned ievb_errnode:1;
+ unsigned ievb_sysioaddr:1;
+ unsigned ievb_xtalkaddr:1;
+ unsigned ievb_busspace:1;
+ unsigned ievb_busaddr:1;
+ unsigned ievb_vaddr:1;
+ unsigned ievb_memaddr:1;
+ unsigned ievb_epc:1;
+ unsigned ievb_ef:1;
+ unsigned ievb_tnum:1;
+ } iev_b; /* one ievb_<f> bit per ie_<f> field; set by IOERROR_SETVALUE */
+ unsigned iev_a; /* overlays iev_b; IOERROR_INIT zeroes it to clear all bits */
+ } ie_v;
+
+ short ie_errortype; /* error type: extra info about error */
+ short ie_widgetnum; /* Widget number that's in error */
+ short ie_widgetdev; /* Device within widget in error */
+ cpuid_t ie_srccpu; /* CPU on srcnode generating error */
+ cnodeid_t ie_srcnode; /* Node which caused the error */
+ cnodeid_t ie_errnode; /* Node where error was noticed */
+ iopaddr_t ie_sysioaddr; /* Sys specific IO address */
+ iopaddr_t ie_xtalkaddr; /* Xtalk (48bit) addr of Error */
+ iopaddr_t ie_busspace; /* Bus specific address space */
+ iopaddr_t ie_busaddr; /* Bus specific address */
+ caddr_t ie_vaddr; /* Virtual address of error */
+ iopaddr_t ie_memaddr; /* Physical memory address */
+ caddr_t ie_epc; /* pc when error reported */
+ caddr_t ie_ef; /* eframe when error reported */
+ short ie_tnum; /* Xtalk TNUM field */
+} ioerror_t;
+
+/*
+ * IOERROR_INIT(e) clears every validity bit of *e at once through the
+ * iev_a overlay; the ie_* value fields themselves are left untouched.
+ * IOERROR_SETVALUE(e,f,v) stores v in field ie_<f> and sets the matching
+ * ievb_<f> validity bit, e.g. IOERROR_SETVALUE(e, widgetnum, 3).
+ */
+#define IOERROR_INIT(e) do { (e)->ie_v.iev_a = 0; } while (0)
+#define IOERROR_SETVALUE(e,f,v) do { (e)->ie_ ## f = (v); (e)->ie_v.iev_b.ievb_ ## f = 1; } while (0)
+
+#endif /* _ASM_IA64_SN_IOERROR_H */
diff --git a/arch/ia64/sn/include/pci/pcibr_provider.h b/arch/ia64/sn/include/pci/pcibr_provider.h
new file mode 100644
index 00000000000..b1f05ffec70
--- /dev/null
+++ b/arch/ia64/sn/include/pci/pcibr_provider.h
@@ -0,0 +1,149 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992-1997,2000-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+#ifndef _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H
+#define _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H
+
+/* Workarounds */
+#define PV907516 (1 << 1) /* TIOCP: Don't write the write buffer flush reg */
+
+#define BUSTYPE_MASK 0x1
+
+/*
+ * Macros given a pcibus structure.  IS_PCIX() and IS_PIC_SOFT() take a
+ * pointer to a structure with pbi_bridge_mode / pbi_bridge_type members;
+ * IS_PCI_BRIDGE_ASIC() takes a PCIIO_ASIC_TYPE_* value.  Every argument
+ * is parenthesized so that expression arguments expand correctly.
+ */
+#define IS_PCIX(ps) ((ps)->pbi_bridge_mode & BUSTYPE_MASK)
+#define IS_PCI_BRIDGE_ASIC(asic) ((asic) == PCIIO_ASIC_TYPE_PIC || \
+ (asic) == PCIIO_ASIC_TYPE_TIOCP)
+#define IS_PIC_SOFT(ps) ((ps)->pbi_bridge_type == PCIBR_BRIDGETYPE_PIC)
+
+
+/*
+ * The different PCI Bridge types supported on the SGI Altix platforms
+ */
+#define PCIBR_BRIDGETYPE_UNKNOWN -1
+#define PCIBR_BRIDGETYPE_PIC 2
+#define PCIBR_BRIDGETYPE_TIOCP 3
+
+/*
+ * Bridge 64bit Direct Map Attributes
+ */
+#define PCI64_ATTR_PREF (1ull << 59)
+#define PCI64_ATTR_PREC (1ull << 58)
+#define PCI64_ATTR_VIRTUAL (1ull << 57)
+#define PCI64_ATTR_BAR (1ull << 56)
+#define PCI64_ATTR_SWAP (1ull << 55)
+#define PCI64_ATTR_VIRTUAL1 (1ull << 54)
+
+#define PCI32_LOCAL_BASE 0
+#define PCI32_MAPPED_BASE 0x40000000
+#define PCI32_DIRECT_BASE 0x80000000
+
+/*
+ * Classify a 32-bit PCI address: mapped addresses lie in
+ * [PCI32_MAPPED_BASE, PCI32_DIRECT_BASE), direct addresses start at
+ * PCI32_DIRECT_BASE.  IS_PCI32_DIRECT() used to compare against
+ * PCI32_MAPPED_BASE and so was also true for every mapped address.
+ */
+#define IS_PCI32_MAPPED(x) ((uint64_t)(x) < PCI32_DIRECT_BASE && \
+ (uint64_t)(x) >= PCI32_MAPPED_BASE)
+#define IS_PCI32_DIRECT(x) ((uint64_t)(x) >= PCI32_DIRECT_BASE)
+
+
+/*
+ * Bridge PMU Address Translation Entry Attributes
+ */
+#define PCI32_ATE_V (0x1 << 0)
+#define PCI32_ATE_CO (0x1 << 1)
+#define PCI32_ATE_PREC (0x1 << 2)
+#define PCI32_ATE_PREF (0x1 << 3)
+#define PCI32_ATE_BAR (0x1 << 4)
+#define PCI32_ATE_ADDR_SHFT 12
+
+/*
+ * MINIMAL_ATES_REQUIRED(addr, size) is true when a buffer of 'size' bytes
+ * starting at 'addr' ends in the same I/O page index as a buffer of the
+ * same size starting at offset 0 of a page.  MINIMAL_ATE_FLAG() gives the
+ * same answer as 1 or 0 and accepts any address expression: the cast is
+ * applied to the whole (parenthesized) argument.
+ */
+#define MINIMAL_ATES_REQUIRED(addr, size) \
+ (IOPG(IOPGOFF(addr) + (size) - 1) == IOPG((size) - 1))
+
+#define MINIMAL_ATE_FLAG(addr, size) \
+ (MINIMAL_ATES_REQUIRED((uint64_t)(addr), size) ? 1 : 0)
+
+/* bit 29 of the pci address is the SWAP bit */
+#define ATE_SWAPSHIFT 29
+#define ATE_SWAP_ON(x) ((x) |= (1 << ATE_SWAPSHIFT))
+#define ATE_SWAP_OFF(x) ((x) &= ~(1 << ATE_SWAPSHIFT))
+
+/*
+ * I/O page size
+ */
+#if PAGE_SIZE < 16384
+#define IOPFNSHIFT 12 /* 4K per mapped page */
+#else
+#define IOPFNSHIFT 14 /* 16K per mapped page */
+#endif
+
+#define IOPGSIZE (1 << IOPFNSHIFT)
+#define IOPG(x) ((x) >> IOPFNSHIFT)
+#define IOPGOFF(x) ((x) & (IOPGSIZE-1))
+
+#define PCIBR_DEV_SWAP_DIR (1ull << 19)
+#define PCIBR_CTRL_PAGE_SIZE (0x1 << 21)
+
+/*
+ * PMU resources.
+ */
+struct ate_resource{
+ uint64_t *ate;
+ uint64_t num_ate;
+ uint64_t lowest_free_index;
+};
+
+struct pcibus_info {
+ struct pcibus_bussoft pbi_buscommon; /* common header */
+ uint32_t pbi_moduleid;
+ short pbi_bridge_type; /* compared with PCIBR_BRIDGETYPE_PIC by IS_PIC_SOFT() */
+ short pbi_bridge_mode; /* BUSTYPE_MASK bit is tested by IS_PCIX() */
+
+ struct ate_resource pbi_int_ate_resource;
+ uint64_t pbi_int_ate_size;
+
+ uint64_t pbi_dir_xbase;
+ char pbi_hub_xid;
+
+ uint64_t pbi_devreg[8];
+ spinlock_t pbi_lock; /* taken by pcibr_lock(), released by pcibr_unlock() */
+
+ uint32_t pbi_valid_devices;
+ uint32_t pbi_enabled_devices;
+};
+
+/*
+ * pcibus_info structure locking macros
+ *
+ * pcibr_lock() takes pbi_lock with spin_lock_irqsave() and returns the
+ * saved flags; hand that value back to pcibr_unlock(), which releases the
+ * lock with spin_unlock_irqrestore().
+ */
+static inline unsigned long
+pcibr_lock(struct pcibus_info *pcibus_info)
+{
+ unsigned long flag;
+ spin_lock_irqsave(&pcibus_info->pbi_lock, flag);
+ return flag;
+}
+#define pcibr_unlock(pcibus_info, flag) \
+ spin_unlock_irqrestore(&(pcibus_info)->pbi_lock, flag)
+
+extern void *pcibr_bus_fixup(struct pcibus_bussoft *);
+extern uint64_t pcibr_dma_map(struct pcidev_info *, unsigned long, size_t, unsigned int);
+extern void pcibr_dma_unmap(struct pcidev_info *, dma_addr_t, int);
+
+/*
+ * prototypes for the bridge asic register access routines in pcibr_reg.c
+ */
+extern void pcireg_control_bit_clr(struct pcibus_info *, uint64_t);
+extern void pcireg_control_bit_set(struct pcibus_info *, uint64_t);
+extern uint64_t pcireg_tflush_get(struct pcibus_info *);
+extern uint64_t pcireg_intr_status_get(struct pcibus_info *);
+extern void pcireg_intr_enable_bit_clr(struct pcibus_info *, uint64_t);
+extern void pcireg_intr_enable_bit_set(struct pcibus_info *, uint64_t);
+extern void pcireg_intr_addr_addr_set(struct pcibus_info *, int, uint64_t);
+extern void pcireg_force_intr_set(struct pcibus_info *, int);
+extern uint64_t pcireg_wrb_flush_get(struct pcibus_info *, int);
+extern void pcireg_int_ate_set(struct pcibus_info *, int, uint64_t);
+extern uint64_t * pcireg_int_ate_addr(struct pcibus_info *, int);
+extern void pcibr_force_interrupt(struct sn_irq_info *sn_irq_info);
+extern void pcibr_change_devices_irq(struct sn_irq_info *sn_irq_info);
+extern int pcibr_ate_alloc(struct pcibus_info *, int);
+extern void pcibr_ate_free(struct pcibus_info *, int);
+extern void ate_write(struct pcibus_info *, int, int, uint64_t);
+#endif
diff --git a/arch/ia64/sn/include/pci/pcibus_provider_defs.h b/arch/ia64/sn/include/pci/pcibus_provider_defs.h
new file mode 100644
index 00000000000..07065615bbe
--- /dev/null
+++ b/arch/ia64/sn/include/pci/pcibus_provider_defs.h
@@ -0,0 +1,43 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+#ifndef _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H
+#define _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H
+
+/*
+ * SN pci asic types. Do not ever renumber these or reuse values. The
+ * values must agree with what prom thinks they are.
+ */
+
+#define PCIIO_ASIC_TYPE_UNKNOWN 0
+#define PCIIO_ASIC_TYPE_PPB 1
+#define PCIIO_ASIC_TYPE_PIC 2
+#define PCIIO_ASIC_TYPE_TIOCP 3
+
+/*
+ * Common pciio bus provider data. There should be one of these as the
+ * first field in any pciio based provider soft structure (e.g. pcibr_soft,
+ * tioca_soft, etc).
+ */
+
+struct pcibus_bussoft {
+ uint32_t bs_asic_type; /* chipset type */
+ uint32_t bs_xid; /* xwidget id */
+ uint64_t bs_persist_busnum; /* Persistent Bus Number */
+ uint64_t bs_legacy_io; /* legacy io pio addr */
+ uint64_t bs_legacy_mem; /* legacy mem pio addr */
+ uint64_t bs_base; /* widget base */
+ struct xwidget_info *bs_xwidget_info;
+};
+
+/*
+ * DMA mapping flags
+ */
+
+#define SN_PCIDMA_CONSISTENT 0x0001
+
+#endif /* _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H */
diff --git a/arch/ia64/sn/include/pci/pcidev.h b/arch/ia64/sn/include/pci/pcidev.h
new file mode 100644
index 00000000000..81eb95d3bf4
--- /dev/null
+++ b/arch/ia64/sn/include/pci/pcidev.h
@@ -0,0 +1,54 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+#ifndef _ASM_IA64_SN_PCI_PCIDEV_H
+#define _ASM_IA64_SN_PCI_PCIDEV_H
+
+#include <linux/pci.h>
+
+extern struct sn_irq_info **sn_irq;
+
+#define SN_PCIDEV_INFO(pci_dev) \
+ ((struct pcidev_info *)(pci_dev)->sysdata)
+
+/*
+ * Given a pci_bus, return the sn pcibus_bussoft struct. Note that
+ * this only works for root busses, not for busses represented by PPB's.
+ */
+
+#define SN_PCIBUS_BUSSOFT(pci_bus) \
+ ((struct pcibus_bussoft *)(PCI_CONTROLLER((pci_bus))->platform_data))
+
+/*
+ * Given a struct pci_dev, return the sn pcibus_bussoft struct. Note
+ * that this is not equivalent to SN_PCIBUS_BUSSOFT(pci_dev->bus) due
+ * to possible PPB's in the path.
+ */
+
+#define SN_PCIDEV_BUSSOFT(pci_dev) \
+ (SN_PCIDEV_INFO(pci_dev)->pdi_host_pcidev_info->pdi_pcibus_info)
+
+#define PCIIO_BUS_NONE 255 /* bus 255 reserved */
+#define PCIIO_SLOT_NONE 255
+#define PCIIO_FUNC_NONE 255
+#define PCIIO_VENDOR_ID_NONE (-1)
+
+struct pcidev_info {
+ uint64_t pdi_pio_mapped_addr[7]; /* 6 BARs PLUS 1 ROM */
+ uint64_t pdi_slot_host_handle; /* Bus and devfn Host pci_dev */
+
+ struct pcibus_bussoft *pdi_pcibus_info; /* Kernel common bus soft */
+ struct pcidev_info *pdi_host_pcidev_info; /* Kernel Host pci_dev */
+ struct pci_dev *pdi_linux_pcidev; /* Kernel pci_dev */
+
+ struct sn_irq_info *pdi_sn_irq_info;
+};
+
+extern void sn_irq_fixup(struct pci_dev *pci_dev,
+ struct sn_irq_info *sn_irq_info);
+
+#endif /* _ASM_IA64_SN_PCI_PCIDEV_H */
diff --git a/arch/ia64/sn/include/pci/pic.h b/arch/ia64/sn/include/pci/pic.h
new file mode 100644
index 00000000000..fd18acecb1e
--- /dev/null
+++ b/arch/ia64/sn/include/pci/pic.h
@@ -0,0 +1,261 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2003 Silicon Graphics, Inc. All rights reserved.
+ */
+#ifndef _ASM_IA64_SN_PCI_PIC_H
+#define _ASM_IA64_SN_PCI_PIC_H
+
+/*
+ * PIC AS DEVICE ZERO
+ * ------------------
+ *
+ * PIC handles PCI/X busses. PCI/X requires that the 'bridge' (i.e. PIC)
+ * be designated as 'device 0'. That is a departure from earlier SGI
+ * PCI bridges. Because of that we use config space 1 to access the
+ * config space of the first actual PCI device on the bus.
+ * Here's what the PIC manual says:
+ *
+ * The current PCI-X bus specification now defines that the parent
+ * hosts bus bridge (PIC for example) must be device 0 on bus 0. PIC
+ * reduced the total number of devices from 8 to 4 and removed the
+ * device registers and windows, now only supporting devices 0,1,2, and
+ * 3. PIC did leave all 8 configuration space windows. The reason was
+ * there was nothing to gain by removing them. Here in lies the problem.
+ * The device numbering we do using 0 through 3 is unrelated to the device
+ * numbering which PCI-X requires in configuration space. In the past we
+ * correlated Config space and our device space 0 <-> 0, 1 <-> 1, etc.
+ * PCI-X requires we start at 1, not 0 and currently the PX brick
+ * does associate our:
+ *
+ * device 0 with configuration space window 1,
+ * device 1 with configuration space window 2,
+ * device 2 with configuration space window 3,
+ * device 3 with configuration space window 4.
+ *
+ * The net effect is that all config space access are off-by-one with
+ * relation to other per-slot accesses on the PIC.
+ * Here is a table that shows some of that:
+ *
+ * Internal Slot#
+ * |
+ * | 0 1 2 3
+ * ----------|---------------------------------------
+ * config | 0x21000 0x22000 0x23000 0x24000
+ * |
+ * even rrb | 0[0] n/a 1[0] n/a [] == implied even/odd
+ * |
+ * odd rrb | n/a 0[1] n/a 1[1]
+ * |
+ * int dev | 00 01 10 11
+ * |
+ * ext slot# | 1 2 3 4
+ * ----------|---------------------------------------
+ */
+
+#define PIC_ATE_TARGETID_SHFT 8
+#define PIC_HOST_INTR_ADDR 0x0000FFFFFFFFFFFFUL
+#define PIC_PCI64_ATTR_TARG_SHFT 60
+
+
+/*****************************************************************************
+ *********************** PIC MMR structure mapping ***************************
+ *****************************************************************************/
+
+/* NOTE: PIC WAR. PV#854697. PIC does not allow writes just to [31:0]
+ * of a 64-bit register. When writing PIC registers, always write the
+ * entire 64 bits.
+ */
+
+struct pic {
+
+ /* 0x000000-0x00FFFF -- Local Registers */
+
+ /* 0x000000-0x000057 -- Standard Widget Configuration */
+ uint64_t p_wid_id; /* 0x000000 */
+ uint64_t p_wid_stat; /* 0x000008 */
+ uint64_t p_wid_err_upper; /* 0x000010 */
+ uint64_t p_wid_err_lower; /* 0x000018 */
+ #define p_wid_err p_wid_err_lower
+ uint64_t p_wid_control; /* 0x000020 */
+ uint64_t p_wid_req_timeout; /* 0x000028 */
+ uint64_t p_wid_int_upper; /* 0x000030 */
+ uint64_t p_wid_int_lower; /* 0x000038 */
+ #define p_wid_int p_wid_int_lower
+ uint64_t p_wid_err_cmdword; /* 0x000040 */
+ uint64_t p_wid_llp; /* 0x000048 */
+ uint64_t p_wid_tflush; /* 0x000050 */
+
+ /* 0x000058-0x00007F -- Bridge-specific Widget Configuration */
+ uint64_t p_wid_aux_err; /* 0x000058 */
+ uint64_t p_wid_resp_upper; /* 0x000060 */
+ uint64_t p_wid_resp_lower; /* 0x000068 */
+ #define p_wid_resp p_wid_resp_lower
+ uint64_t p_wid_tst_pin_ctrl; /* 0x000070 */
+ uint64_t p_wid_addr_lkerr; /* 0x000078 */
+
+ /* 0x000080-0x00008F -- PMU & MAP */
+ uint64_t p_dir_map; /* 0x000080 */
+ uint64_t _pad_000088; /* 0x000088 */
+
+ /* 0x000090-0x00009F -- SSRAM */
+ uint64_t p_map_fault; /* 0x000090 */
+ uint64_t _pad_000098; /* 0x000098 */
+
+ /* 0x0000A0-0x0000AF -- Arbitration */
+ uint64_t p_arb; /* 0x0000A0 */
+ uint64_t _pad_0000A8; /* 0x0000A8 */
+
+ /* 0x0000B0-0x0000BF -- Number In A Can or ATE Parity Error */
+ uint64_t p_ate_parity_err; /* 0x0000B0 */
+ uint64_t _pad_0000B8; /* 0x0000B8 */
+
+ /* 0x0000C0-0x0000FF -- PCI/GIO */
+ uint64_t p_bus_timeout; /* 0x0000C0 */
+ uint64_t p_pci_cfg; /* 0x0000C8 */
+ uint64_t p_pci_err_upper; /* 0x0000D0 */
+ uint64_t p_pci_err_lower; /* 0x0000D8 */
+ #define p_pci_err p_pci_err_lower
+ uint64_t _pad_0000E0[4]; /* 0x0000{E0..F8} */
+
+ /* 0x000100-0x0001FF -- Interrupt */
+ uint64_t p_int_status; /* 0x000100 */
+ uint64_t p_int_enable; /* 0x000108 */
+ uint64_t p_int_rst_stat; /* 0x000110 */
+ uint64_t p_int_mode; /* 0x000118 */
+ uint64_t p_int_device; /* 0x000120 */
+ uint64_t p_int_host_err; /* 0x000128 */
+ uint64_t p_int_addr[8]; /* 0x0001{30,,,68} */
+ uint64_t p_err_int_view; /* 0x000170 */
+ uint64_t p_mult_int; /* 0x000178 */
+ uint64_t p_force_always[8]; /* 0x0001{80,,,B8} */
+ uint64_t p_force_pin[8]; /* 0x0001{C0,,,F8} */
+
+ /* 0x000200-0x000298 -- Device */
+ uint64_t p_device[4]; /* 0x0002{00,,,18} */
+ uint64_t _pad_000220[4]; /* 0x0002{20,,,38} */
+ uint64_t p_wr_req_buf[4]; /* 0x0002{40,,,58} */
+ uint64_t _pad_000260[4]; /* 0x0002{60,,,78} */
+ uint64_t p_rrb_map[2]; /* 0x0002{80,,,88} */
+ #define p_even_resp p_rrb_map[0] /* 0x000280 */
+ #define p_odd_resp p_rrb_map[1] /* 0x000288 */
+ uint64_t p_resp_status; /* 0x000290 */
+ uint64_t p_resp_clear; /* 0x000298 */
+
+ uint64_t _pad_0002A0[12]; /* 0x0002{A0..F8} */
+
+ /* 0x000300-0x0003F8 -- Buffer Address Match Registers */
+ struct {
+ uint64_t upper; /* 0x0003{00,,,F0} */
+ uint64_t lower; /* 0x0003{08,,,F8} */
+ } p_buf_addr_match[16];
+
+ /* 0x000400-0x0005FF -- Performance Monitor Registers (even only) */
+ struct {
+ uint64_t flush_w_touch; /* 0x000{400,,,5C0} */
+ uint64_t flush_wo_touch; /* 0x000{408,,,5C8} */
+ uint64_t inflight; /* 0x000{410,,,5D0} */
+ uint64_t prefetch; /* 0x000{418,,,5D8} */
+ uint64_t total_pci_retry; /* 0x000{420,,,5E0} */
+ uint64_t max_pci_retry; /* 0x000{428,,,5E8} */
+ uint64_t max_latency; /* 0x000{430,,,5F0} */
+ uint64_t clear_all; /* 0x000{438,,,5F8} */
+ } p_buf_count[8];
+
+
+ /* 0x000600-0x0009FF -- PCI/X registers */
+ uint64_t p_pcix_bus_err_addr; /* 0x000600 */
+ uint64_t p_pcix_bus_err_attr; /* 0x000608 */
+ uint64_t p_pcix_bus_err_data; /* 0x000610 */
+ uint64_t p_pcix_pio_split_addr; /* 0x000618 */
+ uint64_t p_pcix_pio_split_attr; /* 0x000620 */
+ uint64_t p_pcix_dma_req_err_attr; /* 0x000628 */
+ uint64_t p_pcix_dma_req_err_addr; /* 0x000630 */
+ uint64_t p_pcix_timeout; /* 0x000638 */
+
+ uint64_t _pad_000640[120]; /* 0x000{640,,,9F8} */
+
+ /* 0x000A00-0x000BFF -- PCI/X Read&Write Buffer */
+ struct {
+ uint64_t p_buf_addr; /* 0x000{A00,,,AF0} */
+ uint64_t p_buf_attr; /* 0X000{A08,,,AF8} */
+ } p_pcix_read_buf_64[16];
+
+ struct {
+ uint64_t p_buf_addr; /* 0x000{B00,,,BE0} */
+ uint64_t p_buf_attr; /* 0x000{B08,,,BE8} */
+ uint64_t p_buf_valid; /* 0x000{B10,,,BF0} */
+ uint64_t __pad1; /* 0x000{B18,,,BF8} */
+ } p_pcix_write_buf_64[8];
+
+ /* End of Local Registers -- Start of Address Map space */
+
+ char _pad_000c00[0x010000 - 0x000c00];
+
+ /* 0x010000-0x011fff -- Internal ATE RAM (Auto Parity Generation) */
+ uint64_t p_int_ate_ram[1024]; /* 0x010000-0x011fff */
+
+ /* 0x012000-0x013fff -- Internal ATE RAM (Manual Parity Generation) */
+ uint64_t p_int_ate_ram_mp[1024]; /* 0x012000-0x013fff */
+
+ char _pad_014000[0x18000 - 0x014000];
+
+ /* 0x18000-0x197F8 -- PIC Write Request Ram */
+ uint64_t p_wr_req_lower[256]; /* 0x18000 - 0x187F8 */
+ uint64_t p_wr_req_upper[256]; /* 0x18800 - 0x18FF8 */
+ uint64_t p_wr_req_parity[256]; /* 0x19000 - 0x197F8 */
+
+ char _pad_019800[0x20000 - 0x019800];
+
+ /* 0x020000-0x027FFF -- PCI Device Configuration Spaces */
+ union {
+ uint8_t c[0x1000 / 1]; /* 0x02{0000,,,7FFF} */
+ uint16_t s[0x1000 / 2]; /* 0x02{0000,,,7FFF} */
+ uint32_t l[0x1000 / 4]; /* 0x02{0000,,,7FFF} */
+ uint64_t d[0x1000 / 8]; /* 0x02{0000,,,7FFF} */
+ union {
+ uint8_t c[0x100 / 1];
+ uint16_t s[0x100 / 2];
+ uint32_t l[0x100 / 4];
+ uint64_t d[0x100 / 8];
+ } f[8];
+ } p_type0_cfg_dev[8]; /* 0x02{0000,,,7FFF} */
+
+ /* 0x028000-0x028FFF -- PCI Type 1 Configuration Space */
+ union {
+ uint8_t c[0x1000 / 1]; /* 0x028000-0x029000 */
+ uint16_t s[0x1000 / 2]; /* 0x028000-0x029000 */
+ uint32_t l[0x1000 / 4]; /* 0x028000-0x029000 */
+ uint64_t d[0x1000 / 8]; /* 0x028000-0x029000 */
+ union {
+ uint8_t c[0x100 / 1];
+ uint16_t s[0x100 / 2];
+ uint32_t l[0x100 / 4];
+ uint64_t d[0x100 / 8];
+ } f[8];
+ } p_type1_cfg; /* 0x028000-0x029000 */
+
+ char _pad_029000[0x030000-0x029000];
+
+ /* 0x030000-0x030007 -- PCI Interrupt Acknowledge Cycle */
+ union {
+ uint8_t c[8 / 1];
+ uint16_t s[8 / 2];
+ uint32_t l[8 / 4];
+ uint64_t d[8 / 8];
+ } p_pci_iack; /* 0x030000-0x030007 */
+
+ char _pad_030007[0x040000-0x030008];
+
+ /* 0x040000-0x040007 -- PCIX Special Cycle */
+ union {
+ uint8_t c[8 / 1];
+ uint16_t s[8 / 2];
+ uint32_t l[8 / 4];
+ uint64_t d[8 / 8];
+ } p_pcix_cycle; /* 0x040000-0x040007 */
+};
+
+#endif /* _ASM_IA64_SN_PCI_PIC_H */
diff --git a/arch/ia64/sn/include/pci/tiocp.h b/arch/ia64/sn/include/pci/tiocp.h
new file mode 100644
index 00000000000..f07c83b2bf6
--- /dev/null
+++ b/arch/ia64/sn/include/pci/tiocp.h
@@ -0,0 +1,256 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2003-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+#ifndef _ASM_IA64_SN_PCI_TIOCP_H
+#define _ASM_IA64_SN_PCI_TIOCP_H
+
+#define TIOCP_HOST_INTR_ADDR 0x003FFFFFFFFFFFFFUL
+#define TIOCP_PCI64_CMDTYPE_MEM (0x1ull << 60)
+
+
+/*****************************************************************************
+ *********************** TIOCP MMR structure mapping ***************************
+ *****************************************************************************/
+
+struct tiocp{
+
+ /* 0x000000-0x00FFFF -- Local Registers */
+
+ /* 0x000000-0x000057 -- (Legacy Widget Space) Configuration */
+ uint64_t cp_id; /* 0x000000 */
+ uint64_t cp_stat; /* 0x000008 */
+ uint64_t cp_err_upper; /* 0x000010 */
+ uint64_t cp_err_lower; /* 0x000018 */
+ #define cp_err cp_err_lower
+ uint64_t cp_control; /* 0x000020 */
+ uint64_t cp_req_timeout; /* 0x000028 */
+ uint64_t cp_intr_upper; /* 0x000030 */
+ uint64_t cp_intr_lower; /* 0x000038 */
+ #define cp_intr cp_intr_lower
+ uint64_t cp_err_cmdword; /* 0x000040 */
+ uint64_t _pad_000048; /* 0x000048 */
+ uint64_t cp_tflush; /* 0x000050 */
+
+ /* 0x000058-0x00007F -- Bridge-specific Configuration */
+ uint64_t cp_aux_err; /* 0x000058 */
+ uint64_t cp_resp_upper; /* 0x000060 */
+ uint64_t cp_resp_lower; /* 0x000068 */
+ #define cp_resp cp_resp_lower
+ uint64_t cp_tst_pin_ctrl; /* 0x000070 */
+ uint64_t cp_addr_lkerr; /* 0x000078 */
+
+ /* 0x000080-0x00008F -- PMU & MAP */
+ uint64_t cp_dir_map; /* 0x000080 */
+ uint64_t _pad_000088; /* 0x000088 */
+
+ /* 0x000090-0x00009F -- SSRAM */
+ uint64_t cp_map_fault; /* 0x000090 */
+ uint64_t _pad_000098; /* 0x000098 */
+
+ /* 0x0000A0-0x0000AF -- Arbitration */
+ uint64_t cp_arb; /* 0x0000A0 */
+ uint64_t _pad_0000A8; /* 0x0000A8 */
+
+ /* 0x0000B0-0x0000BF -- Number In A Can or ATE Parity Error */
+ uint64_t cp_ate_parity_err; /* 0x0000B0 */
+ uint64_t _pad_0000B8; /* 0x0000B8 */
+
+ /* 0x0000C0-0x0000FF -- PCI/GIO */
+ uint64_t cp_bus_timeout; /* 0x0000C0 */
+ uint64_t cp_pci_cfg; /* 0x0000C8 */
+ uint64_t cp_pci_err_upper; /* 0x0000D0 */
+ uint64_t cp_pci_err_lower; /* 0x0000D8 */
+ #define cp_pci_err cp_pci_err_lower
+ uint64_t _pad_0000E0[4]; /* 0x0000{E0..F8} */
+
+ /* 0x000100-0x0001FF -- Interrupt */
+ uint64_t cp_int_status; /* 0x000100 */
+ uint64_t cp_int_enable; /* 0x000108 */
+ uint64_t cp_int_rst_stat; /* 0x000110 */
+ uint64_t cp_int_mode; /* 0x000118 */
+ uint64_t cp_int_device; /* 0x000120 */
+ uint64_t cp_int_host_err; /* 0x000128 */
+ uint64_t cp_int_addr[8]; /* 0x0001{30,,,68} */
+ uint64_t cp_err_int_view; /* 0x000170 */
+ uint64_t cp_mult_int; /* 0x000178 */
+ uint64_t cp_force_always[8]; /* 0x0001{80,,,B8} */
+ uint64_t cp_force_pin[8]; /* 0x0001{C0,,,F8} */
+
+ /* 0x000200-0x000298 -- Device */
+ uint64_t cp_device[4]; /* 0x0002{00,,,18} */
+ uint64_t _pad_000220[4]; /* 0x0002{20,,,38} */
+ uint64_t cp_wr_req_buf[4]; /* 0x0002{40,,,58} */
+ uint64_t _pad_000260[4]; /* 0x0002{60,,,78} */
+ uint64_t cp_rrb_map[2]; /* 0x0002{80,,,88} */
+ #define cp_even_resp cp_rrb_map[0] /* 0x000280 */
+ #define cp_odd_resp cp_rrb_map[1] /* 0x000288 */
+ uint64_t cp_resp_status; /* 0x000290 */
+ uint64_t cp_resp_clear; /* 0x000298 */
+
+ uint64_t _pad_0002A0[12]; /* 0x0002{A0..F8} */
+
+ /* 0x000300-0x0003F8 -- Buffer Address Match Registers */
+ struct {
+ uint64_t upper; /* 0x0003{00,,,F0} */
+ uint64_t lower; /* 0x0003{08,,,F8} */
+ } cp_buf_addr_match[16];
+
+ /* 0x000400-0x0005FF -- Performance Monitor Registers (even only) */
+ struct {
+ uint64_t flush_w_touch; /* 0x000{400,,,5C0} */
+ uint64_t flush_wo_touch; /* 0x000{408,,,5C8} */
+ uint64_t inflight; /* 0x000{410,,,5D0} */
+ uint64_t prefetch; /* 0x000{418,,,5D8} */
+ uint64_t total_pci_retry; /* 0x000{420,,,5E0} */
+ uint64_t max_pci_retry; /* 0x000{428,,,5E8} */
+ uint64_t max_latency; /* 0x000{430,,,5F0} */
+ uint64_t clear_all; /* 0x000{438,,,5F8} */
+ } cp_buf_count[8];
+
+
+ /* 0x000600-0x0009FF -- PCI/X registers */
+ uint64_t cp_pcix_bus_err_addr; /* 0x000600 */
+ uint64_t cp_pcix_bus_err_attr; /* 0x000608 */
+ uint64_t cp_pcix_bus_err_data; /* 0x000610 */
+ uint64_t cp_pcix_pio_split_addr; /* 0x000618 */
+ uint64_t cp_pcix_pio_split_attr; /* 0x000620 */
+ uint64_t cp_pcix_dma_req_err_attr; /* 0x000628 */
+ uint64_t cp_pcix_dma_req_err_addr; /* 0x000630 */
+ uint64_t cp_pcix_timeout; /* 0x000638 */
+
+ uint64_t _pad_000640[24]; /* 0x000{640,,,6F8} */
+
+ /* 0x000700-0x000737 -- Debug Registers */
+ uint64_t cp_ct_debug_ctl; /* 0x000700 */
+ uint64_t cp_br_debug_ctl; /* 0x000708 */
+ uint64_t cp_mux3_debug_ctl; /* 0x000710 */
+ uint64_t cp_mux4_debug_ctl; /* 0x000718 */
+ uint64_t cp_mux5_debug_ctl; /* 0x000720 */
+ uint64_t cp_mux6_debug_ctl; /* 0x000728 */
+ uint64_t cp_mux7_debug_ctl; /* 0x000730 */
+
+ uint64_t _pad_000738[89]; /* 0x000{738,,,9F8} */
+
+ /* 0x000A00-0x000BFF -- PCI/X Read&Write Buffer */
+ struct {
+ uint64_t cp_buf_addr; /* 0x000{A00,,,AF0} */
+ uint64_t cp_buf_attr; /* 0X000{A08,,,AF8} */
+ } cp_pcix_read_buf_64[16];
+
+ struct {
+ uint64_t cp_buf_addr; /* 0x000{B00,,,BE0} */
+ uint64_t cp_buf_attr; /* 0x000{B08,,,BE8} */
+ uint64_t cp_buf_valid; /* 0x000{B10,,,BF0} */
+ uint64_t __pad1; /* 0x000{B18,,,BF8} */
+ } cp_pcix_write_buf_64[8];
+
+ /* End of Local Registers -- Start of Address Map space */
+
+ char _pad_000c00[0x010000 - 0x000c00];
+
+ /* 0x010000-0x011FF8 -- Internal ATE RAM (Auto Parity Generation) */
+ uint64_t cp_int_ate_ram[1024]; /* 0x010000-0x011FF8 */
+
+ char _pad_012000[0x14000 - 0x012000];
+
+ /* 0x014000-0x015FF8 -- Internal ATE RAM (Manual Parity Generation) */
+ uint64_t cp_int_ate_ram_mp[1024]; /* 0x014000-0x015FF8 */
+
+ char _pad_016000[0x18000 - 0x016000];
+
+ /* 0x18000-0x197F8 -- TIOCP Write Request Ram */
+ uint64_t cp_wr_req_lower[256]; /* 0x18000 - 0x187F8 */
+ uint64_t cp_wr_req_upper[256]; /* 0x18800 - 0x18FF8 */
+ uint64_t cp_wr_req_parity[256]; /* 0x19000 - 0x197F8 */
+
+ char _pad_019800[0x1C000 - 0x019800];
+
+ /* 0x1C000-0x1EFF8 -- TIOCP Read Response Ram */
+ uint64_t cp_rd_resp_lower[512]; /* 0x1C000 - 0x1CFF8 */
+ uint64_t cp_rd_resp_upper[512]; /* 0x1D000 - 0x1DFF8 */
+ uint64_t cp_rd_resp_parity[512]; /* 0x1E000 - 0x1EFF8 */
+
+ char _pad_01F000[0x20000 - 0x01F000];
+
+ /* 0x020000-0x020FFF -- Host Device (CP) Configuration Space (not used) */
+ char _pad_020000[0x021000 - 0x20000];
+
+ /* 0x021000-0x027FFF -- PCI Device Configuration Spaces */
+ union {
+ uint8_t c[0x1000 / 1]; /* 0x02{0000,,,7FFF} */
+ uint16_t s[0x1000 / 2]; /* 0x02{0000,,,7FFF} */
+ uint32_t l[0x1000 / 4]; /* 0x02{0000,,,7FFF} */
+ uint64_t d[0x1000 / 8]; /* 0x02{0000,,,7FFF} */
+ union {
+ uint8_t c[0x100 / 1];
+ uint16_t s[0x100 / 2];
+ uint32_t l[0x100 / 4];
+ uint64_t d[0x100 / 8];
+ } f[8];
+ } cp_type0_cfg_dev[7]; /* 0x02{1000,,,7FFF} */
+
+ /* 0x028000-0x028FFF -- PCI Type 1 Configuration Space */
+ union {
+ uint8_t c[0x1000 / 1]; /* 0x028000-0x029000 */
+ uint16_t s[0x1000 / 2]; /* 0x028000-0x029000 */
+ uint32_t l[0x1000 / 4]; /* 0x028000-0x029000 */
+ uint64_t d[0x1000 / 8]; /* 0x028000-0x029000 */
+ union {
+ uint8_t c[0x100 / 1];
+ uint16_t s[0x100 / 2];
+ uint32_t l[0x100 / 4];
+ uint64_t d[0x100 / 8];
+ } f[8];
+ } cp_type1_cfg; /* 0x028000-0x029000 */
+
+ char _pad_029000[0x030000-0x029000];
+
+ /* 0x030000-0x030007 -- PCI Interrupt Acknowledge Cycle */
+ union {
+ uint8_t c[8 / 1];
+ uint16_t s[8 / 2];
+ uint32_t l[8 / 4];
+ uint64_t d[8 / 8];
+ } cp_pci_iack; /* 0x030000-0x030007 */
+
+ char _pad_030007[0x040000-0x030008];
+
+ /* 0x040000-0x040007 -- PCIX Special Cycle */
+ union {
+ uint8_t c[8 / 1];
+ uint16_t s[8 / 2];
+ uint32_t l[8 / 4];
+ uint64_t d[8 / 8];
+ } cp_pcix_cycle; /* 0x040000-0x040007 */
+
+ char _pad_040007[0x200000-0x040008];
+
+ /* 0x200000-0x7FFFFF -- PCI/GIO Device Spaces */
+ union {
+ uint8_t c[0x100000 / 1];
+ uint16_t s[0x100000 / 2];
+ uint32_t l[0x100000 / 4];
+ uint64_t d[0x100000 / 8];
+ } cp_devio_raw[6]; /* 0x200000-0x7FFFFF */
+
+ /* Device n -> cp_devio_raw index: n*2 for n < 2, n+2 otherwise (n parenthesized for expression arguments) */
+ #define cp_devio(n) cp_devio_raw[((n)<2)?((n)*2):((n)+2)]
+
+ char _pad_800000[0xA00000-0x800000];
+
+ /* 0xA00000-0xFFFFFF -- PCI/GIO Device Spaces w/flush */
+ union {
+ uint8_t c[0x100000 / 1];
+ uint16_t s[0x100000 / 2];
+ uint32_t l[0x100000 / 4];
+ uint64_t d[0x100000 / 8];
+ } cp_devio_raw_flush[6]; /* 0xA00000-0xFFFFFF (6 x 0x100000) */
+
+ /* Device n -> cp_devio_raw_flush index: n*2 for n < 2, n+2 otherwise (n parenthesized for expression arguments) */
+ #define cp_devio_flush(n) cp_devio_raw_flush[((n)<2)?((n)*2):((n)+2)]
+
+};
+
+#endif /* _ASM_IA64_SN_PCI_TIOCP_H */
diff --git a/arch/ia64/sn/include/tio.h b/arch/ia64/sn/include/tio.h
new file mode 100644
index 00000000000..0139124dd54
--- /dev/null
+++ b/arch/ia64/sn/include/tio.h
@@ -0,0 +1,37 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_TIO_H
+#define _ASM_IA64_SN_TIO_H
+
+#define TIO_MMR_ADDR_MOD
+
+#define TIO_NODE_ID TIO_MMR_ADDR_MOD(0x0000000090060e80)
+
+#define TIO_ITTE_BASE 0xb0008800 /* base of translation table entries */
+#define TIO_ITTE(bigwin) (TIO_ITTE_BASE + 8*(bigwin))
+
+#define TIO_ITTE_OFFSET_BITS 8 /* size of offset field */
+#define TIO_ITTE_OFFSET_MASK ((1<<TIO_ITTE_OFFSET_BITS)-1)
+#define TIO_ITTE_OFFSET_SHIFT 0
+
+#define TIO_ITTE_WIDGET_BITS 2 /* size of widget field */
+#define TIO_ITTE_WIDGET_MASK ((1<<TIO_ITTE_WIDGET_BITS)-1)
+#define TIO_ITTE_WIDGET_SHIFT 12
+#define TIO_ITTE_VALID_MASK 0x1
+#define TIO_ITTE_VALID_SHIFT 16
+
+
+#define TIO_ITTE_PUT(nasid, bigwin, widget, addr, valid) \
+ REMOTE_HUB_S((nasid), TIO_ITTE(bigwin), \
+ (((((addr) >> TIO_BWIN_SIZE_BITS) & \
+ TIO_ITTE_OFFSET_MASK) << TIO_ITTE_OFFSET_SHIFT) | \
+ (((widget) & TIO_ITTE_WIDGET_MASK) << TIO_ITTE_WIDGET_SHIFT)) | \
+ (( (valid) & TIO_ITTE_VALID_MASK) << TIO_ITTE_VALID_SHIFT))
+
+#endif /* _ASM_IA64_SN_TIO_H */
diff --git a/arch/ia64/sn/include/xtalk/hubdev.h b/arch/ia64/sn/include/xtalk/hubdev.h
new file mode 100644
index 00000000000..868e7ecae84
--- /dev/null
+++ b/arch/ia64/sn/include/xtalk/hubdev.h
@@ -0,0 +1,67 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+#ifndef _ASM_IA64_SN_XTALK_HUBDEV_H
+#define _ASM_IA64_SN_XTALK_HUBDEV_H
+
+#define HUB_WIDGET_ID_MAX 0xf
+#define DEV_PER_WIDGET (2*2*8)
+#define IIO_ITTE_WIDGET_BITS 4 /* size of widget field */
+#define IIO_ITTE_WIDGET_MASK ((1<<IIO_ITTE_WIDGET_BITS)-1)
+#define IIO_ITTE_WIDGET_SHIFT 8
+
+/*
+ * Use the top big window as a surrogate for the first small window
+ */
+#define SWIN0_BIGWIN HUB_NUM_BIG_WINDOW
+#define IIO_NUM_ITTES 7
+#define HUB_NUM_BIG_WINDOW (IIO_NUM_ITTES - 1)
+
+struct sn_flush_device_list {
+ int sfdl_bus;
+ int sfdl_slot;
+ int sfdl_pin;
+ struct bar_list {
+ unsigned long start;
+ unsigned long end;
+ } sfdl_bar_list[6];
+ unsigned long sfdl_force_int_addr;
+ unsigned long sfdl_flush_value;
+ volatile unsigned long *sfdl_flush_addr;
+ uint64_t sfdl_persistent_busnum;
+ struct pcibus_info *sfdl_pcibus_info;
+ spinlock_t sfdl_flush_lock;
+};
+
+/*
+ * **widget_p - Used as an array[wid_num][device] of sn_flush_device_list.
+ */
+struct sn_flush_nasid_entry {
+ struct sn_flush_device_list **widget_p; /* Used as an array indexed by wid_num, then by device */
+ uint64_t iio_itte[8];
+};
+
+struct hubdev_info {
+ geoid_t hdi_geoid;
+ short hdi_nasid;
+ short hdi_peer_nasid; /* Dual Porting Peer */
+
+ struct sn_flush_nasid_entry hdi_flush_nasid_list;
+ struct xwidget_info hdi_xwidget_info[HUB_WIDGET_ID_MAX + 1];
+
+
+ void *hdi_nodepda;
+ void *hdi_node_vertex;
+ void *hdi_xtalk_vertex;
+};
+
+extern void hubdev_init_node(nodepda_t *, cnodeid_t);
+extern void hub_error_init(struct hubdev_info *);
+extern void ice_error_init(struct hubdev_info *);
+
+
+#endif /* _ASM_IA64_SN_XTALK_HUBDEV_H */
diff --git a/arch/ia64/sn/include/xtalk/xbow.h b/arch/ia64/sn/include/xtalk/xbow.h
new file mode 100644
index 00000000000..ec56b3432f1
--- /dev/null
+++ b/arch/ia64/sn/include/xtalk/xbow.h
@@ -0,0 +1,291 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992-1997,2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+ */
+#ifndef _ASM_IA64_SN_XTALK_XBOW_H
+#define _ASM_IA64_SN_XTALK_XBOW_H
+
+#define XBOW_PORT_8 0x8
+#define XBOW_PORT_C 0xc
+#define XBOW_PORT_F 0xf
+
+#define MAX_XBOW_PORTS 8 /* number of ports on xbow chip */
+#define BASE_XBOW_PORT XBOW_PORT_8 /* Lowest external port */
+
+#define XBOW_CREDIT 4
+
+#define MAX_XBOW_NAME 16
+
+/* Register set for each xbow link */
+typedef volatile struct xb_linkregs_s {
+/*
+ * we access these through synergy unswizzled space, so the address
+ * gets twiddled (i.e. references to 0x4 actually go to 0x0 and vv.)
+ * That's why we put the register first and filler second.
+ */
+ uint32_t link_ibf;
+ uint32_t filler0; /* filler for proper alignment */
+ uint32_t link_control;
+ uint32_t filler1;
+ uint32_t link_status;
+ uint32_t filler2;
+ uint32_t link_arb_upper;
+ uint32_t filler3;
+ uint32_t link_arb_lower;
+ uint32_t filler4;
+ uint32_t link_status_clr;
+ uint32_t filler5;
+ uint32_t link_reset;
+ uint32_t filler6;
+ uint32_t link_aux_status;
+ uint32_t filler7;
+} xb_linkregs_t;
+
+typedef volatile struct xbow_s {
+ /* standard widget configuration 0x000000-0x000057 */
+ struct widget_cfg xb_widget; /* 0x000000 */
+
+ /* helper fieldnames for accessing bridge widget */
+
+#define xb_wid_id xb_widget.w_id
+#define xb_wid_stat xb_widget.w_status
+#define xb_wid_err_upper xb_widget.w_err_upper_addr
+#define xb_wid_err_lower xb_widget.w_err_lower_addr
+#define xb_wid_control xb_widget.w_control
+#define xb_wid_req_timeout xb_widget.w_req_timeout
+#define xb_wid_int_upper xb_widget.w_intdest_upper_addr
+#define xb_wid_int_lower xb_widget.w_intdest_lower_addr
+#define xb_wid_err_cmdword xb_widget.w_err_cmd_word
+#define xb_wid_llp xb_widget.w_llp_cfg
+#define xb_wid_stat_clr xb_widget.w_tflush
+
+/*
+ * we access these through synergy unswizzled space, so the address
+ * gets twiddled (i.e. references to 0x4 actually go to 0x0 and vv.)
+ * That's why we put the register first and filler second.
+ */
+ /* xbow-specific widget configuration 0x000058-0x0000FF */
+ uint32_t xb_wid_arb_reload; /* 0x00005C */
+ uint32_t _pad_000058;
+ uint32_t xb_perf_ctr_a; /* 0x000064 */
+ uint32_t _pad_000060;
+ uint32_t xb_perf_ctr_b; /* 0x00006c */
+ uint32_t _pad_000068;
+ uint32_t xb_nic; /* 0x000074 */
+ uint32_t _pad_000070;
+
+ /* Xbridge only */
+ uint32_t xb_w0_rst_fnc; /* 0x00007C */
+ uint32_t _pad_000078;
+ uint32_t xb_l8_rst_fnc; /* 0x000084 */
+ uint32_t _pad_000080;
+ uint32_t xb_l9_rst_fnc; /* 0x00008c */
+ uint32_t _pad_000088;
+ uint32_t xb_la_rst_fnc; /* 0x000094 */
+ uint32_t _pad_000090;
+ uint32_t xb_lb_rst_fnc; /* 0x00009c */
+ uint32_t _pad_000098;
+ uint32_t xb_lc_rst_fnc; /* 0x0000a4 */
+ uint32_t _pad_0000a0;
+ uint32_t xb_ld_rst_fnc; /* 0x0000ac */
+ uint32_t _pad_0000a8;
+ uint32_t xb_le_rst_fnc; /* 0x0000b4 */
+ uint32_t _pad_0000b0;
+ uint32_t xb_lf_rst_fnc; /* 0x0000bc */
+ uint32_t _pad_0000b8;
+ uint32_t xb_lock; /* 0x0000c4 */
+ uint32_t _pad_0000c0;
+ uint32_t xb_lock_clr; /* 0x0000cc */
+ uint32_t _pad_0000c8;
+ /* end of Xbridge only */
+ uint32_t _pad_0000d0[12];
+
+ /* Link Specific Registers, port 8..15 0x000100-0x000300 */
+ xb_linkregs_t xb_link_raw[MAX_XBOW_PORTS];
+#define xb_link(p) xb_link_raw[(p) & (MAX_XBOW_PORTS - 1)]
+
+} xbow_t;
+
+#define XB_FLAGS_EXISTS 0x1 /* device exists */
+#define XB_FLAGS_MASTER 0x2
+#define XB_FLAGS_SLAVE 0x0
+#define XB_FLAGS_GBR 0x4
+#define XB_FLAGS_16BIT 0x8
+#define XB_FLAGS_8BIT 0x0
+
+/* is widget port number valid? (based on version 7.0 of xbow spec) */
+#define XBOW_WIDGET_IS_VALID(wid) ((wid) >= XBOW_PORT_8 && (wid) <= XBOW_PORT_F)
+
+/* whether to use upper (source widget 0x8..0xb) or lower (0xc..0xf) arbitration register */
+#define XBOW_ARB_IS_UPPER(wid) ((wid) >= XBOW_PORT_8 && (wid) < XBOW_PORT_C) /* XBOW_PORT_B is not defined in this header */
+#define XBOW_ARB_IS_LOWER(wid) ((wid) >= XBOW_PORT_C && (wid) <= XBOW_PORT_F)
+
+/* offset of arbitration register, given source widget id */
+#define XBOW_ARB_OFF(wid) (XBOW_ARB_IS_UPPER(wid) ? 0x1c : 0x24)
+
+#define XBOW_WID_ID WIDGET_ID
+#define XBOW_WID_STAT WIDGET_STATUS
+#define XBOW_WID_ERR_UPPER WIDGET_ERR_UPPER_ADDR
+#define XBOW_WID_ERR_LOWER WIDGET_ERR_LOWER_ADDR
+#define XBOW_WID_CONTROL WIDGET_CONTROL
+#define XBOW_WID_REQ_TO WIDGET_REQ_TIMEOUT
+#define XBOW_WID_INT_UPPER WIDGET_INTDEST_UPPER_ADDR
+#define XBOW_WID_INT_LOWER WIDGET_INTDEST_LOWER_ADDR
+#define XBOW_WID_ERR_CMDWORD WIDGET_ERR_CMD_WORD
+#define XBOW_WID_LLP WIDGET_LLP_CFG
+#define XBOW_WID_STAT_CLR WIDGET_TFLUSH
+#define XBOW_WID_ARB_RELOAD 0x5c
+#define XBOW_WID_PERF_CTR_A 0x64
+#define XBOW_WID_PERF_CTR_B 0x6c
+#define XBOW_WID_NIC 0x74
+
+/* Xbridge only */
+#define XBOW_W0_RST_FNC 0x00007C
+#define XBOW_L8_RST_FNC 0x000084
+#define XBOW_L9_RST_FNC 0x00008c
+#define XBOW_LA_RST_FNC 0x000094
+#define XBOW_LB_RST_FNC 0x00009c
+#define XBOW_LC_RST_FNC 0x0000a4
+#define XBOW_LD_RST_FNC 0x0000ac
+#define XBOW_LE_RST_FNC 0x0000b4
+#define XBOW_LF_RST_FNC 0x0000bc
+#define XBOW_RESET_FENCE(x) (((x) > 7 && (x) < 16) ? \
+ (XBOW_W0_RST_FNC + ((x) - 7) * 8) : \
+ (((x) == 0) ? XBOW_W0_RST_FNC : 0)) /* ports 8..15 -> L8..LF offsets, 0 -> W0, else 0; fully parenthesized for use inside expressions */
+#define XBOW_LOCK 0x0000c4
+#define XBOW_LOCK_CLR 0x0000cc
+/* End of Xbridge only */
+
+/* used only in ide, but defined here within the reserved portion */
+/* of the widget0 address space (before 0xf4) */
+#define XBOW_WID_UNDEF 0xe4
+
+/* xbow link register set base, legal value for x is 0x8..0xf */
+#define XB_LINK_BASE 0x100
+#define XB_LINK_OFFSET 0x40
+#define XB_LINK_REG_BASE(x) (XB_LINK_BASE + ((x) & (MAX_XBOW_PORTS - 1)) * XB_LINK_OFFSET)
+
+#define XB_LINK_IBUF_FLUSH(x) (XB_LINK_REG_BASE(x) + 0x4)
+#define XB_LINK_CTRL(x) (XB_LINK_REG_BASE(x) + 0xc)
+#define XB_LINK_STATUS(x) (XB_LINK_REG_BASE(x) + 0x14)
+#define XB_LINK_ARB_UPPER(x) (XB_LINK_REG_BASE(x) + 0x1c)
+#define XB_LINK_ARB_LOWER(x) (XB_LINK_REG_BASE(x) + 0x24)
+#define XB_LINK_STATUS_CLR(x) (XB_LINK_REG_BASE(x) + 0x2c)
+#define XB_LINK_RESET(x) (XB_LINK_REG_BASE(x) + 0x34)
+#define XB_LINK_AUX_STATUS(x) (XB_LINK_REG_BASE(x) + 0x3c)
+
+/* link_control(x) */
+#define XB_CTRL_LINKALIVE_IE 0x80000000 /* link comes alive */
+ /* reserved: 0x40000000 */
+#define XB_CTRL_PERF_CTR_MODE_MSK 0x30000000 /* perf counter mode */
+#define XB_CTRL_IBUF_LEVEL_MSK 0x0e000000 /* input packet buffer level */
+#define XB_CTRL_8BIT_MODE 0x01000000 /* force link into 8 bit mode */
+#define XB_CTRL_BAD_LLP_PKT 0x00800000 /* force bad LLP packet */
+#define XB_CTRL_WIDGET_CR_MSK 0x007c0000 /* LLP widget credit mask */
+#define XB_CTRL_WIDGET_CR_SHFT 18 /* LLP widget credit shift */
+#define XB_CTRL_ILLEGAL_DST_IE 0x00020000 /* illegal destination */
+#define XB_CTRL_OALLOC_IBUF_IE 0x00010000 /* overallocated input buffer */
+ /* reserved: 0x0000fe00 */
+#define XB_CTRL_BNDWDTH_ALLOC_IE 0x00000100 /* bandwidth alloc */
+#define XB_CTRL_RCV_CNT_OFLOW_IE 0x00000080 /* rcv retry overflow */
+#define XB_CTRL_XMT_CNT_OFLOW_IE 0x00000040 /* xmt retry overflow */
+#define XB_CTRL_XMT_MAX_RTRY_IE 0x00000020 /* max transmit retry */
+#define XB_CTRL_RCV_IE 0x00000010 /* receive */
+#define XB_CTRL_XMT_RTRY_IE 0x00000008 /* transmit retry */
+ /* reserved: 0x00000004 */
+#define XB_CTRL_MAXREQ_TOUT_IE 0x00000002 /* maximum request timeout */
+#define XB_CTRL_SRC_TOUT_IE 0x00000001 /* source timeout */
+
+/* link_status(x) */
+#define XB_STAT_LINKALIVE XB_CTRL_LINKALIVE_IE
+ /* reserved: 0x7ff80000 */
+#define XB_STAT_MULTI_ERR 0x00040000 /* multi error */
+#define XB_STAT_ILLEGAL_DST_ERR XB_CTRL_ILLEGAL_DST_IE
+#define XB_STAT_OALLOC_IBUF_ERR XB_CTRL_OALLOC_IBUF_IE
+#define XB_STAT_BNDWDTH_ALLOC_ID_MSK 0x0000ff00 /* port bitmask */
+#define XB_STAT_RCV_CNT_OFLOW_ERR XB_CTRL_RCV_CNT_OFLOW_IE
+#define XB_STAT_XMT_CNT_OFLOW_ERR XB_CTRL_XMT_CNT_OFLOW_IE
+#define XB_STAT_XMT_MAX_RTRY_ERR XB_CTRL_XMT_MAX_RTRY_IE
+#define XB_STAT_RCV_ERR XB_CTRL_RCV_IE
+#define XB_STAT_XMT_RTRY_ERR XB_CTRL_XMT_RTRY_IE
+ /* reserved: 0x00000004 */
+#define XB_STAT_MAXREQ_TOUT_ERR XB_CTRL_MAXREQ_TOUT_IE
+#define XB_STAT_SRC_TOUT_ERR XB_CTRL_SRC_TOUT_IE
+
+/* link_aux_status(x) */
+#define XB_AUX_STAT_RCV_CNT 0xff000000
+#define XB_AUX_STAT_XMT_CNT 0x00ff0000
+#define XB_AUX_STAT_TOUT_DST 0x0000ff00
+#define XB_AUX_LINKFAIL_RST_BAD 0x00000040
+#define XB_AUX_STAT_PRESENT 0x00000020
+#define XB_AUX_STAT_PORT_WIDTH 0x00000010
+ /* reserved: 0x0000000f */
+
+/*
+ * link_arb_upper/link_arb_lower(x), (reg) should be the link_arb_upper
+ * register if (x) is 0x8..0xb, link_arb_lower if (x) is 0xc..0xf
+ */
+#define XB_ARB_GBR_MSK 0x1f
+#define XB_ARB_RR_MSK 0x7
+#define XB_ARB_GBR_SHFT(x) (((x) & 0x3) * 8)
+#define XB_ARB_RR_SHFT(x) (((x) & 0x3) * 8 + 5)
+#define XB_ARB_GBR_CNT(reg,x) ((reg) >> XB_ARB_GBR_SHFT(x) & XB_ARB_GBR_MSK)
+#define XB_ARB_RR_CNT(reg,x) ((reg) >> XB_ARB_RR_SHFT(x) & XB_ARB_RR_MSK)
+
+/* XBOW_WID_STAT */
+#define XB_WID_STAT_LINK_INTR_SHFT (24)
+#define XB_WID_STAT_LINK_INTR_MASK (0xFF << XB_WID_STAT_LINK_INTR_SHFT)
+#define XB_WID_STAT_LINK_INTR(x) (0x1 << (((x)&7) + XB_WID_STAT_LINK_INTR_SHFT))
+#define XB_WID_STAT_WIDGET0_INTR 0x00800000
+#define XB_WID_STAT_SRCID_MASK 0x000003c0 /* Xbridge only */
+#define XB_WID_STAT_REG_ACC_ERR 0x00000020
+#define XB_WID_STAT_RECV_TOUT 0x00000010 /* Xbridge only */
+#define XB_WID_STAT_ARB_TOUT 0x00000008 /* Xbridge only */
+#define XB_WID_STAT_XTALK_ERR 0x00000004
+#define XB_WID_STAT_DST_TOUT 0x00000002 /* Xbridge only */
+#define XB_WID_STAT_MULTI_ERR 0x00000001
+
+#define XB_WID_STAT_SRCID_SHFT 6
+
+/* XBOW_WID_CONTROL */
+#define XB_WID_CTRL_REG_ACC_IE XB_WID_STAT_REG_ACC_ERR
+#define XB_WID_CTRL_RECV_TOUT XB_WID_STAT_RECV_TOUT
+#define XB_WID_CTRL_ARB_TOUT XB_WID_STAT_ARB_TOUT
+#define XB_WID_CTRL_XTALK_IE XB_WID_STAT_XTALK_ERR
+
+/* XBOW_WID_INT_UPPER */
+/* defined in xwidget.h for WIDGET_INTDEST_UPPER_ADDR */
+
+/* XBOW WIDGET part number, in the ID register */
+#define XBOW_WIDGET_PART_NUM 0x0 /* crossbow */
+#define XXBOW_WIDGET_PART_NUM 0xd000 /* Xbridge */
+#define XBOW_WIDGET_MFGR_NUM 0x0
+#define XXBOW_WIDGET_MFGR_NUM 0x0
+#define PXBOW_WIDGET_PART_NUM 0xd100 /* PIC */
+
+#define XBOW_REV_1_0 0x1 /* xbow rev 1.0 is "1" */
+#define XBOW_REV_1_1 0x2 /* xbow rev 1.1 is "2" */
+#define XBOW_REV_1_2 0x3 /* xbow rev 1.2 is "3" */
+#define XBOW_REV_1_3 0x4 /* xbow rev 1.3 is "4" */
+#define XBOW_REV_2_0 0x5 /* xbow rev 2.0 is "5" */
+
+#define XXBOW_PART_REV_1_0 (XXBOW_WIDGET_PART_NUM << 4 | 0x1 )
+#define XXBOW_PART_REV_2_0 (XXBOW_WIDGET_PART_NUM << 4 | 0x2 )
+
+/* XBOW_WID_ARB_RELOAD */
+#define XBOW_WID_ARB_RELOAD_INT 0x3f /* GBR reload interval */
+
+#define IS_XBRIDGE_XBOW(wid) \
+ (XWIDGET_PART_NUM(wid) == XXBOW_WIDGET_PART_NUM && \
+ XWIDGET_MFG_NUM(wid) == XXBOW_WIDGET_MFGR_NUM)
+
+#define IS_PIC_XBOW(wid) \
+ (XWIDGET_PART_NUM(wid) == PXBOW_WIDGET_PART_NUM && \
+ XWIDGET_MFG_NUM(wid) == XXBOW_WIDGET_MFGR_NUM)
+
+#define XBOW_WAR_ENABLED(pv, widid) ((1 << XWIDGET_REV_NUM(widid)) & (pv)) /* tests the bit of pv selected by the widget's revision number */
+
+#endif /* _ASM_IA64_SN_XTALK_XBOW_H */
diff --git a/arch/ia64/sn/include/xtalk/xwidgetdev.h b/arch/ia64/sn/include/xtalk/xwidgetdev.h
new file mode 100644
index 00000000000..c5f4bc5cc03
--- /dev/null
+++ b/arch/ia64/sn/include/xtalk/xwidgetdev.h
@@ -0,0 +1,70 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992-1997,2000-2003 Silicon Graphics, Inc. All Rights Reserved.
+ */
+#ifndef _ASM_IA64_SN_XTALK_XWIDGET_H
+#define _ASM_IA64_SN_XTALK_XWIDGET_H
+
+/* WIDGET_ID */
+#define WIDGET_REV_NUM 0xf0000000
+#define WIDGET_PART_NUM 0x0ffff000
+#define WIDGET_MFG_NUM 0x00000ffe
+#define WIDGET_REV_NUM_SHFT 28
+#define WIDGET_PART_NUM_SHFT 12
+#define WIDGET_MFG_NUM_SHFT 1
+
+#define XWIDGET_PART_NUM(widgetid) (((widgetid) & WIDGET_PART_NUM) >> WIDGET_PART_NUM_SHFT)
+#define XWIDGET_REV_NUM(widgetid) (((widgetid) & WIDGET_REV_NUM) >> WIDGET_REV_NUM_SHFT)
+#define XWIDGET_MFG_NUM(widgetid) (((widgetid) & WIDGET_MFG_NUM) >> WIDGET_MFG_NUM_SHFT)
+#define XWIDGET_PART_REV_NUM(widgetid) ((XWIDGET_PART_NUM(widgetid) << 4) | \
+ XWIDGET_REV_NUM(widgetid))
+#define XWIDGET_PART_REV_NUM_REV(partrev) ((partrev) & 0xf) /* low 4 bits, where XWIDGET_PART_REV_NUM() places the revision */
+
+/* widget configuration registers */
+struct widget_cfg{
+ uint32_t w_id; /* 0x04 */
+ uint32_t w_pad_0; /* 0x00 */
+ uint32_t w_status; /* 0x0c */
+ uint32_t w_pad_1; /* 0x08 */
+ uint32_t w_err_upper_addr; /* 0x14 */
+ uint32_t w_pad_2; /* 0x10 */
+ uint32_t w_err_lower_addr; /* 0x1c */
+ uint32_t w_pad_3; /* 0x18 */
+ uint32_t w_control; /* 0x24 */
+ uint32_t w_pad_4; /* 0x20 */
+ uint32_t w_req_timeout; /* 0x2c */
+ uint32_t w_pad_5; /* 0x28 */
+ uint32_t w_intdest_upper_addr; /* 0x34 */
+ uint32_t w_pad_6; /* 0x30 */
+ uint32_t w_intdest_lower_addr; /* 0x3c */
+ uint32_t w_pad_7; /* 0x38 */
+ uint32_t w_err_cmd_word; /* 0x44 */
+ uint32_t w_pad_8; /* 0x40 */
+ uint32_t w_llp_cfg; /* 0x4c */
+ uint32_t w_pad_9; /* 0x48 */
+ uint32_t w_tflush; /* 0x54 */
+ uint32_t w_pad_10; /* 0x50 */
+};
+
+/*
+ * Crosstalk Widget Hardware Identification, as defined in the Crosstalk spec.
+ */
+struct xwidget_hwid{
+ int mfg_num;
+ int rev_num;
+ int part_num;
+};
+
+struct xwidget_info{
+
+ struct xwidget_hwid xwi_hwid; /* Widget Identification */
+ char xwi_masterxid; /* Hub's Widget Port Number */
+ void *xwi_hubinfo; /* Hub's provider private info */
+ uint64_t *xwi_hub_provider; /* prom provider functions */
+ void *xwi_vertex;
+};
+
+#endif /* _ASM_IA64_SN_XTALK_XWIDGET_H */
diff --git a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile
new file mode 100644
index 00000000000..6c7f4d9e8ea
--- /dev/null
+++ b/arch/ia64/sn/kernel/Makefile
@@ -0,0 +1,12 @@
+# arch/ia64/sn/kernel/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1999,2001-2003 Silicon Graphics, Inc. All Rights Reserved.
+#
+
+obj-y += setup.o bte.o bte_error.o irq.o mca.o idle.o \
+ huberror.o io_init.o iomv.o klconflib.o sn2/
+obj-$(CONFIG_IA64_GENERIC) += machvec.o
diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c
new file mode 100644
index 00000000000..ce0bc4085ea
--- /dev/null
+++ b/arch/ia64/sn/kernel/bte.c
@@ -0,0 +1,453 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/sn/nodepda.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/arch.h>
+#include <asm/sn/sn_cpuid.h>
+#include <asm/sn/pda.h>
+#include <asm/sn/shubio.h>
+#include <asm/nodedata.h>
+#include <asm/delay.h>
+
+#include <linux/bootmem.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+
+#include <asm/sn/bte.h>
+
+#ifndef L1_CACHE_MASK
+#define L1_CACHE_MASK (L1_CACHE_BYTES - 1)
+#endif
+
+/* two interfaces on two btes */
+#define MAX_INTERFACES_TO_TRY 4
+
+static struct bteinfo_s *bte_if_on_node(nasid_t nasid, int interface)
+{
+ /* Map the nasid to its node id (cnodeid) and fetch that node's PDA. */
+ nodepda_t *node_pda = NODEPDA(nasid_to_cnodeid(nasid));
+
+ /* Hand back the requested BTE interface descriptor on that node. */
+ return &node_pda->bte_if[interface];
+}
+
+/************************************************************************
+ * Block Transfer Engine copy related functions.
+ *
+ ***********************************************************************/
+
+/*
+ * bte_copy(src, dest, len, mode, notification)
+ *
+ * Use the block transfer engine to move kernel memory from src to dest
+ * using the assigned mode.
+ *
+ * Parameters:
+ * src - physical address of the transfer source.
+ * dest - physical address of the transfer destination.
+ * len - number of bytes to transfer from source to dest.
+ * mode - hardware defined. See reference information
+ * for IBCT0/1 in the SHUB Programmers Reference
+ * notification - kernel virtual address of the notification cache
+ * line. If NULL, the default is used and
+ * the bte_copy is synchronous.
+ *
+ * NOTE: This function requires src, dest, and len to
+ * be cacheline aligned.
+ */
+bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification)
+{
+ u64 transfer_size; /* transfer length in cache lines, masked with BTE_LEN_MASK */
+ u64 transfer_stat; /* last value read from the notification word */
+ struct bteinfo_s *bte;
+ bte_result_t bte_status;
+ unsigned long irq_flags;
+ unsigned long itc_end = 0; /* ITC value after which the synchronous wait gives up */
+ struct bteinfo_s *btes_to_try[MAX_INTERFACES_TO_TRY];
+ int bte_if_index;
+ int bte_pri, bte_sec;
+
+ BTE_PRINTK(("bte_copy(0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%p)\n",
+ src, dest, len, mode, notification));
+
+ if (len == 0) { /* nothing to transfer */
+ return BTE_SUCCESS;
+ }
+
+ BUG_ON((len & L1_CACHE_MASK) ||
+ (src & L1_CACHE_MASK) || (dest & L1_CACHE_MASK)); /* all three must be cache line aligned */
+ BUG_ON(!(len < ((BTE_LEN_MASK + 1) << L1_CACHE_SHIFT))); /* cache line count must fit within BTE_LEN_MASK */
+
+ /* CPUs on subnode 0 try bte0 first; all other CPUs try bte1 first */
+ if (cpuid_to_subnode(smp_processor_id()) == 0) {
+ bte_pri = 0;
+ bte_sec = 1;
+ } else {
+ bte_pri = 1;
+ bte_sec = 0;
+ }
+
+ if (mode & BTE_USE_DEST) {
+ /* try the destination's node first, then (only with BTE_USE_ANY) the local node */
+ btes_to_try[0] = bte_if_on_node(NASID_GET(dest), bte_pri);
+ btes_to_try[1] = bte_if_on_node(NASID_GET(dest), bte_sec);
+ if (mode & BTE_USE_ANY) {
+ btes_to_try[2] = bte_if_on_node(get_nasid(), bte_pri);
+ btes_to_try[3] = bte_if_on_node(get_nasid(), bte_sec);
+ } else {
+ btes_to_try[2] = NULL;
+ btes_to_try[3] = NULL;
+ }
+ } else {
+ /* try the local node first, then (only with BTE_USE_ANY) the destination's node */
+ btes_to_try[0] = bte_if_on_node(get_nasid(), bte_pri);
+ btes_to_try[1] = bte_if_on_node(get_nasid(), bte_sec);
+ if (mode & BTE_USE_ANY) {
+ btes_to_try[2] = bte_if_on_node(NASID_GET(dest), bte_pri);
+ btes_to_try[3] = bte_if_on_node(NASID_GET(dest), bte_sec);
+ } else {
+ btes_to_try[2] = NULL;
+ btes_to_try[3] = NULL;
+ }
+ }
+
+retry_bteop:
+ do {
+ local_irq_save(irq_flags);
+
+ bte_if_index = 0;
+
+ /* Attempt to lock one of the candidate BTE interfaces, in order. */
+ while (bte_if_index < MAX_INTERFACES_TO_TRY) {
+ bte = btes_to_try[bte_if_index++];
+
+ if (bte == NULL) { /* slot not populated for this mode */
+ continue;
+ }
+
+ if (spin_trylock(&bte->spinlock)) {
+ if (!(*bte->most_rcnt_na & BTE_WORD_AVAILABLE) ||
+ (BTE_LNSTAT_LOAD(bte) & BTE_ACTIVE)) {
+ /* Got the lock but BTE still busy */
+ spin_unlock(&bte->spinlock);
+ } else {
+ /* we got the lock and it's not busy */
+ break;
+ }
+ }
+ bte = NULL; /* lock not taken or engine busy: move on to the next candidate */
+ }
+
+ if (bte != NULL) { /* leave the loop holding bte->spinlock with interrupts still disabled */
+ break;
+ }
+
+ local_irq_restore(irq_flags);
+
+ if (!(mode & BTE_WACQUIRE)) { /* without BTE_WACQUIRE, fail instead of retrying */
+ return BTEFAIL_NOTAVAIL;
+ }
+ } while (1);
+
+ if (notification == NULL) {
+ /* No caller-supplied notification word: use the interface's own. */
+ bte->most_rcnt_na = &bte->notify;
+ } else {
+ bte->most_rcnt_na = notification;
+ }
+
+ /* Calculate the number of cache lines to transfer. */
+ transfer_size = ((len >> L1_CACHE_SHIFT) & BTE_LEN_MASK);
+
+ /* Initialize the notification to a known value. */
+ *bte->most_rcnt_na = BTE_WORD_BUSY;
+
+ /* Set the status reg busy bit and transfer length */
+ BTE_PRINTKV(("IBLS = 0x%lx\n", IBLS_BUSY | transfer_size));
+ BTE_LNSTAT_STORE(bte, IBLS_BUSY | transfer_size);
+
+ /* Set the source and destination registers */
+ BTE_PRINTKV(("IBSA = 0x%lx)\n", (TO_PHYS(src))));
+ BTE_SRC_STORE(bte, TO_PHYS(src));
+ BTE_PRINTKV(("IBDA = 0x%lx)\n", (TO_PHYS(dest))));
+ BTE_DEST_STORE(bte, TO_PHYS(dest));
+
+ /* Set the notification register */
+ BTE_PRINTKV(("IBNA = 0x%lx)\n",
+ TO_PHYS(ia64_tpa((unsigned long)bte->most_rcnt_na))));
+ BTE_NOTIF_STORE(bte,
+ TO_PHYS(ia64_tpa((unsigned long)bte->most_rcnt_na)));
+
+ /* Initiate the transfer */
+ BTE_PRINTK(("IBCT = 0x%lx)\n", BTE_VALID_MODE(mode)));
+ BTE_CTRL_STORE(bte, BTE_VALID_MODE(mode));
+
+ itc_end = ia64_get_itc() + (40000000 * local_cpu_data->cyc_per_usec); /* presumably a 40 second deadline (40000000 usec) -- confirm cyc_per_usec units */
+
+ spin_unlock_irqrestore(&bte->spinlock, irq_flags);
+
+ if (notification != NULL) { /* caller supplied its own notification word: return without waiting */
+ return BTE_SUCCESS;
+ }
+
+ while ((transfer_stat = *bte->most_rcnt_na) == BTE_WORD_BUSY) { /* spin until the notification word changes */
+ if (ia64_get_itc() > itc_end) { /* deadline passed: run error recovery, then retry from interface selection */
+ BTE_PRINTK(("BTE timeout nasid 0x%x bte%d IBLS = 0x%lx na 0x%lx\n",
+ NASID_GET(bte->bte_base_addr), bte->bte_num,
+ BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na) );
+ bte->bte_error_count++;
+ bte->bh_error = IBLS_ERROR;
+ bte_error_handler((unsigned long)NODEPDA(bte->bte_cnode));
+ *bte->most_rcnt_na = BTE_WORD_AVAILABLE;
+ goto retry_bteop;
+ }
+ }
+
+ BTE_PRINTKV((" Delay Done. IBLS = 0x%lx, most_rcnt_na = 0x%lx\n",
+ BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na));
+
+ if (transfer_stat & IBLS_ERROR) { /* error: report the notification value with the IBLS_ERROR bit cleared */
+ bte_status = transfer_stat & ~IBLS_ERROR;
+ } else {
+ bte_status = BTE_SUCCESS;
+ }
+ *bte->most_rcnt_na = BTE_WORD_AVAILABLE; /* mark the interface usable again */
+
+ BTE_PRINTK(("Returning status is 0x%lx and most_rcnt_na is 0x%lx\n",
+ BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na));
+
+ return bte_status;
+}
+
+EXPORT_SYMBOL(bte_copy);
+
+/*
+ * bte_unaligned_copy(src, dest, len, mode)
+ *
+ * use the block transfer engine to move kernel
+ * memory from src to dest using the assigned mode.
+ *
+ * Parameters:
+ * src - physical address of the transfer source.
+ * dest - physical address of the transfer destination.
+ * len - number of bytes to transfer from source to dest.
+ * mode - hardware defined. See reference information
+ * for IBCT0/1 in the SGI documentation.
+ *
+ * NOTE: If the source, dest, and len are all cache line aligned,
+ * then it would be _FAR_ preferable to use bte_copy instead.
+ */
+bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode)
+{
+ int destFirstCacheOffset;
+ u64 headBteSource;
+ u64 headBteLen;
+ u64 headBcopySrcOffset;
+ u64 headBcopyDest;
+ u64 headBcopyLen;
+ u64 footBteSource;
+ u64 footBteLen;
+ u64 footBcopyDest;
+ u64 footBcopyLen;
+ bte_result_t rv;
+ char *bteBlock, *bteBlock_unaligned;
+
+ if (len == 0) {
+ return BTE_SUCCESS;
+ }
+
+ /* temporary bounce buffer; the extra cache lines leave room to align it */
+ bteBlock_unaligned = kmalloc(len + 3 * L1_CACHE_BYTES,
+ GFP_KERNEL | GFP_DMA);
+ if (bteBlock_unaligned == NULL) {
+ return BTEFAIL_NOTAVAIL;
+ }
+ bteBlock = (char *)L1_CACHE_ALIGN((u64) bteBlock_unaligned);
+
+ headBcopySrcOffset = src & L1_CACHE_MASK;
+ destFirstCacheOffset = dest & L1_CACHE_MASK;
+
+ /*
+ * At this point, the transfer is broken into
+ * (up to) three sections. The first section is
+ * from the start address to the first physical
+ * cache line, the second is from the first physical
+ * cache line to the last complete cache line,
+ * and the third is from the last cache line to the
+ * end of the buffer. The first and third sections
+ * are handled by bte copying into a temporary buffer
+ * and then bcopy'ing the necessary section into the
+ * final location. The middle section is handled with
+ * a standard bte copy.
+ *
+ * One nasty exception to the above rule is when the
+ * source and destination are not symmetrically
+ * mis-aligned. If the source offset from the first
+ * cache line is different from the destination offset,
+ * we make the first section be the entire transfer
+ * and then bcopy the entire block into place.
+ */
+ if (headBcopySrcOffset == destFirstCacheOffset) {
+
+ /*
+ * Both the source and destination are the same
+ * distance from a cache line boundary so we can
+ * use the bte to transfer the bulk of the
+ * data.
+ */
+ headBteSource = src & ~L1_CACHE_MASK;
+ headBcopyDest = dest;
+ if (headBcopySrcOffset) {
+ headBcopyLen =
+ (len >
+ (L1_CACHE_BYTES -
+ headBcopySrcOffset) ? L1_CACHE_BYTES
+ - headBcopySrcOffset : len);
+ headBteLen = L1_CACHE_BYTES;
+ } else {
+ headBcopyLen = 0;
+ headBteLen = 0;
+ }
+
+ if (len > headBcopyLen) {
+ footBcopyLen = (len - headBcopyLen) & L1_CACHE_MASK;
+ footBteLen = L1_CACHE_BYTES;
+
+ footBteSource = src + len - footBcopyLen;
+ footBcopyDest = dest + len - footBcopyLen;
+
+ if (footBcopyDest == (headBcopyDest + headBcopyLen)) {
+ /*
+ * We have two contiguous bcopy
+ * blocks. Merge them.
+ */
+ headBcopyLen += footBcopyLen;
+ headBteLen += footBteLen;
+ } else if (footBcopyLen > 0) {
+ rv = bte_copy(footBteSource,
+ ia64_tpa((unsigned long)bteBlock),
+ footBteLen, mode, NULL);
+ if (rv != BTE_SUCCESS) {
+ kfree(bteBlock_unaligned);
+ return rv;
+ }
+
+ memcpy(__va(footBcopyDest),
+ (char *)bteBlock, footBcopyLen);
+ }
+ } else {
+ footBcopyLen = 0;
+ footBteLen = 0;
+ }
+
+ if (len > (headBcopyLen + footBcopyLen)) {
+ /* now transfer the middle. */
+ rv = bte_copy((src + headBcopyLen),
+ (dest +
+ headBcopyLen),
+ (len - headBcopyLen -
+ footBcopyLen), mode, NULL);
+ if (rv != BTE_SUCCESS) {
+ kfree(bteBlock_unaligned);
+ return rv;
+ }
+
+ }
+ } else {
+
+ /*
+ * The transfer is not symmetric, we will
+ * allocate a buffer large enough for all the
+ * data, bte_copy into that buffer and then
+ * bcopy to the destination.
+ */
+
+ /* Add the leader from source */
+ headBteLen = len + (src & L1_CACHE_MASK);
+ /* Round up to whole cache lines; adds nothing when already aligned. */
+ headBteLen = L1_CACHE_ALIGN(headBteLen);
+ headBteSource = src & ~L1_CACHE_MASK;
+ headBcopySrcOffset = src & L1_CACHE_MASK;
+ headBcopyDest = dest;
+ headBcopyLen = len;
+ }
+
+ if (headBcopyLen > 0) {
+ rv = bte_copy(headBteSource,
+ ia64_tpa((unsigned long)bteBlock), headBteLen,
+ mode, NULL);
+ if (rv != BTE_SUCCESS) {
+ kfree(bteBlock_unaligned);
+ return rv;
+ }
+
+ memcpy(__va(headBcopyDest), ((char *)bteBlock +
+ headBcopySrcOffset), headBcopyLen);
+ }
+ kfree(bteBlock_unaligned);
+ return BTE_SUCCESS;
+}
+
+EXPORT_SYMBOL(bte_unaligned_copy);
+
+/************************************************************************
+ * Block Transfer Engine initialization functions.
+ *
+ ***********************************************************************/
+
+/*
+ * bte_init_node(nodepda, cnode)
+ *
+ * Initialize the nodepda structure with BTE base addresses and
+ * spinlocks.
+ */
+void bte_init_node(nodepda_t * mynodepda, cnodeid_t cnode)
+{
+ int i;
+
+ /*
+ * Set up the node-wide BTE error recovery state, then mark every
+ * block transfer engine interface on this node as available.
+ */
+
+ /*
+ * Recovery state is kept once per node, directly in the nodepda:
+ * a lock plus a timer whose handler is bte_error_handler() and
+ * whose argument is this nodepda.
+ */
+ spin_lock_init(&mynodepda->bte_recovery_lock);
+ init_timer(&mynodepda->bte_recovery_timer);
+ mynodepda->bte_recovery_timer.function = bte_error_handler;
+ mynodepda->bte_recovery_timer.data = (unsigned long)mynodepda;
+
+ for (i = 0; i < BTES_PER_NODE; i++) {
+ /* Which link status register should we use? (IBLS0 for interface 0, IBLS1 otherwise) */
+ unsigned long link_status = (i == 0 ? IIO_IBLS0 : IIO_IBLS1);
+ mynodepda->bte_if[i].bte_base_addr = (u64 *)
+ REMOTE_HUB_ADDR(cnodeid_to_nasid(cnode), link_status);
+
+ /*
+ * Point the notification at the interface's own word, mark it
+ * available, and init the spinlock so the first transfer can occur.
+ */
+ mynodepda->bte_if[i].most_rcnt_na =
+ &(mynodepda->bte_if[i].notify);
+ mynodepda->bte_if[i].notify = BTE_WORD_AVAILABLE;
+ spin_lock_init(&mynodepda->bte_if[i].spinlock);
+
+ mynodepda->bte_if[i].bte_cnode = cnode; /* owning node, used to find the nodepda on error */
+ mynodepda->bte_if[i].bte_error_count = 0;
+ mynodepda->bte_if[i].bte_num = i;
+ mynodepda->bte_if[i].cleanup_active = 0;
+ mynodepda->bte_if[i].bh_error = 0;
+ }
+
+}
diff --git a/arch/ia64/sn/kernel/bte_error.c b/arch/ia64/sn/kernel/bte_error.c
new file mode 100644
index 00000000000..fd104312c6b
--- /dev/null
+++ b/arch/ia64/sn/kernel/bte_error.c
@@ -0,0 +1,198 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+#include <linux/types.h>
+#include <linux/jiffies.h>
+#include <asm/sn/sn_sal.h>
+#include "ioerror.h"
+#include <asm/sn/addrs.h>
+#include <asm/sn/shubio.h>
+#include <asm/sn/geo.h>
+#include "xtalk/xwidgetdev.h"
+#include "xtalk/hubdev.h"
+#include <asm/sn/bte.h>
+#include <asm/param.h>
+
+/*
+ * Bte error handling is done in two parts. The first captures
+ * any crb related errors. Since there can be multiple crbs per
+ * interface and multiple interfaces active, we need to wait until
+ * all active crbs are completed. This is the first job of the
+ * second part error handler. When all bte related CRBs are cleanly
+ * completed, it resets the interfaces and gets them ready for new
+ * transfers to be queued.
+ */
+
+void bte_error_handler(unsigned long);
+
+/*
+ * Second part error handler: wait until all BTE related CRBs are
+ * completed and then reset the interfaces.
+ *
+ * _nodepda is the node's struct nodepda_s pointer carried in an unsigned
+ * long, which lets this function double as the bte_recovery_timer
+ * callback.
+ *
+ * If the hub still has marked CRBs, or valid CRBs that belong to a BTE
+ * operation, the cleanup is postponed: the recovery timer is re-armed to
+ * fire five seconds from now and the function returns with the interface
+ * spinlocks still held and cleanup_active set, so no new transfers can
+ * be queued until a later invocation completes the cleanup.
+ */
+void bte_error_handler(unsigned long _nodepda)
+{
+	struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
+	spinlock_t *recovery_lock = &err_nodepda->bte_recovery_lock;
+	struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer;
+	nasid_t nasid;
+	int i;
+	int valid_crbs;
+	unsigned long irq_flags;
+	volatile u64 *notify;
+	bte_result_t bh_error;
+	ii_imem_u_t imem;	/* II IMEM Register */
+	ii_icrb0_d_u_t icrbd;	/* II CRB Register D */
+	ii_ibcr_u_t ibcr;
+	ii_icmr_u_t icmr;
+	ii_ieclr_u_t ieclr;
+
+	BTE_PRINTK(("bte_error_handler(%p) - %d\n", err_nodepda,
+		    smp_processor_id()));
+
+	spin_lock_irqsave(recovery_lock, irq_flags);
+
+	/* Neither interface has a pending error: nothing to recover. */
+	if ((err_nodepda->bte_if[0].bh_error == BTE_SUCCESS) &&
+	    (err_nodepda->bte_if[1].bh_error == BTE_SUCCESS)) {
+		BTE_PRINTK(("eh:%p:%d Nothing to do.\n", err_nodepda,
+			    smp_processor_id()));
+		spin_unlock_irqrestore(recovery_lock, irq_flags);
+		return;
+	}
+	/*
+	 * Lock all interfaces on this node to prevent new transfers
+	 * from being queued.  Interfaces already flagged cleanup_active
+	 * were locked by an earlier, postponed invocation.
+	 */
+	for (i = 0; i < BTES_PER_NODE; i++) {
+		if (err_nodepda->bte_if[i].cleanup_active) {
+			continue;
+		}
+		spin_lock(&err_nodepda->bte_if[i].spinlock);
+		BTE_PRINTK(("eh:%p:%d locked %d\n", err_nodepda,
+			    smp_processor_id(), i));
+		err_nodepda->bte_if[i].cleanup_active = 1;
+	}
+
+	/* Determine information about our hub */
+	nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode);
+
+	/*
+	 * A BTE transfer can use multiple CRBs.  We need to make sure
+	 * that all the BTE CRBs are complete (or timed out) before
+	 * attempting to clean up the error.  Resetting the BTE while
+	 * there are still BTE CRBs active will hang the BTE.
+	 * We should look at all the CRBs to see if they are allocated
+	 * to the BTE and see if they are still active.  When none
+	 * are active, we can continue with the cleanup.
+	 *
+	 * We also want to make sure that the local NI port is up.
+	 * When a router resets the NI port can go down, while it
+	 * goes through the LLP handshake, but then comes back up.
+	 */
+	icmr.ii_icmr_regval = REMOTE_HUB_L(nasid, IIO_ICMR);
+	if (icmr.ii_icmr_fld_s.i_crb_mark != 0) {
+		/*
+		 * There are errors which still need to be cleaned up by
+		 * hubiio_crb_error_handler
+		 */
+		/* mod_timer() wants an absolute expiry, not a delay. */
+		mod_timer(recovery_timer, jiffies + (HZ * 5));
+		BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda,
+			    smp_processor_id()));
+		spin_unlock_irqrestore(recovery_lock, irq_flags);
+		return;
+	}
+	if (icmr.ii_icmr_fld_s.i_crb_vld != 0) {
+
+		valid_crbs = icmr.ii_icmr_fld_s.i_crb_vld;
+
+		for (i = 0; i < IIO_NUM_CRBS; i++) {
+			if (!((1 << i) & valid_crbs)) {
+				/* This crb was not marked as valid, ignore */
+				continue;
+			}
+			icrbd.ii_icrb0_d_regval =
+			    REMOTE_HUB_L(nasid, IIO_ICRB_D(i));
+			if (icrbd.d_bteop) {
+				/* A BTE CRB is still in flight; retry later. */
+				mod_timer(recovery_timer, jiffies + (HZ * 5));
+				BTE_PRINTK(("eh:%p:%d Valid %d, Giving up\n",
+					    err_nodepda, smp_processor_id(),
+					    i));
+				spin_unlock_irqrestore(recovery_lock,
+						       irq_flags);
+				return;
+			}
+		}
+	}
+
+	BTE_PRINTK(("eh:%p:%d Cleaning up\n", err_nodepda, smp_processor_id()));
+	/* Reenable both bte interfaces */
+	imem.ii_imem_regval = REMOTE_HUB_L(nasid, IIO_IMEM);
+	imem.ii_imem_fld_s.i_b0_esd = imem.ii_imem_fld_s.i_b1_esd = 1;
+	REMOTE_HUB_S(nasid, IIO_IMEM, imem.ii_imem_regval);
+
+	/* Clear BTE0/1 error bits */
+	ieclr.ii_ieclr_regval = 0;
+	if (err_nodepda->bte_if[0].bh_error != BTE_SUCCESS)
+		ieclr.ii_ieclr_fld_s.i_e_bte_0 = 1;
+	if (err_nodepda->bte_if[1].bh_error != BTE_SUCCESS)
+		ieclr.ii_ieclr_fld_s.i_e_bte_1 = 1;
+	REMOTE_HUB_S(nasid, IIO_IECLR, ieclr.ii_ieclr_regval);
+
+	/* Reinitialize both BTE state machines. */
+	ibcr.ii_ibcr_regval = REMOTE_HUB_L(nasid, IIO_IBCR);
+	ibcr.ii_ibcr_fld_s.i_soft_reset = 1;
+	REMOTE_HUB_S(nasid, IIO_IBCR, ibcr.ii_ibcr_regval);
+
+	/*
+	 * Publish any saved error through the interface's most recent
+	 * notification address, then release the interface again.
+	 */
+	for (i = 0; i < BTES_PER_NODE; i++) {
+		bh_error = err_nodepda->bte_if[i].bh_error;
+		if (bh_error != BTE_SUCCESS) {
+			/* There is an error which needs to be notified */
+			notify = err_nodepda->bte_if[i].most_rcnt_na;
+			BTE_PRINTK(("cnode %d bte %d error=0x%lx\n",
+				    err_nodepda->bte_if[i].bte_cnode,
+				    err_nodepda->bte_if[i].bte_num,
+				    IBLS_ERROR | (u64) bh_error));
+			*notify = IBLS_ERROR | bh_error;
+			err_nodepda->bte_if[i].bh_error = BTE_SUCCESS;
+		}
+
+		err_nodepda->bte_if[i].cleanup_active = 0;
+		BTE_PRINTK(("eh:%p:%d Unlocked %d\n", err_nodepda,
+			    smp_processor_id(), i));
+		spin_unlock(&err_nodepda->bte_if[i].spinlock);
+	}
+
+	/* Recovery is complete; a pending retry is no longer needed. */
+	del_timer(recovery_timer);
+
+	spin_unlock_irqrestore(recovery_lock, irq_flags);
+}
+
+/*
+ * First part error handler.  Called whenever the II generates an error
+ * CRB interrupt that belongs to a BTE: records the error against the
+ * affected interface and then runs the second part handler for the node.
+ * crbnum and bteop are accepted but not used here.
+ */
+void
+bte_crb_error_handler(cnodeid_t cnode, int btenum,
+		      int crbnum, ioerror_t * ioe, int bteop)
+{
+	struct bteinfo_s *bte = &(NODEPDA(cnode)->bte_if[btenum]);
+
+	/*
+	 * The caller has already classified the error; stash the type,
+	 * biased by BTEFAIL_OFFSET, in the bte handle structure so the
+	 * thread exercising the interface can consume it.
+	 */
+	bte->bh_error = ioe->ie_errortype + BTEFAIL_OFFSET;
+	bte->bte_error_count += 1;
+
+	BTE_PRINTK(("Got an error on cnode %d bte %d: HW error type 0x%x\n",
+		    bte->bte_cnode, bte->bte_num, ioe->ie_errortype));
+
+	/* Kick off (or retry) the node-wide cleanup right away. */
+	bte_error_handler((unsigned long) NODEPDA(cnode));
+}
+
+
diff --git a/arch/ia64/sn/kernel/huberror.c b/arch/ia64/sn/kernel/huberror.c
new file mode 100644
index 00000000000..2bdf684c506
--- /dev/null
+++ b/arch/ia64/sn/kernel/huberror.c
@@ -0,0 +1,201 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000,2002-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <asm/delay.h>
+#include <asm/sn/sn_sal.h>
+#include "ioerror.h"
+#include <asm/sn/addrs.h>
+#include <asm/sn/shubio.h>
+#include <asm/sn/geo.h>
+#include "xtalk/xwidgetdev.h"
+#include "xtalk/hubdev.h"
+#include <asm/sn/bte.h>
+
+void hubiio_crb_error_handler(struct hubdev_info *hubdev_info);
+extern void bte_crb_error_handler(cnodeid_t, int, int, ioerror_t *,
+ int);
+/*
+ * Error interrupt handler shared by hub and TIO.  arg is the
+ * struct hubdev_info registered with request_irq().  SAL is asked to
+ * process the error first; a non-zero v0 from SAL is fatal.
+ */
+static irqreturn_t hub_eint_handler(int irq, void *arg, struct pt_regs *ep)
+{
+	struct hubdev_info *hubdev_info = (struct hubdev_info *)arg;
+	nasid_t nasid = hubdev_info->hdi_nasid;
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.v0 = 0;
+	ret_stuff.status = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_HUB_ERROR_INTERRUPT,
+			(u64) nasid, 0, 0, 0, 0, 0, 0);
+
+	if ((int)ret_stuff.v0)
+		panic("hubii_eint_handler(): Fatal TIO Error");
+
+	/* An even nasid is not a TIO, so handle its CRB errors. */
+	if ((nasid & 1) == 0)
+		hubiio_crb_error_handler(hubdev_info);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Free the hub CRB "crbnum" which encountered an error.
+ * Assumption is, error handling was successfully done,
+ * and we now want to return the CRB back to Hub for normal usage.
+ *
+ * In order to free the CRB, all that's needed is to de-allocate it
+ *
+ * Assumption:
+ * No other processor is mucking around with the hub control register.
+ * So, upper layer has to single thread this.
+ */
+void hubiio_crb_free(struct hubdev_info *hubdev_info, int crbnum)
+{
+	nasid_t nasid = hubdev_info->hdi_nasid;
+	ii_icrb0_b_u_t icrbb;
+
+	/*
+	 * The hardware does NOT clear the mark bit, so clear it by hand
+	 * (read-modify-write of CRB register B) to be sure the error is
+	 * not processed twice.
+	 */
+	icrbb.ii_icrb0_b_regval = REMOTE_HUB_L(nasid, IIO_ICRB_B(crbnum));
+	icrbb.b_mark = 0;
+	REMOTE_HUB_S(nasid, IIO_ICRB_B(crbnum), icrbb.ii_icrb0_b_regval);
+
+	/*
+	 * Request deallocation of the CRB, then poll until the hub drops
+	 * the pending bit to indicate it is done.
+	 */
+	REMOTE_HUB_S(nasid, IIO_ICDR, (IIO_ICDR_PND | crbnum));
+	for (;;) {
+		if (!(REMOTE_HUB_L(nasid, IIO_ICDR) & IIO_ICDR_PND))
+			break;
+		udelay(1);
+	}
+}
+
+/*
+ * hubiio_crb_error_handler
+ *
+ * This routine gets invoked when a hub gets an error
+ * interrupt. So, the routine is running in interrupt context
+ * at error interrupt level.
+ * Action:
+ * It's responsible for identifying ALL the CRBs that are marked
+ * with error, and process them.
+ *
+ * If you find the CRB that's marked with error, map this to the
+ * reason it caused error, and invoke appropriate error handler.
+ *
+ * XXX Be aware of the information in the context register.
+ *
+ * NOTE:
+ * Use REMOTE_HUB_* macro instead of LOCAL_HUB_* so that the interrupt
+ * handler can be run on any node. (not necessarily the node
+ * corresponding to the hub that encountered error).
+ */
+
+void hubiio_crb_error_handler(struct hubdev_info *hubdev_info)
+{
+ nasid_t nasid;
+ ii_icrb0_a_u_t icrba; /* II CRB Register A */
+ ii_icrb0_b_u_t icrbb; /* II CRB Register B */
+ ii_icrb0_c_u_t icrbc; /* II CRB Register C */
+ ii_icrb0_d_u_t icrbd; /* II CRB Register D */
+ ii_icrb0_e_u_t icrbe; /* II CRB Register E */
+ int i;
+ int num_errors = 0; /* Num of errors handled */
+ ioerror_t ioerror;
+
+ nasid = hubdev_info->hdi_nasid;
+
+ /*
+ * XXX - Add locking for any recovery actions
+ */
+ /*
+ * Scan through all CRBs in the Hub, and handle the errors
+ * in any of the CRBs marked.
+ */
+ for (i = 0; i < IIO_NUM_CRBS; i++) {
+ /* Check this crb entry to see if it is in error. */
+ icrbb.ii_icrb0_b_regval = REMOTE_HUB_L(nasid, IIO_ICRB_B(i));
+
+ if (icrbb.b_mark == 0) {
+ continue;
+ }
+
+ /* Register A is read here but its value is not used below. */
+ icrba.ii_icrb0_a_regval = REMOTE_HUB_L(nasid, IIO_ICRB_A(i));
+
+ IOERROR_INIT(&ioerror);
+
+ /* read other CRB error registers. */
+ /* (C and D are consulted below; E is read but not used.) */
+ icrbc.ii_icrb0_c_regval = REMOTE_HUB_L(nasid, IIO_ICRB_C(i));
+ icrbd.ii_icrb0_d_regval = REMOTE_HUB_L(nasid, IIO_ICRB_D(i));
+ icrbe.ii_icrb0_e_regval = REMOTE_HUB_L(nasid, IIO_ICRB_E(i));
+
+ /* The error code from register B becomes the ioerror type. */
+ IOERROR_SETVALUE(&ioerror, errortype, icrbb.b_ecode);
+
+ /* Check if this error is due to BTE operation,
+ * and handle it separately.
+ */
+ if (icrbd.d_bteop ||
+ ((icrbb.b_initiator == IIO_ICRB_INIT_BTE0 ||
+ icrbb.b_initiator == IIO_ICRB_INIT_BTE1) &&
+ (icrbb.b_imsgtype == IIO_ICRB_IMSGT_BTE ||
+ icrbb.b_imsgtype == IIO_ICRB_IMSGT_SN1NET))) {
+
+ int bte_num;
+
+ if (icrbd.d_bteop)
+ bte_num = icrbc.c_btenum;
+ else /* b_initiator bit 2 gives BTE number */
+ bte_num = (icrbb.b_initiator & 0x4) >> 2;
+
+ /* Return the CRB to the hub before running BTE recovery. */
+ hubiio_crb_free(hubdev_info, i);
+
+ bte_crb_error_handler(nasid_to_cnodeid(nasid), bte_num,
+ i, &ioerror, icrbd.d_bteop);
+ /* num_errors is only counted; nothing in this function reads it. */
+ num_errors++;
+ continue;
+ }
+ /*
+ * NOTE(review): a marked CRB that is not BTE related falls through
+ * here without being freed or handled - confirm this is intended.
+ */
+ }
+}
+
+/*
+ * Function	: hub_error_init
+ * Purpose	: initialize the error handling requirements for a given hub
+ *		  by attaching hub_eint_handler() to the shared SGI_II_ERROR
+ *		  interrupt.
+ * Parameters	: hubdev_info, handed back to the handler as its argument.
+ * Assumptions	: Called only once per hub, either by a local cpu. Or by a
+ *		  remote cpu, when this hub is headless.(cpuless)
+ * Returns	: None; a request_irq() failure is only reported via printk.
+ */
+void hub_error_init(struct hubdev_info *hubdev_info)
+{
+	int rc;
+
+	rc = request_irq(SGI_II_ERROR, hub_eint_handler, SA_SHIRQ,
+			 "SN_hub_error", hubdev_info);
+	if (rc)
+		printk("hub_error_init: Failed to request_irq for 0x%p\n",
+		       hubdev_info);
+}
+
+
+/*
+ * Function	: ice_error_init
+ * Purpose	: initialize the error handling requirements for a given tio
+ *		  by attaching hub_eint_handler() to the shared SGI_TIO_ERROR
+ *		  interrupt.
+ * Parameters	: hubdev_info, handed back to the handler as its argument.
+ * Assumptions	: Called only once per tio.
+ * Returns	: None; a request_irq() failure is only reported via printk.
+ */
+void ice_error_init(struct hubdev_info *hubdev_info)
+{
+	int rc;
+
+	rc = request_irq(SGI_TIO_ERROR, hub_eint_handler, SA_SHIRQ,
+			 "SN_TIO_error", hubdev_info);
+	if (rc)
+		printk("ice_error_init: request_irq() error hubdev_info 0x%p\n",
+		       hubdev_info);
+}
+
diff --git a/arch/ia64/sn/kernel/idle.c b/arch/ia64/sn/kernel/idle.c
new file mode 100644
index 00000000000..49d178f022b
--- /dev/null
+++ b/arch/ia64/sn/kernel/idle.c
@@ -0,0 +1,30 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2001-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <asm/sn/leds.h>
+
+/*
+ * Track idle transitions in the pda and mirror them on the CPU activity
+ * LED: a non-zero state marks the cpu idle, zero marks it busy.
+ */
+void snidle(int state)
+{
+	if (!state) {
+		/* Leaving idle: turn the activity LED on. */
+		set_led_bits(LED_CPU_ACTIVITY, LED_CPU_ACTIVITY);
+		pda->idle_flag = 0;
+		return;
+	}
+
+	/* Entering idle: turn the LED off unless already flagged idle. */
+	if (pda->idle_flag == 0)
+		set_led_bits(0, LED_CPU_ACTIVITY);
+
+	pda->idle_flag = 1;
+}
diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
new file mode 100644
index 00000000000..001880812b7
--- /dev/null
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -0,0 +1,411 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/bootmem.h>
+#include <linux/nodemask.h>
+#include <asm/sn/types.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/addrs.h>
+#include "pci/pcibus_provider_defs.h"
+#include "pci/pcidev.h"
+#include "pci/pcibr_provider.h"
+#include "xtalk/xwidgetdev.h"
+#include <asm/sn/geo.h>
+#include "xtalk/hubdev.h"
+#include <asm/sn/io.h>
+#include <asm/sn/simulator.h>
+
+/* NOTE(review): presumably the widget id of the master base I/O - confirm. */
+char master_baseio_wid;
+nasid_t master_nasid = INVALID_NASID; /* Partition Master */
+
+/* Per-slab data: one embedded hub device info structure. */
+struct slab_info {
+ struct hubdev_info hubdev;
+};
+
+/* A module (brick) and the slab_info for each of its MAX_SLABS + 1 slabs. */
+struct brick {
+ moduleid_t id; /* Module ID of this module */
+ struct slab_info slab_info[MAX_SLABS + 1];
+};
+
+/* Set to 1 at the end of sn_pci_init(). */
+int sn_ioif_inited = 0; /* SN I/O infrastructure initialized? */
+
+/*
+ * Retrieve the DMA Flush List given nasid. This list is needed
+ * to implement the WAR - Flush DMA data on PIO Reads.
+ */
+static inline uint64_t
+sal_get_widget_dmaflush_list(u64 nasid, u64 widget_num, u64 address)
+{
+	struct ia64_sal_retval rv;
+
+	/* Preset the result fields before handing the request to SAL. */
+	rv.v0 = 0;
+	rv.status = 0;
+
+	SAL_CALL_NOLOCK(rv, (u64) SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST,
+			nasid, widget_num, address, 0, 0, 0, 0);
+
+	/* The caller in this file treats a non-zero v0 as failure. */
+	return rv.v0;
+}
+
+/*
+ * Retrieve the hub device info structure for the given nasid.
+ */
+static inline uint64_t sal_get_hubdev_info(u64 handle, u64 address)
+{
+	struct ia64_sal_retval rv;
+
+	/* Preset the result fields before handing the request to SAL. */
+	rv.v0 = 0;
+	rv.status = 0;
+
+	SAL_CALL_NOLOCK(rv, (u64) SN_SAL_IOIF_GET_HUBDEV_INFO,
+			handle, address, 0, 0, 0, 0, 0);
+
+	/* The caller in this file treats a non-zero v0 as failure. */
+	return rv.v0;
+}
+
+/*
+ * Retrieve the pci bus information given the bus number.
+ */
+static inline uint64_t sal_get_pcibus_info(u64 segment, u64 busnum, u64 address)
+{
+	struct ia64_sal_retval rv;
+
+	/* Preset the result fields before handing the request to SAL. */
+	rv.v0 = 0;
+	rv.status = 0;
+
+	SAL_CALL_NOLOCK(rv, (u64) SN_SAL_IOIF_GET_PCIBUS_INFO,
+			segment, busnum, address, 0, 0, 0, 0);
+
+	/* v0 carries the result the caller checks. */
+	return rv.v0;
+}
+
+/*
+ * Retrieve the pci device information given the bus and device|function number.
+ */
+static inline uint64_t
+sal_get_pcidev_info(u64 segment, u64 bus_number, u64 devfn, u64 pci_dev,
+		    u64 sn_irq_info)
+{
+	struct ia64_sal_retval rv;
+
+	/* Preset the result fields before handing the request to SAL. */
+	rv.v0 = 0;
+	rv.status = 0;
+
+	SAL_CALL_NOLOCK(rv, (u64) SN_SAL_IOIF_GET_PCIDEV_INFO,
+			segment, bus_number, devfn, pci_dev, sn_irq_info,
+			0, 0);
+
+	/* The caller in this file treats a non-zero v0 as failure. */
+	return rv.v0;
+}
+
+/*
+ * sn_alloc_pci_sysdata() - This routine allocates a pci controller
+ * which is expected as the pci_dev and pci_bus sysdata by the Linux
+ * PCI infrastructure.
+ */
+static inline struct pci_controller *sn_alloc_pci_sysdata(void)
+{
+	struct pci_controller *controller;
+
+	/* Running out of memory here is fatal, so this never returns NULL. */
+	controller = kmalloc(sizeof(*controller), GFP_KERNEL);
+	BUG_ON(!controller);
+
+	/* Hand back a fully zeroed controller. */
+	memset(controller, 0, sizeof(*controller));
+	return controller;
+}
+
+/*
+ * sn_fixup_ionodes() - This routine initializes the HUB data structure
+ * for each node in the system.
+ *
+ * For every I/O node the hubdev_info is filled in by SAL, each xwidget
+ * is linked back to its hub, the per-widget DMA flush lists are fetched
+ * from SAL and the hub/TIO error interrupt is registered.  Nodes for
+ * which SAL fails, or which need no flush list, are skipped.
+ */
+static void sn_fixup_ionodes(void)
+{
+
+	struct sn_flush_device_list *sn_flush_device_list;
+	struct hubdev_info *hubdev;
+	uint64_t status;
+	uint64_t nasid;
+	int i, widget;
+
+	for (i = 0; i < numionodes; i++) {
+		hubdev = (struct hubdev_info *)(NODEPDA(i)->pdinfo);
+		nasid = cnodeid_to_nasid(i);
+		status = sal_get_hubdev_info(nasid, (uint64_t) __pa(hubdev));
+		if (status)
+			continue;
+
+		for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++)
+			hubdev->hdi_xwidget_info[widget].xwi_hubinfo = hubdev;
+
+		/*
+		 * The widget_p value left by SAL only acts as a flag here;
+		 * it is replaced by a kernel allocated table below.
+		 */
+		if (!hubdev->hdi_flush_nasid_list.widget_p)
+			continue;
+
+		hubdev->hdi_flush_nasid_list.widget_p =
+		    kmalloc((HUB_WIDGET_ID_MAX + 1) *
+			    sizeof(struct sn_flush_device_list *), GFP_KERNEL);
+		if (!hubdev->hdi_flush_nasid_list.widget_p)
+			BUG();	/* Cannot afford to run out of memory */
+
+		memset(hubdev->hdi_flush_nasid_list.widget_p, 0x0,
+		       (HUB_WIDGET_ID_MAX + 1) *
+		       sizeof(struct sn_flush_device_list *));
+
+		for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) {
+			sn_flush_device_list = kmalloc(DEV_PER_WIDGET *
+						       sizeof(struct
+							      sn_flush_device_list),
+						       GFP_KERNEL);
+			if (!sn_flush_device_list)
+				BUG();	/* Cannot afford to run out of memory */
+
+			memset(sn_flush_device_list, 0x0,
+			       DEV_PER_WIDGET *
+			       sizeof(struct sn_flush_device_list));
+
+			status =
+			    sal_get_widget_dmaflush_list(nasid, widget,
+							 (uint64_t)
+							 __pa
+							 (sn_flush_device_list));
+			if (status) {
+				/* No list for this widget; slot stays NULL. */
+				kfree(sn_flush_device_list);
+				continue;
+			}
+
+			hubdev->hdi_flush_nasid_list.widget_p[widget] =
+			    sn_flush_device_list;
+		}
+
+		/*
+		 * NOTE(review): hub vs. TIO is picked from the parity of
+		 * the node index, while hub_eint_handler() tests the
+		 * nasid - confirm the two always agree.
+		 */
+		if (!(i & 1))
+			hub_error_init(hubdev);
+		else
+			ice_error_init(hubdev);
+	}
+
+}
+
+/*
+ * sn_pci_fixup_slot() - This routine sets up a slot's resources
+ * consistent with the Linux PCI abstraction layer.  Resources acquired
+ * from our PCI provider include PIO maps to BAR space and interrupt
+ * objects.
+ *
+ * dev->sysdata is replaced by a freshly allocated struct pcidev_info
+ * filled in by SAL.  The sn_irq_info allocated here is kept only when
+ * the device ends up with an interrupt; otherwise it is freed again.
+ * Allocation or SAL failures are fatal (BUG).
+ */
+static void sn_pci_fixup_slot(struct pci_dev *dev)
+{
+	int idx;
+	int segment = 0;
+	uint64_t size;
+	struct sn_irq_info *sn_irq_info;
+	struct pci_dev *host_pci_dev;
+	int status = 0;
+
+	dev->sysdata = kmalloc(sizeof(struct pcidev_info), GFP_KERNEL);
+	if (!dev->sysdata)
+		BUG();		/* Cannot afford to run out of memory */
+	memset(SN_PCIDEV_INFO(dev), 0, sizeof(struct pcidev_info));
+
+	sn_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_KERNEL);
+	if (!sn_irq_info)
+		BUG();		/* Cannot afford to run out of memory */
+	memset(sn_irq_info, 0, sizeof(struct sn_irq_info));
+
+	/* Call to retrieve pci device information needed by kernel. */
+	status = sal_get_pcidev_info((u64) segment, (u64) dev->bus->number,
+				     dev->devfn,
+				     (u64) __pa(SN_PCIDEV_INFO(dev)),
+				     (u64) __pa(sn_irq_info));
+	if (status)
+		BUG();		/* Cannot get platform pci device information */
+
+	/* Copy over PIO Mapped Addresses */
+	for (idx = 0; idx <= PCI_ROM_RESOURCE; idx++) {
+		unsigned long start, end, addr;
+
+		if (!SN_PCIDEV_INFO(dev)->pdi_pio_mapped_addr[idx])
+			continue;
+
+		/* Keep the resource's extent, move it to the PIO address. */
+		start = dev->resource[idx].start;
+		end = dev->resource[idx].end;
+		size = end - start;
+		addr = SN_PCIDEV_INFO(dev)->pdi_pio_mapped_addr[idx];
+		/* Drop the top four address bits, then add the uncached offset. */
+		addr = ((addr << 4) >> 4) | __IA64_UNCACHED_OFFSET;
+		dev->resource[idx].start = addr;
+		dev->resource[idx].end = addr + size;
+		if (dev->resource[idx].flags & IORESOURCE_IO)
+			dev->resource[idx].parent = &ioport_resource;
+		else
+			dev->resource[idx].parent = &iomem_resource;
+	}
+
+	/*
+	 * set up host bus linkages
+	 *
+	 * NOTE(review): pci_find_slot() can return NULL; confirm that the
+	 * host handle supplied by SAL always names an existing device.
+	 */
+	host_pci_dev =
+	    pci_find_slot(SN_PCIDEV_INFO(dev)->pdi_slot_host_handle >> 32,
+			  SN_PCIDEV_INFO(dev)->
+			  pdi_slot_host_handle & 0xffffffff);
+	SN_PCIDEV_INFO(dev)->pdi_host_pcidev_info =
+	    SN_PCIDEV_INFO(host_pci_dev);
+	SN_PCIDEV_INFO(dev)->pdi_linux_pcidev = dev;
+	SN_PCIDEV_INFO(dev)->pdi_pcibus_info = SN_PCIBUS_BUSSOFT(dev->bus);
+
+	/* Only set up IRQ stuff if this device has a host bus context */
+	if (SN_PCIDEV_BUSSOFT(dev) && sn_irq_info->irq_irq) {
+		SN_PCIDEV_INFO(dev)->pdi_sn_irq_info = sn_irq_info;
+		dev->irq = SN_PCIDEV_INFO(dev)->pdi_sn_irq_info->irq_irq;
+		sn_irq_fixup(dev, sn_irq_info);
+	} else {
+		/* Nobody else references the irq info; don't leak it. */
+		SN_PCIDEV_INFO(dev)->pdi_sn_irq_info = NULL;
+		kfree(sn_irq_info);
+	}
+}
+
+/*
+ * sn_pci_controller_fixup() - This routine sets up a bus's resources
+ * consistent with the Linux PCI abstraction layer.
+ *
+ * The bus description for (segment, busnum) is fetched from SAL, the
+ * bus is scanned with a freshly allocated pci_controller as sysdata,
+ * and the provider specific soft state is attached to it.  The function
+ * returns quietly when the bus does not exist, cannot be scanned, or
+ * needs no provider fixup.
+ */
+static void sn_pci_controller_fixup(int segment, int busnum)
+{
+	int status = 0;
+	int nasid, cnode;
+	struct pci_bus *bus;
+	struct pci_controller *controller;
+	struct pcibus_bussoft *prom_bussoft_ptr;
+	struct hubdev_info *hubdev_info;
+	void *provider_soft;
+
+	/*
+	 * SAL fills in prom_bussoft_ptr; judging by the __va() below the
+	 * value it stores is a physical address.
+	 */
+	status =
+	    sal_get_pcibus_info((u64) segment, (u64) busnum,
+				(u64) ia64_tpa(&prom_bussoft_ptr));
+	if (status > 0) {
+		return;		/* bus # does not exist */
+	}
+
+	prom_bussoft_ptr = __va(prom_bussoft_ptr);
+	controller = sn_alloc_pci_sysdata();
+	/* controller non-zero is BUG'd in sn_alloc_pci_sysdata */
+
+	bus = pci_scan_bus(busnum, &pci_root_ops, controller);
+	if (bus == NULL) {
+		/* No bus took the sysdata over, so release it again. */
+		kfree(controller);
+		return;		/* error, or bus already scanned */
+	}
+
+	/*
+	 * Per-provider fixup.  Copies the contents from prom to local
+	 * area and links SN_PCIBUS_BUSSOFT().
+	 *
+	 * Note: Provider is responsible for ensuring that prom_bussoft_ptr
+	 * represents an asic-type that it can handle.
+	 */
+
+	if (prom_bussoft_ptr->bs_asic_type == PCIIO_ASIC_TYPE_PPB) {
+		return;		/* no further fixup necessary */
+	}
+
+	provider_soft = pcibr_bus_fixup(prom_bussoft_ptr);
+	if (provider_soft == NULL) {
+		return;		/* fixup failed or not applicable */
+	}
+
+	/*
+	 * Generic bus fixup goes here.  Don't reference prom_bussoft_ptr
+	 * after this point.
+	 */
+
+	bus->sysdata = controller;
+	PCI_CONTROLLER(bus)->platform_data = provider_soft;
+
+	/* Link the bus to the xwidget info of the hub it hangs off. */
+	nasid = NASID_GET(SN_PCIBUS_BUSSOFT(bus)->bs_base);
+	cnode = nasid_to_cnodeid(nasid);
+	hubdev_info = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo);
+	SN_PCIBUS_BUSSOFT(bus)->bs_xwidget_info =
+	    &(hubdev_info->hdi_xwidget_info[SN_PCIBUS_BUSSOFT(bus)->bs_xid]);
+}
+
+/*
+ * Ugly hack to get PCI setup until we have a proper ACPI namespace.
+ */
+
+#define PCI_BUSES_TO_SCAN 256
+
+/*
+ * sn_pci_init() - bring up the SN I/O infrastructure.  Does nothing
+ * unless running on real sn2 hardware.  Always returns 0; running out
+ * of memory for the sn_irq table is fatal (BUG).
+ */
+static int __init sn_pci_init(void)
+{
+	int i = 0;
+	struct pci_dev *pci_dev = NULL;
+	extern void sn_init_cpei_timer(void);
+#ifdef CONFIG_PROC_FS
+	extern void register_sn_procfs(void);
+#endif
+
+	if (!ia64_platform_is("sn2") || IS_RUNNING_ON_SIMULATOR())
+		return 0;
+
+	/*
+	 * This is needed to avoid bounce limit checks in the blk layer
+	 */
+	ia64_max_iommu_merge_mask = ~PAGE_MASK;
+	sn_fixup_ionodes();
+
+	/* One zeroed sn_irq_info pointer slot per irq. */
+	sn_irq = kmalloc(sizeof(struct sn_irq_info *) * NR_IRQS, GFP_KERNEL);
+	if (!sn_irq)
+		BUG();		/* Cannot afford to run out of memory. */
+	memset(sn_irq, 0, sizeof(struct sn_irq_info *) * NR_IRQS);
+
+	sn_init_cpei_timer();
+
+#ifdef CONFIG_PROC_FS
+	register_sn_procfs();
+#endif
+
+	/* Probe every possible bus number on segment 0. */
+	for (i = 0; i < PCI_BUSES_TO_SCAN; i++) {
+		sn_pci_controller_fixup(0, i);
+	}
+
+	/*
+	 * Generic Linux PCI Layer has created the pci_bus and pci_dev
+	 * structures - time for us to add our SN Platform specific
+	 * information.
+	 */
+
+	while ((pci_dev =
+		pci_find_device(PCI_ANY_ID, PCI_ANY_ID, pci_dev)) != NULL) {
+		sn_pci_fixup_slot(pci_dev);
+	}
+
+	sn_ioif_inited = 1;	/* sn I/O infrastructure now initialized */
+
+	return 0;
+}
+
+/*
+ * hubdev_init_node() - Creates the HUB data structure and links it to
+ * its own NODE specific data area (npda->pdinfo).
+ */
+void hubdev_init_node(nodepda_t * npda, cnodeid_t node)
+{
+	struct hubdev_info *hubdev_info;
+	cnodeid_t alloc_node = node;
+
+	/* Headless/memless IO nodes take their memory from node 0. */
+	if (node >= num_online_nodes())
+		alloc_node = 0;
+
+	hubdev_info =
+	    (struct hubdev_info *)alloc_bootmem_node(NODE_DATA(alloc_node),
+						     sizeof(struct
+							    hubdev_info));
+	npda->pdinfo = (void *)hubdev_info;
+}
+
+/*
+ * Return the geoid recorded in the hubdev_info that hangs off the given
+ * compact node's nodepda.
+ */
+geoid_t
+cnodeid_get_geoid(cnodeid_t cnode)
+{
+	return ((struct hubdev_info *)(NODEPDA(cnode)->pdinfo))->hdi_geoid;
+}
+
+subsys_initcall(sn_pci_init);
diff --git a/arch/ia64/sn/kernel/iomv.c b/arch/ia64/sn/kernel/iomv.c
new file mode 100644
index 00000000000..fec6d8b8237
--- /dev/null
+++ b/arch/ia64/sn/kernel/iomv.c
@@ -0,0 +1,70 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000-2003 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/module.h>
+#include <asm/io.h>
+#include <asm/delay.h>
+#include <asm/sn/nodepda.h>
+#include <asm/sn/simulator.h>
+#include <asm/sn/pda.h>
+#include <asm/sn/sn_cpuid.h>
+#include <asm/sn/shub_mmr.h>
+
+/**
+ * sn_io_addr - convert an in/out port to an i/o address
+ * @port: port to convert
+ *
+ * Legacy in/out instructions are converted to ld/st instructions
+ * on IA64. This routine will convert a port number into a valid
+ * SN i/o address. Used by sn_in*() and sn_out*().
+ */
+void *sn_io_addr(unsigned long port)
+{
+	unsigned long base;
+	unsigned long addr;
+
+	if (IS_RUNNING_ON_SIMULATOR()) {
+		/*
+		 * The simulator does use legacy ports.  Word align the
+		 * port, but keep more than 10 bits so registers in the
+		 * bedrock local block stay reachable (hence no port&0xfff).
+		 */
+		if (is_shub2())
+			base = 0xc00000028c000000UL;
+		else
+			base = 0xc0000087cc000000UL;
+		addr = base | ((port >> 2) << 12);
+
+		/* Ports 0x1f0-0x1f7, 0x3f6 and 0x3f7 keep their low bits. */
+		if (port == 0x3f6 || port == 0x3f7 ||
+		    (port >= 0x1f0 && port <= 0x1f7))
+			addr |= port;
+		return (void *)addr;
+	}
+
+	/* On sn2, legacy I/O ports don't point at anything */
+	if (port < (64 * 1024))
+		return NULL;
+
+	return ((void *)(port | __IA64_UNCACHED_OFFSET));
+}
+
+EXPORT_SYMBOL(sn_io_addr);
+
+/**
+ * __sn_mmiowb - I/O space memory barrier
+ *
+ * See include/asm-ia64/io.h and Documentation/DocBook/deviceiobook.tmpl
+ * for details.
+ *
+ * On SN2, we wait for the PIO_WRITE_STATUS SHub register to clear.
+ * See PV 871084 for details about the WAR about zero value.
+ *
+ */
+void __sn_mmiowb(void)
+{
+	volatile unsigned long *status_reg = pda->pio_write_status_addr;
+	unsigned long idle_val = pda->pio_write_status_val;
+
+	/*
+	 * Spin until the pending write count field of the status
+	 * register matches the per-cpu idle value kept in the pda.
+	 */
+	for (;;) {
+		if ((*status_reg &
+		     SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) == idle_val)
+			break;
+		cpu_relax();
+	}
+}
+
+EXPORT_SYMBOL(__sn_mmiowb);
diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
new file mode 100644
index 00000000000..3be44724f6c
--- /dev/null
+++ b/arch/ia64/sn/kernel/irq.c
@@ -0,0 +1,431 @@
+/*
+ * Platform dependent support for SGI SN
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+#include <linux/irq.h>
+#include <asm/sn/intr.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/arch.h>
+#include "xtalk/xwidgetdev.h"
+#include "pci/pcibus_provider_defs.h"
+#include "pci/pcidev.h"
+#include "pci/pcibr_provider.h"
+#include <asm/sn/shub_mmr.h>
+#include <asm/sn/sn_sal.h>
+
+static void force_interrupt(int irq);
+static void register_intr_pda(struct sn_irq_info *sn_irq_info);
+static void unregister_intr_pda(struct sn_irq_info *sn_irq_info);
+
+extern int sn_force_interrupt_flag;
+extern int sn_ioif_inited;
+struct sn_irq_info **sn_irq;
+
+/*
+ * Issue the SN_SAL_IOIF_INTERRUPT SAL call with the SAL_INTR_ALLOC
+ * sub-function. @sn_irq_info is handed to SAL as a plain u64; the callers
+ * in this file pass __pa() of a struct sn_irq_info buffer.
+ * Returns the status field of the SAL return value; callers in this file
+ * treat 0 as success.
+ */
+static inline uint64_t sn_intr_alloc(nasid_t local_nasid, int local_widget,
+ u64 sn_irq_info,
+ int req_irq, nasid_t req_nasid,
+ int req_slice)
+{
+ struct ia64_sal_retval ret_stuff;
+ ret_stuff.status = 0;
+ ret_stuff.v0 = 0;
+
+ SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT,
+ (u64) SAL_INTR_ALLOC, (u64) local_nasid,
+ (u64) local_widget, (u64) sn_irq_info, (u64) req_irq,
+ (u64) req_nasid, (u64) req_slice);
+ return ret_stuff.status;
+}
+
+/*
+ * Issue the SN_SAL_IOIF_INTERRUPT SAL call with the SAL_INTR_FREE
+ * sub-function, identifying the interrupt by the irq_irq and irq_cookie
+ * fields of @sn_irq_info. The SAL status is not returned to the caller.
+ */
+static inline void sn_intr_free(nasid_t local_nasid, int local_widget,
+ struct sn_irq_info *sn_irq_info)
+{
+ struct ia64_sal_retval ret_stuff;
+ ret_stuff.status = 0;
+ ret_stuff.v0 = 0;
+
+ SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT,
+ (u64) SAL_INTR_FREE, (u64) local_nasid,
+ (u64) local_widget, (u64) sn_irq_info->irq_irq,
+ (u64) sn_irq_info->irq_cookie, 0, 0);
+}
+
+/*
+ * The startup/shutdown/disable/enable hooks below do nothing on SN; they
+ * exist only so that irq_type_sn can supply an entry for each of them.
+ */
+static unsigned int sn_startup_irq(unsigned int irq)
+{
+ return 0;
+}
+
+static void sn_shutdown_irq(unsigned int irq)
+{
+}
+
+static void sn_disable_irq(unsigned int irq)
+{
+}
+
+static void sn_enable_irq(unsigned int irq)
+{
+}
+
+/*
+ * Acknowledge an interrupt on the local hub.
+ *
+ * Reads SH_EVENT_OCCURRED for the local nasid, collects the UART, IPI,
+ * II_INT0 and II_INT1 bits that are currently set and writes exactly those
+ * bits to SH_EVENT_OCCURRED_ALIAS. The vector (low 8 bits of @irq) is then
+ * marked in pda->sn_in_service_ivecs and move_irq() is called.
+ *
+ * The mask is a 64-bit value, so each bit is built with 1UL rather than a
+ * plain int 1: an int shift is undefined for shift counts of 32 or more
+ * and would be sign-extended into the upper half of the mask for bit 31.
+ */
+static void sn_ack_irq(unsigned int irq)
+{
+ uint64_t event_occurred, mask = 0;
+ int nasid;
+
+ irq = irq & 0xff;
+ nasid = get_nasid();
+ event_occurred =
+ HUB_L((uint64_t *) GLOBAL_MMR_ADDR(nasid, SH_EVENT_OCCURRED));
+ if (event_occurred & SH_EVENT_OCCURRED_UART_INT_MASK) {
+ mask |= (1UL << SH_EVENT_OCCURRED_UART_INT_SHFT);
+ }
+ if (event_occurred & SH_EVENT_OCCURRED_IPI_INT_MASK) {
+ mask |= (1UL << SH_EVENT_OCCURRED_IPI_INT_SHFT);
+ }
+ if (event_occurred & SH_EVENT_OCCURRED_II_INT0_MASK) {
+ mask |= (1UL << SH_EVENT_OCCURRED_II_INT0_SHFT);
+ }
+ if (event_occurred & SH_EVENT_OCCURRED_II_INT1_MASK) {
+ mask |= (1UL << SH_EVENT_OCCURRED_II_INT1_SHFT);
+ }
+ HUB_S((uint64_t *) GLOBAL_MMR_ADDR(nasid, SH_EVENT_OCCURRED_ALIAS),
+ mask);
+ __set_bit(irq, (volatile void *)pda->sn_in_service_ivecs);
+
+ move_irq(irq);
+}
+
+/*
+ * End-of-interrupt hook. For the UART vector it re-checks
+ * SH_EVENT_OCCURRED and sends an IPI to the current cpu if the UART bit
+ * is still set. The vector is then cleared from
+ * pda->sn_in_service_ivecs and, when sn_force_interrupt_flag is set,
+ * force_interrupt() is invoked for @irq.
+ */
+static void sn_end_irq(unsigned int irq)
+{
+ int nasid;
+ int ivec;
+ uint64_t event_occurred;
+
+ ivec = irq & 0xff;
+ if (ivec == SGI_UART_VECTOR) {
+ nasid = get_nasid();
+ event_occurred = HUB_L((uint64_t *) GLOBAL_MMR_ADDR
+ (nasid, SH_EVENT_OCCURRED));
+ /* If the UART bit is set here, we may have received an
+ * interrupt from the UART that the driver missed. To
+ * make sure, we IPI ourselves to force us to look again.
+ */
+ if (event_occurred & SH_EVENT_OCCURRED_UART_INT_MASK) {
+ platform_send_ipi(smp_processor_id(), SGI_UART_VECTOR,
+ IA64_IPI_DM_INT, 0);
+ }
+ }
+ __clear_bit(ivec, (volatile void *)pda->sn_in_service_ivecs);
+ /* note: force_interrupt() receives the unmasked irq, not ivec */
+ if (sn_force_interrupt_flag)
+ force_interrupt(irq);
+}
+
+/*
+ * Retarget every sn_irq_info entry chained on sn_irq[irq] to the first
+ * cpu in @mask.
+ *
+ * For each entry the existing PROM interrupt is freed and a new one is
+ * requested for the target nasid/slice; on success the kernel copy is
+ * updated from the scratch structure filled in by the PROM. The walk
+ * stops at the first entry without a bridge or at the first failed
+ * allocation. Returns silently if the scratch buffer cannot be
+ * allocated.
+ */
+static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+ struct sn_irq_info *sn_irq_info = sn_irq[irq];
+ struct sn_irq_info *tmp_sn_irq_info;
+ int cpuid, cpuphys;
+ nasid_t t_nasid; /* nasid to target */
+ int t_slice; /* slice to target */
+
+ /* allocate a temp sn_irq_info struct to get new target info */
+ tmp_sn_irq_info = kmalloc(sizeof(*tmp_sn_irq_info), GFP_KERNEL);
+ if (!tmp_sn_irq_info)
+ return;
+
+ cpuid = first_cpu(mask);
+ cpuphys = cpu_physical_id(cpuid);
+ t_nasid = cpuid_to_nasid(cpuid);
+ t_slice = cpuid_to_slice(cpuid);
+
+ while (sn_irq_info) {
+ int status;
+ int local_widget;
+ uint64_t bridge = (uint64_t) sn_irq_info->irq_bridge;
+ nasid_t local_nasid = NASID_GET(bridge);
+
+ if (!bridge)
+ break; /* irq is not a device interrupt */
+
+ /* odd nasids take the TIO widget decoding */
+ if (local_nasid & 1)
+ local_widget = TIO_SWIN_WIDGETNUM(bridge);
+ else
+ local_widget = SWIN_WIDGETNUM(bridge);
+
+ /* Free the old PROM sn_irq_info structure */
+ sn_intr_free(local_nasid, local_widget, sn_irq_info);
+
+ /*
+ * allocate a new PROM sn_irq_info struct
+ * NOTE(review): the old entry has already been freed above; if
+ * this allocation fails the loop simply exits and nothing is
+ * re-allocated for this entry - confirm that is acceptable.
+ */
+ status = sn_intr_alloc(local_nasid, local_widget,
+ __pa(tmp_sn_irq_info), irq, t_nasid,
+ t_slice);
+
+ if (status == 0) {
+ /* Update kernels sn_irq_info with new target info */
+ unregister_intr_pda(sn_irq_info);
+ sn_irq_info->irq_cpuid = cpuid;
+ sn_irq_info->irq_nasid = t_nasid;
+ sn_irq_info->irq_slice = t_slice;
+ sn_irq_info->irq_xtalkaddr =
+ tmp_sn_irq_info->irq_xtalkaddr;
+ sn_irq_info->irq_cookie = tmp_sn_irq_info->irq_cookie;
+ register_intr_pda(sn_irq_info);
+
+ if (IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type)) {
+ pcibr_change_devices_irq(sn_irq_info);
+ }
+
+ sn_irq_info = sn_irq_info->irq_next;
+
+#ifdef CONFIG_SMP
+ set_irq_affinity_info((irq & 0xff), cpuphys, 0);
+#endif
+ } else {
+ break; /* snp_affinity failed the intr_alloc */
+ }
+ }
+ kfree(tmp_sn_irq_info);
+}
+
+/*
+ * Interrupt-type descriptor installed by sn_irq_init(). The initializer
+ * is positional, so the entries must stay in the member order of
+ * struct hw_interrupt_type.
+ */
+struct hw_interrupt_type irq_type_sn = {
+ "SN hub",
+ sn_startup_irq,
+ sn_shutdown_irq,
+ sn_enable_irq,
+ sn_disable_irq,
+ sn_ack_irq,
+ sn_end_irq,
+ sn_set_affinity_irq
+};
+
+/* Map @vector, as seen on the executing cpu, to an irq number. */
+unsigned int sn_local_vector_to_irq(u8 vector)
+{
+ unsigned int irq = CPU_VECTOR_TO_IRQ(smp_processor_id(), vector);
+
+ return irq;
+}
+
+/*
+ * Install irq_type_sn as the handler of every irq descriptor that still
+ * points at no_irq_type; descriptors with another handler are left alone.
+ */
+void sn_irq_init(void)
+{
+ irq_desc_t *desc = irq_desc;
+ int remaining;
+
+ for (remaining = NR_IRQS; remaining > 0; remaining--, desc++) {
+ if (desc->handler != &no_irq_type)
+ continue;
+ desc->handler = &irq_type_sn;
+ }
+}
+
+/*
+ * Widen the [sn_first_irq, sn_last_irq] window kept in the pda of the
+ * cpu that @sn_irq_info targets so that it includes this irq. A
+ * sn_first_irq of 0 means "no irq recorded yet".
+ */
+static void register_intr_pda(struct sn_irq_info *sn_irq_info)
+{
+ int new_irq = sn_irq_info->irq_irq;
+ int target_cpu = sn_irq_info->irq_cpuid;
+
+ if (new_irq > pdacpu(target_cpu)->sn_last_irq)
+ pdacpu(target_cpu)->sn_last_irq = new_irq;
+
+ if (pdacpu(target_cpu)->sn_first_irq == 0)
+ pdacpu(target_cpu)->sn_first_irq = new_irq;
+ else if (pdacpu(target_cpu)->sn_first_irq > new_irq)
+ pdacpu(target_cpu)->sn_first_irq = new_irq;
+}
+
+/*
+ * Shrink the [sn_first_irq, sn_last_irq] window kept in the pda of the
+ * cpu that @sn_irq_info targets when this irq sits on either edge of it.
+ * The new edge is the nearest irq whose sn_irq[] chain still has an entry
+ * for that cpu; sn_last_irq becomes 0 and sn_first_irq becomes 0 when no
+ * such irq is found.
+ *
+ * Note that the scans look at sn_irq[] as it is on entry, so an entry
+ * for this cpu that is still linked on another irq is counted.
+ */
+static void unregister_intr_pda(struct sn_irq_info *sn_irq_info)
+{
+ int irq = sn_irq_info->irq_irq;
+ int cpu = sn_irq_info->irq_cpuid;
+ struct sn_irq_info *tmp_irq_info;
+ int i, foundmatch;
+
+ /* irq was the upper edge: scan downwards for a new one */
+ if (pdacpu(cpu)->sn_last_irq == irq) {
+ foundmatch = 0;
+ for (i = pdacpu(cpu)->sn_last_irq - 1; i; i--) {
+ tmp_irq_info = sn_irq[i];
+ while (tmp_irq_info) {
+ if (tmp_irq_info->irq_cpuid == cpu) {
+ foundmatch++;
+ break;
+ }
+ tmp_irq_info = tmp_irq_info->irq_next;
+ }
+ if (foundmatch) {
+ break;
+ }
+ }
+ /* i is 0 here when the scan ran out without a match */
+ pdacpu(cpu)->sn_last_irq = i;
+ }
+
+ /* irq was the lower edge: scan upwards for a new one */
+ if (pdacpu(cpu)->sn_first_irq == irq) {
+ foundmatch = 0;
+ for (i = pdacpu(cpu)->sn_first_irq + 1; i < NR_IRQS; i++) {
+ tmp_irq_info = sn_irq[i];
+ while (tmp_irq_info) {
+ if (tmp_irq_info->irq_cpuid == cpu) {
+ foundmatch++;
+ break;
+ }
+ tmp_irq_info = tmp_irq_info->irq_next;
+ }
+ if (foundmatch) {
+ break;
+ }
+ }
+ pdacpu(cpu)->sn_first_irq = ((i == NR_IRQS) ? 0 : i);
+ }
+}
+
+/*
+ * Allocate a zeroed struct sn_irq_info and ask the PROM (sn_intr_alloc)
+ * to fill it in for the requested irq/nasid/slice.
+ * Returns the new structure, or NULL when either the memory allocation
+ * or the PROM call fails (the buffer is freed in the latter case).
+ */
+struct sn_irq_info *sn_irq_alloc(nasid_t local_nasid, int local_widget, int irq,
+ nasid_t nasid, int slice)
+{
+ struct sn_irq_info *info;
+ int rc;
+
+ info = kmalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return NULL;
+ memset(info, 0x0, sizeof(*info));
+
+ rc = sn_intr_alloc(local_nasid, local_widget, __pa(info), irq,
+ nasid, slice);
+ if (rc == 0)
+ return info;
+
+ kfree(info);
+ return NULL;
+}
+
+/*
+ * Release the PROM interrupt described by @sn_irq_info and free the
+ * structure itself. The widget number is decoded from the bridge
+ * address, using the TIO decoding when the bridge's nasid is odd.
+ */
+void sn_irq_free(struct sn_irq_info *sn_irq_info)
+{
+ uint64_t bridge_addr = (uint64_t) sn_irq_info->irq_bridge;
+ nasid_t bridge_nasid = NASID_GET(bridge_addr);
+ int widget;
+
+ /* tio check */
+ widget = (bridge_nasid & 1) ? TIO_SWIN_WIDGETNUM(bridge_addr)
+ : SWIN_WIDGETNUM(bridge_addr);
+
+ sn_intr_free(bridge_nasid, widget, sn_irq_info);
+ kfree(sn_irq_info);
+}
+
+/*
+ * Finish setting up @sn_irq_info for @pci_dev: record the cpu that
+ * matches its nasid/slice and the device's PCI info, push the entry on
+ * the front of the sn_irq[] chain for its irq, and register it in the
+ * target cpu's pda.
+ */
+void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info)
+{
+ nasid_t target_nasid = sn_irq_info->irq_nasid;
+ int target_slice = sn_irq_info->irq_slice;
+ int target_cpu;
+
+ target_cpu = nasid_slice_to_cpuid(target_nasid, target_slice);
+ sn_irq_info->irq_cpuid = target_cpu;
+ sn_irq_info->irq_pciioinfo = SN_PCIDEV_INFO(pci_dev);
+
+ /* the new entry becomes the head of the sn_irq[irq] list */
+ sn_irq_info->irq_next = sn_irq[sn_irq_info->irq_irq];
+ sn_irq[sn_irq_info->irq_irq] = sn_irq_info;
+
+ register_intr_pda(sn_irq_info);
+}
+
+/*
+ * Call pcibr_force_interrupt() for every entry chained on sn_irq[irq]
+ * that belongs to a PCI bridge ASIC and has a bridge pointer. Does
+ * nothing until sn_ioif_inited is set.
+ */
+static void force_interrupt(int irq)
+{
+ struct sn_irq_info *entry;
+
+ if (!sn_ioif_inited)
+ return;
+
+ for (entry = sn_irq[irq]; entry; entry = entry->irq_next) {
+ if (!IS_PCI_BRIDGE_ASIC(entry->irq_bridge_type))
+ continue;
+ if (entry->irq_bridge == NULL)
+ continue;
+ pcibr_force_interrupt(entry);
+ }
+}
+
+/*
+ * Check for lost interrupts. If the PIC int_status reg. says that
+ * an interrupt has been sent, but not handled, and the interrupt
+ * is not pending in either the cpu irr regs or in the soft irr regs,
+ * and the interrupt is not in service, then the interrupt may have
+ * been lost. Force an interrupt on that pin. It is possible that
+ * the interrupt is in flight, so we may generate a spurious interrupt,
+ * but we should never miss a real lost interrupt.
+ *
+ * The status value read here is stored in irq_last_intr so that the
+ * next call can tell whether the same bit was already set last time.
+ * Returns without doing anything when the entry has no PCI device info.
+ */
+static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info)
+{
+ uint64_t regval;
+ int irr_reg_num;
+ int irr_bit;
+ uint64_t irr_reg;
+ struct pcidev_info *pcidev_info;
+ struct pcibus_info *pcibus_info;
+
+ pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo;
+ if (!pcidev_info)
+ return;
+
+ pcibus_info =
+ (struct pcibus_info *)pcidev_info->pdi_host_pcidev_info->
+ pdi_pcibus_info;
+ regval = pcireg_intr_status_get(pcibus_info);
+
+ /* pick the 64-bit IRR register and the bit within it for this vector */
+ irr_reg_num = irq_to_vector(irq) / 64;
+ irr_bit = irq_to_vector(irq) % 64;
+ /*
+ * NOTE(review): there is no default case, so irr_reg stays
+ * uninitialized if irr_reg_num is ever outside 0..3 - presumably
+ * vectors are always below 256; confirm.
+ */
+ switch (irr_reg_num) {
+ case 0:
+ irr_reg = ia64_getreg(_IA64_REG_CR_IRR0);
+ break;
+ case 1:
+ irr_reg = ia64_getreg(_IA64_REG_CR_IRR1);
+ break;
+ case 2:
+ irr_reg = ia64_getreg(_IA64_REG_CR_IRR2);
+ break;
+ case 3:
+ irr_reg = ia64_getreg(_IA64_REG_CR_IRR3);
+ break;
+ }
+ if (!test_bit(irr_bit, &irr_reg)) {
+ if (!test_bit(irq, pda->sn_soft_irr)) {
+ if (!test_bit(irq, pda->sn_in_service_ivecs)) {
+ regval &= 0xff;
+ /* bit set now and on the previous check: force it */
+ if (sn_irq_info->irq_int_bit & regval &
+ sn_irq_info->irq_last_intr) {
+ regval &=
+ ~(sn_irq_info->
+ irq_int_bit & regval);
+ pcibr_force_interrupt(sn_irq_info);
+ }
+ }
+ }
+ }
+ sn_irq_info->irq_last_intr = regval;
+}
+
+/*
+ * Run the lost-interrupt check (sn_check_intr) over every irq in this
+ * cpu's [sn_first_irq, sn_last_irq] window. Does nothing until
+ * sn_ioif_inited is set or while no irq is registered for this cpu
+ * (sn_first_irq == 0).
+ */
+void sn_lb_int_war_check(void)
+{
+ struct sn_irq_info *entry;
+ int irq;
+
+ if (!sn_ioif_inited)
+ return;
+ if (pda->sn_first_irq == 0)
+ return;
+
+ for (irq = pda->sn_first_irq; irq <= pda->sn_last_irq; irq++) {
+ for (entry = sn_irq[irq]; entry; entry = entry->irq_next) {
+ /* Only call for PCI bridges that are fully initialized. */
+ if (!IS_PCI_BRIDGE_ASIC(entry->irq_bridge_type))
+ continue;
+ if (entry->irq_bridge == NULL)
+ continue;
+ sn_check_intr(irq, entry);
+ }
+ }
+}
diff --git a/arch/ia64/sn/kernel/klconflib.c b/arch/ia64/sn/kernel/klconflib.c
new file mode 100644
index 00000000000..0f11a3299cd
--- /dev/null
+++ b/arch/ia64/sn/kernel/klconflib.c
@@ -0,0 +1,108 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/types.h>
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <asm/sn/types.h>
+#include <asm/sn/module.h>
+#include <asm/sn/l1.h>
+
+char brick_types[MAX_BRICK_TYPES + 1] = "cri.xdpn%#=vo^kjbf890123456789...";
+/*
+ * Format a module id for printing.
+ *
+ * There are three possible formats:
+ *
+ * MODULE_FORMAT_BRIEF is the brief 6-character format, including
+ * the actual brick-type as recorded in the
+ * moduleid_t, eg. 002c15 for a C-brick, or
+ * 101#17 for a PX-brick.
+ *
+ * MODULE_FORMAT_LONG is the hwgraph format, eg. rack/002/bay/15
+ * or rack/101/bay/17 (note that the brick
+ * type does not appear in this format).
+ *
+ * MODULE_FORMAT_LCD is like MODULE_FORMAT_BRIEF, except that it
+ * ensures that the module id provided appears
+ * exactly as it would on the LCD display of
+ * the corresponding brick, eg. still 002c15
+ * for a C-brick, but 101p17 for a PX-brick.
+ *
+ * maule (9/13/04): Removed top-level check for (fmt == MODULE_FORMAT_LCD)
+ * making MODULE_FORMAT_LCD equivalent to MODULE_FORMAT_BRIEF. It was
+ * decided that all callers should assume the returned string should be what
+ * is displayed on the brick L1 LCD.
+ *
+ * No buffer length is passed in: the caller must supply a buffer large
+ * enough for the chosen format plus the terminating NUL. For any other
+ * value of fmt only the bay position is written.
+ */
+void
+format_module_id(char *buffer, moduleid_t m, int fmt)
+{
+ int rack, position;
+ unsigned char brickchar;
+
+ rack = MODULE_GET_RACK(m);
+ brickchar = MODULE_GET_BTCHAR(m);
+
+ /* Be sure we use the same brick type character as displayed
+ * on the brick's LCD
+ */
+ switch (brickchar)
+ {
+ case L1_BRICKTYPE_GA:
+ case L1_BRICKTYPE_OPUS_TIO:
+ brickchar = L1_BRICKTYPE_C;
+ break;
+
+ case L1_BRICKTYPE_PX:
+ case L1_BRICKTYPE_PE:
+ case L1_BRICKTYPE_PA:
+ case L1_BRICKTYPE_SA: /* we can move this to the "I's" later
+ * if that makes more sense
+ */
+ brickchar = L1_BRICKTYPE_P;
+ break;
+
+ case L1_BRICKTYPE_IX:
+ case L1_BRICKTYPE_IA:
+
+ brickchar = L1_BRICKTYPE_I;
+ break;
+ }
+
+ position = MODULE_GET_BPOS(m);
+
+ if ((fmt == MODULE_FORMAT_BRIEF) || (fmt == MODULE_FORMAT_LCD)) {
+ /* Brief module number format, eg. 002c15 */
+
+ /* Decompress the rack number */
+ *buffer++ = '0' + RACK_GET_CLASS(rack);
+ *buffer++ = '0' + RACK_GET_GROUP(rack);
+ *buffer++ = '0' + RACK_GET_NUM(rack);
+
+ /* Add the brick type */
+ *buffer++ = brickchar;
+ }
+ else if (fmt == MODULE_FORMAT_LONG) {
+ /* Fuller hwgraph format, eg. rack/002/bay/15 */
+
+ strcpy(buffer, "rack" "/"); buffer += strlen(buffer);
+
+ *buffer++ = '0' + RACK_GET_CLASS(rack);
+ *buffer++ = '0' + RACK_GET_GROUP(rack);
+ *buffer++ = '0' + RACK_GET_NUM(rack);
+
+ strcpy(buffer, "/" "bay" "/"); buffer += strlen(buffer);
+ }
+
+ /* Add the bay position, using at least two digits */
+ if (position < 10)
+ *buffer++ = '0';
+ /* sprintf also writes the terminating NUL for the whole string */
+ sprintf(buffer, "%d", position);
+
+}
diff --git a/arch/ia64/sn/kernel/machvec.c b/arch/ia64/sn/kernel/machvec.c
new file mode 100644
index 00000000000..02bb9155840
--- /dev/null
+++ b/arch/ia64/sn/kernel/machvec.c
@@ -0,0 +1,11 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2002-2003 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+#define MACHVEC_PLATFORM_NAME sn2
+#define MACHVEC_PLATFORM_HEADER <asm/machvec_sn2.h>
+#include <asm/machvec_init.h>
diff --git a/arch/ia64/sn/kernel/mca.c b/arch/ia64/sn/kernel/mca.c
new file mode 100644
index 00000000000..857774bb2c9
--- /dev/null
+++ b/arch/ia64/sn/kernel/mca.c
@@ -0,0 +1,135 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/timer.h>
+#include <linux/vmalloc.h>
+#include <asm/mca.h>
+#include <asm/sal.h>
+#include <asm/sn/sn_sal.h>
+
+/*
+ * Interval for calling SAL to poll for errors that do NOT cause error
+ * interrupts. SAL will raise a CPEI if any errors are present that
+ * need to be logged.
+ */
+#define CPEI_INTERVAL (5*HZ)
+
+struct timer_list sn_cpei_timer;
+void sn_init_cpei_timer(void);
+
+/* Printing oemdata from mca uses data that is not passed through SAL, it is
+ * global. Only one user at a time.
+ */
+static DECLARE_MUTEX(sn_oemdata_mutex);
+static u8 **sn_oemdata;
+static u64 *sn_oemdata_size, sn_oemdata_bufsize;
+
+/*
+ * print_hook
+ *
+ * This function is the callback routine that SAL calls to log error
+ * info for platform errors. buf is appended to sn_oemdata, resizing as
+ * required.
+ *
+ * Each message is formatted into a 400 byte stack buffer (longer output
+ * is truncated by vsnprintf) and appended, with its terminating NUL, at
+ * offset *sn_oemdata_size; the stored size does not count the NUL, so
+ * the next message overwrites it. The buffer grows in steps of 1000
+ * bytes. Always returns 0; if the buffer cannot be grown the message is
+ * dropped.
+ */
+static int print_hook(const char *fmt, ...)
+{
+ char buf[400];
+ int len;
+ va_list args;
+ va_start(args, fmt);
+ vsnprintf(buf, sizeof(buf), fmt, args);
+ va_end(args);
+ len = strlen(buf);
+ while (*sn_oemdata_size + len + 1 > sn_oemdata_bufsize) {
+ u64 newsize = sn_oemdata_bufsize + 1000;
+ u8 *newbuf = vmalloc(newsize);
+ if (!newbuf) {
+ printk(KERN_ERR "%s: unable to extend sn_oemdata\n",
+ __FUNCTION__);
+ return 0;
+ }
+ memcpy(newbuf, *sn_oemdata, *sn_oemdata_size);
+ vfree(*sn_oemdata);
+ *sn_oemdata = newbuf;
+ /*
+ * Record the new capacity only once the larger buffer is in
+ * place. Bumping it before a failed vmalloc would make later
+ * calls believe the old (possibly NULL) buffer is big enough
+ * and write past its end.
+ */
+ sn_oemdata_bufsize = newsize;
+ }
+ memcpy(*sn_oemdata + *sn_oemdata_size, buf, len + 1);
+ *sn_oemdata_size += len;
+ return 0;
+}
+
+/*
+ * CPE interrupt handler; also invoked directly by the polling timer.
+ * None of the arguments are used.
+ */
+static void sn_cpei_handler(int irq, void *devid, struct pt_regs *regs)
+{
+ /*
+ * this function's sole purpose is to call SAL when we receive
+ * a CE interrupt from SHUB or when the timer routine decides
+ * we need to call SAL to check for CEs.
+ */
+
+ /* CALL SAL_LOG_CE */
+
+ ia64_sn_plat_cpei_handler();
+}
+
+/*
+ * Timer callback: poll SAL through sn_cpei_handler() and re-arm
+ * sn_cpei_timer for another CPEI_INTERVAL. @dummy is unused.
+ */
+static void sn_cpei_timer_handler(unsigned long dummy)
+{
+ sn_cpei_handler(-1, NULL, NULL);
+ mod_timer(&sn_cpei_timer, jiffies + CPEI_INTERVAL);
+}
+
+/*
+ * Set up sn_cpei_timer so that sn_cpei_timer_handler() first runs one
+ * CPEI_INTERVAL from now, and start it.
+ */
+void sn_init_cpei_timer(void)
+{
+ struct timer_list *timer = &sn_cpei_timer;
+
+ init_timer(timer);
+ timer->function = sn_cpei_timer_handler;
+ timer->expires = jiffies + CPEI_INTERVAL;
+ add_timer(timer);
+}
+
+/*
+ * Have the PROM decode the platform specific section at @sect_header.
+ * The output arrives through print_hook(), which appends to *@oemdata
+ * and updates *@oemdata_size; the file-scope pointers it uses are set up
+ * here under sn_oemdata_mutex, so only one decode runs at a time.
+ * Always returns 0.
+ */
+static int
+sn_platform_plat_specific_err_print(const u8 * sect_header, u8 ** oemdata,
+ u64 * oemdata_size)
+{
+ down(&sn_oemdata_mutex);
+ sn_oemdata = oemdata;
+ sn_oemdata_size = oemdata_size;
+ /* capacity restarts at 0, so print_hook allocates on its first call */
+ sn_oemdata_bufsize = 0;
+ ia64_sn_plat_specific_err_print(print_hook, (char *)sect_header);
+ up(&sn_oemdata_mutex);
+ return 0;
+}
+
+/*
+ * Callback used when userspace salinfo wants oem data decoded by the
+ * platform kernel and/or prom.
+ *
+ * Any previous *oemdata buffer is freed and the outputs are reset. The
+ * section is decoded only when its GUID is the platform specific or the
+ * platform memory device error section and its oem_data valid bit is
+ * set; otherwise 0 is returned with the outputs left empty.
+ */
+int sn_salinfo_platform_oemdata(const u8 *sect_header, u8 **oemdata, u64 *oemdata_size)
+{
+ efi_guid_t sect_guid = *(efi_guid_t *)sect_header;
+ int has_oem_data = 0;
+
+ *oemdata_size = 0;
+ vfree(*oemdata);
+ *oemdata = NULL;
+
+ if (efi_guidcmp(sect_guid, SAL_PLAT_SPECIFIC_ERR_SECT_GUID) == 0)
+ has_oem_data = ((sal_log_plat_specific_err_info_t *)sect_header)->valid.oem_data;
+ else if (efi_guidcmp(sect_guid, SAL_PLAT_MEM_DEV_ERR_SECT_GUID) == 0)
+ has_oem_data = ((sal_log_mem_dev_err_info_t *)sect_header)->valid.oem_data;
+
+ if (!has_oem_data)
+ return 0;
+
+ return sn_platform_plat_specific_err_print(sect_header, oemdata, oemdata_size);
+}
+
+/*
+ * Install sn_salinfo_platform_oemdata() in the salinfo_platform_oemdata
+ * hook. Always returns 0.
+ */
+static int __init sn_salinfo_init(void)
+{
+ salinfo_platform_oemdata = &sn_salinfo_platform_oemdata;
+ return 0;
+}
+
+module_init(sn_salinfo_init)
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
new file mode 100644
index 00000000000..f0306b516af
--- /dev/null
+++ b/arch/ia64/sn/kernel/setup.c
@@ -0,0 +1,621 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1999,2001-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/kdev_t.h>
+#include <linux/string.h>
+#include <linux/tty.h>
+#include <linux/console.h>
+#include <linux/timex.h>
+#include <linux/sched.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/serial.h>
+#include <linux/irq.h>
+#include <linux/bootmem.h>
+#include <linux/mmzone.h>
+#include <linux/interrupt.h>
+#include <linux/acpi.h>
+#include <linux/compiler.h>
+#include <linux/sched.h>
+#include <linux/root_dev.h>
+#include <linux/nodemask.h>
+
+#include <asm/io.h>
+#include <asm/sal.h>
+#include <asm/machvec.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+#include <asm/sn/arch.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/pda.h>
+#include <asm/sn/nodepda.h>
+#include <asm/sn/sn_cpuid.h>
+#include <asm/sn/simulator.h>
+#include <asm/sn/leds.h>
+#include <asm/sn/bte.h>
+#include <asm/sn/shub_mmr.h>
+#include <asm/sn/clksupport.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/geo.h>
+#include "xtalk/xwidgetdev.h"
+#include "xtalk/hubdev.h"
+#include <asm/sn/klconfig.h>
+
+
+DEFINE_PER_CPU(struct pda_s, pda_percpu);
+
+#define MAX_PHYS_MEMORY (1UL << 49) /* 2^49 bytes = 512 TB */
+
+lboard_t *root_lboard[MAX_COMPACT_NODES];
+
+extern void bte_init_node(nodepda_t *, cnodeid_t);
+
+extern void sn_timer_init(void);
+extern unsigned long last_time_offset;
+extern void (*ia64_mark_idle) (int);
+extern void snidle(int);
+extern unsigned char acpi_kbd_controller_present;
+
+unsigned long sn_rtc_cycles_per_second;
+EXPORT_SYMBOL(sn_rtc_cycles_per_second);
+
+DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info);
+EXPORT_PER_CPU_SYMBOL(__sn_hub_info);
+
+partid_t sn_partid = -1;
+EXPORT_SYMBOL(sn_partid);
+char sn_system_serial_number_string[128];
+EXPORT_SYMBOL(sn_system_serial_number_string);
+u64 sn_partition_serial_number;
+EXPORT_SYMBOL(sn_partition_serial_number);
+u8 sn_partition_id;
+EXPORT_SYMBOL(sn_partition_id);
+u8 sn_system_size;
+EXPORT_SYMBOL(sn_system_size);
+u8 sn_sharing_domain_size;
+EXPORT_SYMBOL(sn_sharing_domain_size);
+u8 sn_coherency_id;
+EXPORT_SYMBOL(sn_coherency_id);
+u8 sn_region_size;
+EXPORT_SYMBOL(sn_region_size);
+
+short physical_node_map[MAX_PHYSNODE_ID];
+
+EXPORT_SYMBOL(physical_node_map);
+
+int numionodes;
+
+static void sn_init_pdas(char **);
+static void scan_for_ionodes(void);
+
+static nodepda_t *nodepdaindr[MAX_COMPACT_NODES];
+
+/*
+ * The format of "screen_info" is strange, and due to early i386-setup
+ * code. This is just enough to make the console code think we're on a
+ * VGA color display.
+ */
+struct screen_info sn_screen_info = {
+ .orig_x = 0,
+ .orig_y = 0,
+ .orig_video_mode = 3,
+ .orig_video_cols = 80,
+ .orig_video_ega_bx = 3,
+ .orig_video_lines = 25,
+ .orig_video_isVGA = 1,
+ .orig_video_points = 16
+};
+
+/*
+ * This is here so we can use the CMOS detection in ide-probe.c to
+ * determine what drives are present. In theory, we don't need this
+ * as the auto-detection could be done via ide-probe.c:do_probe() but
+ * in practice that would be much slower, which is painful when
+ * running in the simulator. Note that passing zeroes in DRIVE_INFO
+ * is sufficient (the IDE driver will autodetect the drive geometry).
+ */
+#ifdef CONFIG_IA64_GENERIC
+extern char drive_info[4 * 16];
+#else
+char drive_info[4 * 16];
+#endif
+
+/*
+ * Return the nasid of the executing cpu by asking SAL about its sapic
+ * id. Usable early in boot, before the nodepda has been set up. A
+ * failing SAL call is fatal (BUG).
+ */
+static int
+boot_get_nasid(void)
+{
+ int my_nasid;
+ int sal_failed;
+
+ sal_failed = ia64_sn_get_sapic_info(get_sapicid(), &my_nasid, NULL, NULL) != 0;
+ if (sal_failed)
+ BUG();
+
+ return my_nasid;
+}
+
+/*
+ * Translate a proximity domain into a nasid.
+ *
+ * The domain is mapped to a node id through pxm_to_nid_map[], and the
+ * nasid is taken from the start address of the first node_memblk[]
+ * entry that belongs to that node. Returns -1 when the node has no
+ * memory block. This is an __init routine and can only be used during
+ * init.
+ */
+
+static int __init pxm_to_nasid(int pxm)
+{
+ int wanted_nid = pxm_to_nid_map[pxm];
+ int blk;
+
+ for (blk = 0; blk < num_node_memblks; blk++) {
+ if (node_memblk[blk].nid != wanted_nid)
+ continue;
+ return NASID_GET(node_memblk[blk].start_paddr);
+ }
+
+ return -1;
+}
+
+/**
+ * early_sn_setup - early setup routine for SN platforms
+ *
+ * Sets up an initial console to aid debugging. Intended primarily
+ * for bringup. See start_kernel() in init/main.c.
+ *
+ * Walks the EFI configuration tables for the SAL system table, then the
+ * SAL descriptors for the entry point descriptor, and registers the SAL
+ * handler found there. Logs an error if no entry point is found.
+ */
+
+void __init early_sn_setup(void)
+{
+ efi_system_table_t *efi_systab;
+ efi_config_table_t *config_tables;
+ struct ia64_sal_systab *sal_systab;
+ struct ia64_sal_desc_entry_point *ep;
+ char *p;
+ int i, j;
+
+ /*
+ * Parse enough of the SAL tables to locate the SAL entry point. Since console
+ * IO on SN2 is done via SAL calls, early_printk won't work without this.
+ *
+ * This code duplicates some of the ACPI table parsing that is in efi.c & sal.c.
+ * Any changes to those files may have to be made here as well.
+ */
+ efi_systab = (efi_system_table_t *) __va(ia64_boot_param->efi_systab);
+ config_tables = __va(efi_systab->tables);
+ for (i = 0; i < efi_systab->nr_tables; i++) {
+ if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) ==
+ 0) {
+ sal_systab = __va(config_tables[i].table);
+ /* the descriptors start right after the table header */
+ p = (char *)(sal_systab + 1);
+ for (j = 0; j < sal_systab->entry_count; j++) {
+ if (*p == SAL_DESC_ENTRY_POINT) {
+ ep = (struct ia64_sal_desc_entry_point
+ *)p;
+ ia64_sal_handler_init(__va
+ (ep->sal_proc),
+ __va(ep->gp));
+ return;
+ }
+ /* descriptor size depends on its type byte */
+ p += SAL_DESC_SIZE(*p);
+ }
+ }
+ }
+ /* Uh-oh, SAL not available?? */
+ printk(KERN_ERR "failed to find SAL entry point\n");
+}
+
+extern int platform_intr_list[];
+extern nasid_t master_nasid;
+static int shub_1_1_found __initdata;
+
+/*
+ * is_shub_1_1
+ *
+ * Returns non-zero when the hub at @nasid is a shub1 whose revision
+ * field in SH1_SHUB_ID is 2 or lower. Always 0 on shub2 systems, where
+ * the register is not read.
+ */
+
+static inline int __init is_shub_1_1(int nasid)
+{
+ unsigned long shub_id;
+ int revision;
+
+ if (is_shub2())
+ return 0;
+
+ shub_id = REMOTE_HUB_L(nasid, SH1_SHUB_ID);
+ revision = (shub_id & SH1_SHUB_ID_REVISION_MASK) >> SH1_SHUB_ID_REVISION_SHFT;
+
+ return revision <= 2;
+}
+
+/*
+ * Scan all online nodes and latch the result in the file-scope
+ * shub_1_1_found flag when any of them is a shub 1.1 (see
+ * is_shub_1_1()).
+ *
+ * The result must go into the static flag, not into
+ * sn_hub_info->shub_1_1_found: sn_cpu_init() assigns the per-cpu field
+ * from the static flag right after calling this routine, so a value
+ * stored only in the per-cpu field would be overwritten with 0.
+ */
+static void __init sn_check_for_wars(void)
+{
+ int cnode;
+
+ if (is_shub2()) {
+ /* none yet */
+ } else {
+ for_each_online_node(cnode) {
+ if (is_shub_1_1(cnodeid_to_nasid(cnode)))
+ shub_1_1_found = 1;
+ }
+ }
+}
+
+/**
+ * sn_setup - SN platform setup routine
+ * @cmdline_p: kernel command line
+ *
+ * Handles platform setup for SN machines. This includes determining
+ * the RTC frequency (via a SAL call), initializing secondary CPUs, and
+ * setting up per-node data areas. The console is also initialized here.
+ *
+ * Panics if the SAL revision is older than
+ * SN_SAL_MIN_MAJOR.SN_SAL_MIN_MINOR.
+ */
+void __init sn_setup(char **cmdline_p)
+{
+ long status, ticks_per_sec, drift;
+ int pxm;
+ int major = sn_sal_rev_major(), minor = sn_sal_rev_minor();
+ extern void sn_cpu_init(void);
+
+ /*
+ * If the generic code has enabled vga console support - lets
+ * get rid of it again. This is a kludge for the fact that ACPI
+ * currently has no way of informing us if legacy VGA is available
+ * or not.
+ */
+#if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE)
+ if (conswitchp == &vga_con) {
+ printk(KERN_DEBUG "SGI: Disabling VGA console\n");
+#ifdef CONFIG_DUMMY_CONSOLE
+ conswitchp = &dummy_con;
+#else
+ conswitchp = NULL;
+#endif /* CONFIG_DUMMY_CONSOLE */
+ }
+#endif /* def(CONFIG_VT) && def(CONFIG_VGA_CONSOLE) */
+
+ MAX_DMA_ADDRESS = PAGE_OFFSET + MAX_PHYS_MEMORY;
+
+ /* memset with -1 sets every byte to 0xff, so each short entry reads -1 */
+ memset(physical_node_map, -1, sizeof(physical_node_map));
+ for (pxm = 0; pxm < MAX_PXM_DOMAINS; pxm++) {
+ int nasid;
+
+ if (pxm_to_nid_map[pxm] == -1)
+ continue;
+
+ /*
+ * pxm_to_nasid() returns -1 when the node has no memory
+ * block; never use that (or any out-of-range value) as an
+ * index into physical_node_map[].
+ */
+ nasid = pxm_to_nasid(pxm);
+ if (nasid < 0 || nasid >= MAX_PHYSNODE_ID)
+ continue;
+
+ physical_node_map[nasid] = pxm_to_nid_map[pxm];
+ }
+
+ /*
+ * Old PROMs do not provide an ACPI FADT. Disable legacy keyboard
+ * support here so we don't have to listen to failed keyboard probe
+ * messages.
+ */
+ if ((major < 2 || (major == 2 && minor <= 9)) &&
+ acpi_kbd_controller_present) {
+ printk(KERN_INFO "Disabling legacy keyboard support as prom "
+ "is too old and doesn't provide FADT\n");
+ acpi_kbd_controller_present = 0;
+ }
+
+ printk("SGI SAL version %x.%02x\n", major, minor);
+
+ /*
+ * Confirm the SAL we're running on is recent enough...
+ */
+ if ((major < SN_SAL_MIN_MAJOR) || (major == SN_SAL_MIN_MAJOR &&
+ minor < SN_SAL_MIN_MINOR)) {
+ printk(KERN_ERR "This kernel needs SGI SAL version >= "
+ "%x.%02x\n", SN_SAL_MIN_MAJOR, SN_SAL_MIN_MINOR);
+ panic("PROM version too old\n");
+ }
+
+ master_nasid = boot_get_nasid();
+
+ status =
+ ia64_sal_freq_base(SAL_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec,
+ &drift);
+ if (status != 0 || ticks_per_sec < 100000) {
+ printk(KERN_WARNING
+ "unable to determine platform RTC clock frequency, guessing.\n");
+ /* PROM gives wrong value for clock freq. so guess */
+ sn_rtc_cycles_per_second = 1000000000000UL / 30000UL;
+ } else
+ sn_rtc_cycles_per_second = ticks_per_sec;
+
+ platform_intr_list[ACPI_INTERRUPT_CPEI] = IA64_CPE_VECTOR;
+
+ /*
+ * we set the default root device to /dev/hda
+ * to make simulation easy
+ */
+ ROOT_DEV = Root_HDA1;
+
+ /*
+ * Create the PDAs and NODEPDAs for all the cpus.
+ */
+ sn_init_pdas(cmdline_p);
+
+ ia64_mark_idle = &snidle;
+
+ /*
+ * For the bootcpu, we do this here. All other cpus will make the
+ * call as part of cpu_init in slave cpu initialization.
+ */
+ sn_cpu_init();
+
+#ifdef CONFIG_SMP
+ init_smp_config();
+#endif
+ screen_info = sn_screen_info;
+
+ sn_timer_init();
+}
+
+/**
+ * sn_init_pdas - setup node data areas
+ * @cmdline_p: kernel command line (not referenced in this function)
+ *
+ * One time setup for Node Data Area. Called by sn_setup().
+ *
+ * Fills this cpu's cnodeid_to_nasid_table, scans for I/O nodes (which
+ * may raise numionodes above the number of online nodes), allocates a
+ * nodepda for every compact node, and initializes the BTE and hubdev
+ * state.
+ */
+static void __init sn_init_pdas(char **cmdline_p)
+{
+ cnodeid_t cnode;
+
+ memset(pda->cnodeid_to_nasid_table, -1,
+ sizeof(pda->cnodeid_to_nasid_table));
+ for_each_online_node(cnode)
+ pda->cnodeid_to_nasid_table[cnode] =
+ pxm_to_nasid(nid_to_pxm_map[cnode]);
+
+ /* scan_for_ionodes() bumps numionodes for every I/O node it finds */
+ numionodes = num_online_nodes();
+ scan_for_ionodes();
+
+ /*
+ * Allocate & initialize the nodepda for each node.
+ */
+ for_each_online_node(cnode) {
+ nodepdaindr[cnode] =
+ alloc_bootmem_node(NODE_DATA(cnode), sizeof(nodepda_t));
+ memset(nodepdaindr[cnode], 0, sizeof(nodepda_t));
+ memset(nodepdaindr[cnode]->phys_cpuid, -1,
+ sizeof(nodepdaindr[cnode]->phys_cpuid));
+ }
+
+ /*
+ * Allocate & initialize nodepda for TIOs. For now, put them on node 0.
+ */
+ for (cnode = num_online_nodes(); cnode < numionodes; cnode++) {
+ nodepdaindr[cnode] =
+ alloc_bootmem_node(NODE_DATA(0), sizeof(nodepda_t));
+ memset(nodepdaindr[cnode], 0, sizeof(nodepda_t));
+ }
+
+ /*
+ * Now copy the array of nodepda pointers to each nodepda.
+ */
+ for (cnode = 0; cnode < numionodes; cnode++)
+ memcpy(nodepdaindr[cnode]->pernode_pdaindr, nodepdaindr,
+ sizeof(nodepdaindr));
+
+ /*
+ * Set up IO related platform-dependent nodepda fields.
+ * The following routine actually sets up the hubinfo struct
+ * in nodepda.
+ */
+ for_each_online_node(cnode) {
+ bte_init_node(nodepdaindr[cnode], cnode);
+ }
+
+ /*
+ * Initialize the per node hubdev. This includes IO Nodes and
+ * headless/memless nodes.
+ */
+ for (cnode = 0; cnode < numionodes; cnode++) {
+ hubdev_init_node(nodepdaindr[cnode], cnode);
+ }
+}
+
+/**
+ * sn_cpu_init - initialize per-cpu data areas
+ *
+ * Called during cpu initialization on each cpu as it starts.
+ * Currently, initializes the per-cpu data area for SNIA.
+ * Also sets up a few fields in the nodepda. Also known as
+ * platform_cpu_init() by the ia64 machvec code.
+ *
+ * Takes no arguments; the cpu is identified with smp_processor_id()
+ * and get_sapicid(). A failing SAL query is fatal (BUG).
+ */
+void __init sn_cpu_init(void)
+{
+ int cpuid;
+ int cpuphyid;
+ int nasid;
+ int subnode;
+ int slice;
+ int cnode;
+ int i;
+ static int wars_have_been_checked;
+
+ /*
+ * NOTE(review): pda is dereferenced with -> throughout this file, so
+ * sizeof(pda) looks like the size of a pointer rather than of the
+ * structure. Confirm before changing it: on the boot cpu
+ * sn_init_pdas() has already filled pda->cnodeid_to_nasid_table by
+ * the time sn_setup() calls this function.
+ */
+ memset(pda, 0, sizeof(pda));
+ if (ia64_sn_get_sn_info(0, &sn_hub_info->shub2, &sn_hub_info->nasid_bitmask, &sn_hub_info->nasid_shift,
+ &sn_system_size, &sn_sharing_domain_size, &sn_partition_id,
+ &sn_coherency_id, &sn_region_size))
+ BUG();
+ sn_hub_info->as_shift = sn_hub_info->nasid_shift - 2;
+
+ /*
+ * The boot cpu makes this call again after platform initialization is
+ * complete.
+ */
+ if (nodepdaindr[0] == NULL)
+ return;
+
+ cpuid = smp_processor_id();
+ cpuphyid = get_sapicid();
+
+ if (ia64_sn_get_sapic_info(cpuphyid, &nasid, &subnode, &slice))
+ BUG();
+
+ /* record this cpu's location in every allocated nodepda */
+ for (i=0; i < MAX_NUMNODES; i++) {
+ if (nodepdaindr[i]) {
+ nodepdaindr[i]->phys_cpuid[cpuid].nasid = nasid;
+ nodepdaindr[i]->phys_cpuid[cpuid].slice = slice;
+ nodepdaindr[i]->phys_cpuid[cpuid].subnode = subnode;
+ }
+ }
+
+ cnode = nasid_to_cnodeid(nasid);
+
+ pda->p_nodepda = nodepdaindr[cnode];
+ pda->led_address =
+ (typeof(pda->led_address)) (LED0 + (slice << LED_CPU_SHIFT));
+ pda->led_state = LED_ALWAYS_SET;
+ pda->hb_count = HZ / 2;
+ pda->hb_state = 0;
+ pda->idle_flag = 0;
+
+ /* cpus other than cpu 0 inherit cpu 0's cnode -> nasid table */
+ if (cpuid != 0) {
+ memcpy(pda->cnodeid_to_nasid_table,
+ pdacpu(0)->cnodeid_to_nasid_table,
+ sizeof(pda->cnodeid_to_nasid_table));
+ }
+
+ /*
+ * Check for WARs.
+ * Only needs to be done once, on BSP.
+ * Has to be done after loop above, because it uses pda.cnodeid_to_nasid_table[i].
+ * Has to be done before assignment below.
+ */
+ if (!wars_have_been_checked) {
+ sn_check_for_wars();
+ wars_have_been_checked = 1;
+ }
+ /* copy the file-scope flag into this cpu's hub info */
+ sn_hub_info->shub_1_1_found = shub_1_1_found;
+
+ /*
+ * Set up addresses of PIO/MEM write status registers.
+ */
+ {
+ u64 pio1[] = {SH1_PIO_WRITE_STATUS_0, 0, SH1_PIO_WRITE_STATUS_1, 0};
+ u64 pio2[] = {SH2_PIO_WRITE_STATUS_0, SH2_PIO_WRITE_STATUS_1,
+ SH2_PIO_WRITE_STATUS_2, SH2_PIO_WRITE_STATUS_3};
+ u64 *pio;
+ pio = is_shub1() ? pio1 : pio2;
+ /* the register is selected by this cpu's slice number */
+ pda->pio_write_status_addr = (volatile unsigned long *) LOCAL_MMR_ADDR(pio[slice]);
+ pda->pio_write_status_val = is_shub1() ? SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK : 0;
+ }
+
+ /*
+ * WAR addresses for SHUB 1.x.
+ *
+ * NOTE(review): buddy_nasid is computed but never used; the address
+ * below is built from nasid. Confirm which one is intended.
+ */
+ if (local_node_data->active_cpu_count++ == 0 && is_shub1()) {
+ int buddy_nasid;
+ buddy_nasid =
+ cnodeid_to_nasid(numa_node_id() ==
+ num_online_nodes() - 1 ? 0 : numa_node_id() + 1);
+ pda->pio_shub_war_cam_addr =
+ (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid,
+ SH1_PI_CAM_CONTROL);
+ }
+}
+
+/*
+ * Scan klconfig for ionodes. Add the nasids to the
+ * physical_node_map and the pda and increment numionodes.
+ *
+ * Three passes are made over the even nasids that are present in
+ * physical_node_map: the first records the root lboard of every node,
+ * the second adds the additional KLTYPE_SNIA boards (headless/memless
+ * I/O nodes), and the third adds the KLTYPE_TIO boards. Each board
+ * found in the last two passes is given the next compact node id.
+ */
+
+static void __init scan_for_ionodes(void)
+{
+ int nasid = 0;
+ lboard_t *brd;
+
+ /* Setup ionodes with memory */
+ for (nasid = 0; nasid < MAX_PHYSNODE_ID; nasid += 2) {
+ unsigned long klgraph_paddr;
+ char *klgraph_header;
+ cnodeid_t cnodeid;
+
+ if (physical_node_map[nasid] == -1)
+ continue;
+
+ /*
+ * Test the address as returned by the PROM, before it goes
+ * through __va(): the translated pointer is not NULL even for
+ * an address of 0, so testing it would never catch a missing
+ * klconfig table.
+ */
+ klgraph_paddr = (unsigned long)ia64_sn_get_klconfig_addr(nasid);
+ if (!klgraph_paddr) {
+ if (IS_RUNNING_ON_SIMULATOR())
+ continue;
+ BUG(); /* All nodes must have klconfig tables! */
+ }
+ klgraph_header = __va(klgraph_paddr);
+ cnodeid = nasid_to_cnodeid(nasid);
+ root_lboard[cnodeid] = (lboard_t *)
+ NODE_OFFSET_TO_LBOARD((nasid),
+ ((kl_config_hdr_t
+ *) (klgraph_header))->
+ ch_board_info);
+ }
+
+ /* Scan headless/memless IO Nodes. */
+ for (nasid = 0; nasid < MAX_PHYSNODE_ID; nasid += 2) {
+ /* if there's no nasid, don't try to read the klconfig on the node */
+ if (physical_node_map[nasid] == -1)
+ continue;
+ brd = find_lboard_any((lboard_t *)
+ root_lboard[nasid_to_cnodeid(nasid)],
+ KLTYPE_SNIA);
+ if (brd) {
+ brd = KLCF_NEXT_ANY(brd); /* Skip this node's lboard */
+ if (!brd)
+ continue;
+ }
+
+ brd = find_lboard_any(brd, KLTYPE_SNIA);
+
+ while (brd) {
+ pda->cnodeid_to_nasid_table[numionodes] =
+ brd->brd_nasid;
+ physical_node_map[brd->brd_nasid] = numionodes;
+ root_lboard[numionodes] = brd;
+ numionodes++;
+ brd = KLCF_NEXT_ANY(brd);
+ if (!brd)
+ break;
+
+ brd = find_lboard_any(brd, KLTYPE_SNIA);
+ }
+ }
+
+ /* Scan for TIO nodes. */
+ for (nasid = 0; nasid < MAX_PHYSNODE_ID; nasid += 2) {
+ /* if there's no nasid, don't try to read the klconfig on the node */
+ if (physical_node_map[nasid] == -1)
+ continue;
+ brd = find_lboard_any((lboard_t *)
+ root_lboard[nasid_to_cnodeid(nasid)],
+ KLTYPE_TIO);
+ while (brd) {
+ pda->cnodeid_to_nasid_table[numionodes] =
+ brd->brd_nasid;
+ physical_node_map[brd->brd_nasid] = numionodes;
+ root_lboard[numionodes] = brd;
+ numionodes++;
+ brd = KLCF_NEXT_ANY(brd);
+ if (!brd)
+ break;
+
+ brd = find_lboard_any(brd, KLTYPE_TIO);
+ }
+ }
+
+}
+
+/*
+ * Look up the logical cpu whose recorded physical location in the
+ * current nodepda matches @nasid and @slice.
+ * Returns the cpu number, or -1 when no cpu matches.
+ */
+int
+nasid_slice_to_cpuid(int nasid, int slice)
+{
+ long candidate;
+
+ for (candidate = 0; candidate < NR_CPUS; candidate++) {
+ if (nodepda->phys_cpuid[candidate].nasid != nasid)
+ continue;
+ if (nodepda->phys_cpuid[candidate].slice != slice)
+ continue;
+ return candidate;
+ }
+
+ return -1;
+}
diff --git a/arch/ia64/sn/kernel/sn2/Makefile b/arch/ia64/sn/kernel/sn2/Makefile
new file mode 100644
index 00000000000..170bde4549d
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/Makefile
@@ -0,0 +1,13 @@
+# arch/ia64/sn/kernel/sn2/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1999,2001-2002 Silicon Graphics, Inc. All rights reserved.
+#
+# sn2 specific kernel files
+#
+
+obj-y += cache.o io.o ptc_deadlock.o sn2_smp.o sn_proc_fs.o \
+ prominfo_proc.o timer.o timer_interrupt.o sn_hwperf.o
diff --git a/arch/ia64/sn/kernel/sn2/cache.c b/arch/ia64/sn/kernel/sn2/cache.c
new file mode 100644
index 00000000000..bc3cfa17cd0
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/cache.c
@@ -0,0 +1,34 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2001-2003 Silicon Graphics, Inc. All rights reserved.
+ *
+ */
+#include <linux/module.h>
+#include <asm/pgalloc.h>
+
+/**
+ * sn_flush_all_caches - flush a range of addresses from all caches (incl. L4)
+ * @flush_addr: identity mapped region 7 address to start flushing
+ * @bytes: number of bytes to flush
+ *
+ * Every address fully or partially contained in the range
+ * @flush_addr to @flush_addr + @bytes is flushed from all caches,
+ * including L4.
+ */
+void
+sn_flush_all_caches(long flush_addr, long bytes)
+{
+	long flush_end = flush_addr + bytes;
+	int pass;
+
+	/*
+	 * A single call may return before the caches have actually
+	 * been flushed, so the flush is always issued twice.
+	 */
+	for (pass = 0; pass < 2; pass++)
+		flush_icache_range(flush_addr, flush_end);
+	mb();
+}
+EXPORT_SYMBOL(sn_flush_all_caches);
diff --git a/arch/ia64/sn/kernel/sn2/io.c b/arch/ia64/sn/kernel/sn2/io.c
new file mode 100644
index 00000000000..a12c0586de3
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/io.c
@@ -0,0 +1,101 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2003 Silicon Graphics, Inc. All rights reserved.
+ *
+ * The generic kernel requires function pointers to these routines, so
+ * we wrap the inlines from asm/ia64/sn/sn2/io.h here.
+ */
+
+#include <asm/sn/io.h>
+
+#ifdef CONFIG_IA64_GENERIC
+
+#undef __sn_inb
+#undef __sn_inw
+#undef __sn_inl
+#undef __sn_outb
+#undef __sn_outw
+#undef __sn_outl
+#undef __sn_readb
+#undef __sn_readw
+#undef __sn_readl
+#undef __sn_readq
+#undef __sn_readb_relaxed
+#undef __sn_readw_relaxed
+#undef __sn_readl_relaxed
+#undef __sn_readq_relaxed
+
+/*
+ * Out-of-line port input wrappers: each forwards to the ___sn_in*
+ * inline of the same width and returns its result.
+ */
+unsigned int __sn_inb(unsigned long port)
+{
+	unsigned int val = ___sn_inb(port);
+
+	return val;
+}
+
+unsigned int __sn_inw(unsigned long port)
+{
+	unsigned int val = ___sn_inw(port);
+
+	return val;
+}
+
+unsigned int __sn_inl(unsigned long port)
+{
+	unsigned int val = ___sn_inl(port);
+
+	return val;
+}
+
+/*
+ * Out-of-line port output wrappers: each forwards @val and @port to
+ * the ___sn_out* inline of the same width.
+ */
+void __sn_outb(unsigned char val, unsigned long port)
+{
+	___sn_outb(val, port);
+}
+
+void __sn_outw(unsigned short val, unsigned long port)
+{
+	___sn_outw(val, port);
+}
+
+void __sn_outl(unsigned int val, unsigned long port)
+{
+	___sn_outl(val, port);
+}
+
+/*
+ * Out-of-line MMIO read wrappers: each forwards to the ___sn_read*
+ * inline of the same width.
+ */
+unsigned char __sn_readb(void __iomem *addr)
+{
+	unsigned char val = ___sn_readb(addr);
+
+	return val;
+}
+
+unsigned short __sn_readw(void __iomem *addr)
+{
+	unsigned short val = ___sn_readw(addr);
+
+	return val;
+}
+
+unsigned int __sn_readl(void __iomem *addr)
+{
+	unsigned int val = ___sn_readl(addr);
+
+	return val;
+}
+
+unsigned long __sn_readq(void __iomem *addr)
+{
+	unsigned long val = ___sn_readq(addr);
+
+	return val;
+}
+
+/*
+ * Out-of-line wrappers for the "relaxed" MMIO read inlines.
+ */
+unsigned char __sn_readb_relaxed(void __iomem *addr)
+{
+	unsigned char val = ___sn_readb_relaxed(addr);
+
+	return val;
+}
+
+unsigned short __sn_readw_relaxed(void __iomem *addr)
+{
+	unsigned short val = ___sn_readw_relaxed(addr);
+
+	return val;
+}
+
+unsigned int __sn_readl_relaxed(void __iomem *addr)
+{
+	unsigned int val = ___sn_readl_relaxed(addr);
+
+	return val;
+}
+
+unsigned long __sn_readq_relaxed(void __iomem *addr)
+{
+	unsigned long val = ___sn_readq_relaxed(addr);
+
+	return val;
+}
+
+#endif
diff --git a/arch/ia64/sn/kernel/sn2/prominfo_proc.c b/arch/ia64/sn/kernel/sn2/prominfo_proc.c
new file mode 100644
index 00000000000..81c63b2f8ae
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/prominfo_proc.c
@@ -0,0 +1,279 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1999,2001-2004 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * Module to export the system's Firmware Interface Tables, including
+ * PROM revision numbers and banners, in /proc
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/proc_fs.h>
+#include <linux/nodemask.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/sn_cpuid.h>
+#include <asm/sn/addrs.h>
+
+MODULE_DESCRIPTION("PROM version reporting for /proc");
+MODULE_AUTHOR("Chad Talbott");
+MODULE_LICENSE("GPL");
+
+/* Standard Intel FIT entry types */
+#define FIT_ENTRY_FIT_HEADER 0x00 /* FIT header entry */
+#define FIT_ENTRY_PAL_B 0x01 /* PAL_B entry */
+/* Entries 0x02 through 0x0D reserved by Intel */
+#define FIT_ENTRY_PAL_A_PROC 0x0E /* Processor-specific PAL_A entry */
+#define FIT_ENTRY_PAL_A 0x0F /* PAL_A entry, same as... */
+#define FIT_ENTRY_PAL_A_GEN 0x0F /* ...Generic PAL_A entry */
+#define FIT_ENTRY_UNUSED 0x7F /* Unused (reserved by Intel?) */
+/* OEM-defined entries range from 0x10 to 0x7E. */
+#define FIT_ENTRY_SAL_A 0x10 /* SAL_A entry */
+#define FIT_ENTRY_SAL_B 0x11 /* SAL_B entry */
+#define FIT_ENTRY_SALRUNTIME 0x12 /* SAL runtime entry */
+#define FIT_ENTRY_EFI 0x1F /* EFI entry */
+#define FIT_ENTRY_FPSWA 0x20 /* embedded fpswa entry */
+#define FIT_ENTRY_VMLINUX 0x21 /* embedded vmlinux entry */
+
+#define FIT_MAJOR_SHIFT (32 + 8)
+#define FIT_MAJOR_MASK ((1 << 8) - 1)
+#define FIT_MINOR_SHIFT 32
+#define FIT_MINOR_MASK ((1 << 8) - 1)
+
+#define FIT_MAJOR(q) \
+ ((unsigned) ((q) >> FIT_MAJOR_SHIFT) & FIT_MAJOR_MASK)
+#define FIT_MINOR(q) \
+ ((unsigned) ((q) >> FIT_MINOR_SHIFT) & FIT_MINOR_MASK)
+
+#define FIT_TYPE_SHIFT (32 + 16)
+#define FIT_TYPE_MASK ((1 << 7) - 1)
+
+#define FIT_TYPE(q) \
+ ((unsigned) ((q) >> FIT_TYPE_SHIFT) & FIT_TYPE_MASK)
+
+struct fit_type_map_t {
+ unsigned char type;
+ const char *name;
+};
+
+static const struct fit_type_map_t fit_entry_types[] = {
+ {FIT_ENTRY_FIT_HEADER, "FIT Header"},
+ {FIT_ENTRY_PAL_A_GEN, "Generic PAL_A"},
+ {FIT_ENTRY_PAL_A_PROC, "Processor-specific PAL_A"},
+ {FIT_ENTRY_PAL_A, "PAL_A"},
+ {FIT_ENTRY_PAL_B, "PAL_B"},
+ {FIT_ENTRY_SAL_A, "SAL_A"},
+ {FIT_ENTRY_SAL_B, "SAL_B"},
+ {FIT_ENTRY_SALRUNTIME, "SAL runtime"},
+ {FIT_ENTRY_EFI, "EFI"},
+ {FIT_ENTRY_VMLINUX, "Embedded Linux"},
+ {FIT_ENTRY_FPSWA, "Embedded FPSWA"},
+ {FIT_ENTRY_UNUSED, "Unused"},
+ {0xff, "Error"},
+};
+
+/*
+ * Translate a FIT entry type code into a printable name.  Types listed
+ * in fit_entry_types[] use the table name; anything else is classified
+ * by range as OEM-defined, reserved, or unknown.
+ */
+static const char *fit_type_name(unsigned char type)
+{
+	const struct fit_type_map_t *entry = fit_entry_types;
+
+	/* the table ends at the 0xff sentinel entry */
+	while (entry->type != 0xff) {
+		if (entry->type == type)
+			return entry->name;
+		entry++;
+	}
+
+	if (type > FIT_ENTRY_PAL_A && type < FIT_ENTRY_UNUSED)
+		return "OEM type";
+	if (type > FIT_ENTRY_PAL_B && type < FIT_ENTRY_PAL_A)
+		return "Reserved";
+
+	return "Unknown type";
+}
+
+/*
+ * Fetch FIT entry @index of node @nasid via ia64_sn_get_fit_compt() and
+ * hand back that call's status unchanged.
+ */
+static int
+get_fit_entry(unsigned long nasid, int index, unsigned long *fentry,
+	      char *banner, int banlen)
+{
+	int status;
+
+	status = ia64_sn_get_fit_compt(nasid, index, fentry, banner, banlen);
+	return status;
+}
+
+
+/*
+ * Format one FIT entry (two 64-bit words in @fentry) as a single text
+ * line at @page.  Returns the number of characters written.
+ */
+static int dump_fit_entry(char *page, unsigned long *fentry)
+{
+	unsigned long address = fentry[0];
+	unsigned long info = fentry[1];
+	unsigned type = FIT_TYPE(info);
+	/* low 24 bits hold the size; mult by sixteen to get size in bytes */
+	unsigned size = (unsigned)(info & 0xffffff) * 16;
+
+	return sprintf(page, "%02x %-25s %x.%02x %016lx %u\n",
+		       type, fit_type_name(type),
+		       FIT_MAJOR(info), FIT_MINOR(info),
+		       address, size);
+}
+
+
+/*
+ * Print the whole FIT of node @nasid into @page and return the number
+ * of characters produced.
+ *
+ * The table is assumed to fit in one page: entries print at roughly 60
+ * characters each and the FIT is believed to hold at most 128 entries,
+ * which is fine for the default 16kB pages (4kB pages would be too
+ * small, but nobody runs those on SN).  The BUG_ON() enforces the
+ * assumption.
+ */
+static int
+dump_fit(char *page, unsigned long nasid)
+{
+	unsigned long fentry[2];
+	char *cursor = page;
+	int index = 0;
+
+	while (1) {
+		BUG_ON(index * 60 > PAGE_SIZE);
+		/* a non-zero status ends the walk */
+		if (get_fit_entry(nasid, index, fentry, NULL, 0) != 0)
+			break;
+		cursor += dump_fit_entry(cursor, fentry);
+		index++;
+	}
+
+	return cursor - page;
+}
+
+/*
+ * Print the SAL_A revision ("major.minor") of node @nasid into @page,
+ * followed by the banner string when one was returned.  Returns the
+ * number of characters produced, or 0 if fetching a FIT entry fails
+ * before a SAL_A entry is found.
+ */
+static int
+dump_version(char *page, unsigned long nasid)
+{
+	unsigned long fentry[2];
+	char banner[128];
+	int index = 0;
+	int len;
+
+	/* walk the FIT until the SAL_A entry turns up */
+	do {
+		if (get_fit_entry(nasid, index++, fentry, banner,
+				  sizeof(banner)))
+			return 0;
+	} while (FIT_TYPE(fentry[1]) != FIT_ENTRY_SAL_A);
+
+	len = sprintf(page, "%x.%02x\n", FIT_MAJOR(fentry[1]),
+		      FIT_MINOR(fentry[1]));
+
+	if (banner[0])
+		len += snprintf(page + len, PAGE_SIZE - len, "%s\n", banner);
+
+	return len;
+}
+
+/*
+ * same as in proc_misc.c
+ *
+ * Given @len bytes of output already generated at @page, work out what
+ * a read of @count bytes at offset @off should return: sets *@start to
+ * the first byte to copy, flags *@eof when the read reaches the end of
+ * the data, and returns the byte count clamped to [0, @count].
+ */
+static int
+proc_calc_metrics(char *page, char **start, off_t off, int count, int *eof,
+		  int len)
+{
+	/* everything that remains fits in this read */
+	if (len <= off + count)
+		*eof = 1;
+
+	*start = page + off;
+	len -= off;
+
+	if (len > count)
+		len = count;
+
+	return len < 0 ? 0 : len;
+}
+
+/*
+ * procfs read handler for the per-node "version" file.
+ */
+static int
+read_version_entry(char *page, char **start, off_t off, int count, int *eof,
+		   void *data)
+{
+	/* data holds the NASID of the node */
+	unsigned long nasid = (unsigned long)data;
+	int produced = dump_version(page, nasid);
+
+	return proc_calc_metrics(page, start, off, count, eof, produced);
+}
+
+/*
+ * procfs read handler for the per-node "fit" file.
+ */
+static int
+read_fit_entry(char *page, char **start, off_t off, int count, int *eof,
+	       void *data)
+{
+	/* data holds the NASID of the node */
+	unsigned long nasid = (unsigned long)data;
+	int produced = dump_fit(page, nasid);
+
+	return proc_calc_metrics(page, start, off, count, eof, produced);
+}
+
+/* module entry points */
+int __init prominfo_init(void);
+void __exit prominfo_exit(void);
+
+module_init(prominfo_init);
+module_exit(prominfo_exit);
+
+static struct proc_dir_entry **proc_entries;
+static struct proc_dir_entry *sgi_prominfo_entry;
+
+#define NODE_NAME_LEN 11
+
+/*
+ * prominfo_init - create /proc/sgi_prominfo/node<N>/{fit,version}
+ *
+ * Does nothing (and returns 0) on platforms other than sn2.  Returns
+ * -ENOMEM if the bookkeeping array or the top-level directory cannot
+ * be created.  A node whose directory cannot be created is skipped so
+ * its files never end up in the /proc root; its slot in proc_entries[]
+ * is left NULL.
+ */
+int __init prominfo_init(void)
+{
+	struct proc_dir_entry **entp;
+	struct proc_dir_entry *p;
+	cnodeid_t cnodeid;
+	unsigned long nasid;
+	char name[NODE_NAME_LEN];
+
+	if (!ia64_platform_is("sn2"))
+		return 0;
+
+	proc_entries = kmalloc(num_online_nodes() * sizeof(*proc_entries),
+			       GFP_KERNEL);
+	if (!proc_entries)
+		return -ENOMEM;
+
+	sgi_prominfo_entry = proc_mkdir("sgi_prominfo", NULL);
+	if (!sgi_prominfo_entry) {
+		kfree(proc_entries);
+		proc_entries = NULL;
+		return -ENOMEM;
+	}
+
+	entp = proc_entries;
+	for_each_online_node(cnodeid) {
+		snprintf(name, sizeof(name), "node%d", cnodeid);
+		*entp = proc_mkdir(name, sgi_prominfo_entry);
+		if (*entp) {
+			/* the NASID travels to the read handlers as "data" */
+			nasid = cnodeid_to_nasid(cnodeid);
+			p = create_proc_read_entry(
+				"fit", 0, *entp, read_fit_entry,
+				(void *)nasid);
+			if (p)
+				p->owner = THIS_MODULE;
+			p = create_proc_read_entry(
+				"version", 0, *entp, read_version_entry,
+				(void *)nasid);
+			if (p)
+				p->owner = THIS_MODULE;
+		}
+		entp++;
+	}
+
+	return 0;
+}
+
+/*
+ * prominfo_exit - remove everything prominfo_init() created
+ *
+ * prominfo_init() returns early without allocating anything on non-sn2
+ * platforms, leaving proc_entries NULL; in that case there is nothing
+ * to tear down and walking the array would dereference NULL.
+ */
+void __exit prominfo_exit(void)
+{
+	struct proc_dir_entry **entp;
+	unsigned cnodeid;
+	char name[NODE_NAME_LEN];
+
+	if (!proc_entries)
+		return;
+
+	entp = proc_entries;
+	for_each_online_node(cnodeid) {
+		remove_proc_entry("fit", *entp);
+		remove_proc_entry("version", *entp);
+		snprintf(name, sizeof(name), "node%d", cnodeid);
+		remove_proc_entry(name, sgi_prominfo_entry);
+		entp++;
+	}
+	remove_proc_entry("sgi_prominfo", NULL);
+	kfree(proc_entries);
+	proc_entries = NULL;
+}
diff --git a/arch/ia64/sn/kernel/sn2/ptc_deadlock.S b/arch/ia64/sn/kernel/sn2/ptc_deadlock.S
new file mode 100644
index 00000000000..7947312801e
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/ptc_deadlock.S
@@ -0,0 +1,82 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <asm/sn/shub_mmr.h>
+
+#define DEADLOCKBIT SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_SHFT
+#define WRITECOUNTMASK SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK
+#define ALIAS_OFFSET (SH1_PIO_WRITE_STATUS_0_ALIAS-SH1_PIO_WRITE_STATUS_0)
+
+
+ .global sn2_ptc_deadlock_recovery_core
+ .proc sn2_ptc_deadlock_recovery_core
+
+sn2_ptc_deadlock_recovery_core:
+ .regstk 6,0,0,0 // six input registers (in0-in5), no locals/outputs/rotating
+
+ ptc0 = in0
+ data0 = in1
+ ptc1 = in2
+ data1 = in3
+ piowc = in4
+ zeroval = in5
+ piowcphy = r30
+ psrsave = r2
+ scr1 = r16
+ scr2 = r17
+ mask = r18
+
+
+ extr.u piowcphy=piowc,0,61;; // Convert piowc to uncached physical address
+ dep piowcphy=-1,piowcphy,63,1 // ...by setting bit 63 on the low 61 address bits
+ movl mask=WRITECOUNTMASK
+
+1:
+ add scr2=ALIAS_OFFSET,piowc // Address of WRITE_STATUS alias register
+ mov scr1=7;; // Clear DEADLOCK, WRITE_ERROR, MULTI_WRITE_ERROR
+ st8.rel [scr2]=scr1;;
+
+5: ld8.acq scr1=[piowc];; // Wait for PIOs to complete.
+ and scr2=scr1,mask;; // mask of writecount bits
+ cmp.ne p6,p0=zeroval,scr2
+(p6) br.cond.sptk 5b // spin while the write count differs from zeroval
+
+
+
+ ////////////// BEGIN PHYSICAL MODE ////////////////////
+ mov psrsave=psr // Disable IC (no PMIs)
+ rsm psr.i | psr.dt | psr.ic;; // psr.dt off: the loads/stores below use physical addresses (hence piowcphy)
+ srlz.i;;
+
+ st8.rel [ptc0]=data0 // Write PTC0 & wait for completion.
+
+5: ld8.acq scr1=[piowcphy];; // Wait for PIOs to complete.
+ and scr2=scr1,mask;; // mask of writecount bits
+ cmp.ne p6,p0=zeroval,scr2
+(p6) br.cond.sptk 5b;;
+
+ tbit.nz p8,p7=scr1,DEADLOCKBIT;;// Test for DEADLOCK
+(p7) cmp.ne p7,p0=r0,ptc1;; // Test for non-null ptc1
+
+(p7) st8.rel [ptc1]=data1;; // Now write PTC1.
+
+5: ld8.acq scr1=[piowcphy];; // Wait for PIOs to complete.
+ and scr2=scr1,mask;; // mask of writecount bits
+ cmp.ne p6,p0=zeroval,scr2
+(p6) br.cond.sptk 5b
+
+ tbit.nz p8,p0=scr1,DEADLOCKBIT;;// Test for DEADLOCK
+
+ mov psr.l=psrsave;; // Reenable IC
+ srlz.i;;
+ ////////////// END PHYSICAL MODE ////////////////////
+
+(p8) br.cond.spnt 1b;; // Repeat if DEADLOCK occurred.
+
+ br.ret.sptk rp // last status read showed no deadlock: return to caller
+ .endp sn2_ptc_deadlock_recovery_core
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
new file mode 100644
index 00000000000..7af05a7ac74
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -0,0 +1,295 @@
+/*
+ * SN2 Platform specific SMP Support
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/threads.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/mmzone.h>
+#include <linux/module.h>
+#include <linux/bitops.h>
+#include <linux/nodemask.h>
+
+#include <asm/processor.h>
+#include <asm/irq.h>
+#include <asm/sal.h>
+#include <asm/system.h>
+#include <asm/delay.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/tlb.h>
+#include <asm/numa.h>
+#include <asm/hw_irq.h>
+#include <asm/current.h>
+#include <asm/sn/sn_cpuid.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/shub_mmr.h>
+#include <asm/sn/nodepda.h>
+#include <asm/sn/rw_mmr.h>
+
+void sn2_ptc_deadlock_recovery(volatile unsigned long *, unsigned long data0,
+ volatile unsigned long *, unsigned long data1);
+
+static __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock);
+
+static unsigned long sn2_ptc_deadlock_count;
+
+/*
+ * Spin until the pending-write-count field of the status word at
+ * pda->pio_write_status_addr equals pda->pio_write_status_val, then
+ * return the last raw status word read so the caller can inspect its
+ * other bits.
+ */
+static inline unsigned long wait_piowc(void)
+{
+	volatile unsigned long *status_reg = pda->pio_write_status_addr;
+	volatile unsigned long idle_count = pda->pio_write_status_val;
+	unsigned long status;
+
+	for (;;) {
+		cpu_relax();
+		status = *status_reg;
+		if ((status & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) == idle_count)
+			break;
+	}
+
+	return status;
+}
+
+/*
+ * Flush the TLB for @mm, but only when @mm is the current task's mm.
+ */
+void sn_tlb_migrate_finish(struct mm_struct *mm)
+{
+	if (mm != current->mm)
+		return;
+
+	flush_tlb_mm(mm);
+}
+
+/**
+ * sn2_global_tlb_purge - globally purge translation cache of virtual address range
+ * @start: start of virtual address range
+ * @end: end of virtual address range
+ * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc))
+ *
+ * Purges the translation caches of all processors of the given virtual address
+ * range.
+ *
+ * Note:
+ * - cpu_vm_mask is a bit mask that indicates which cpus have loaded the context.
+ * - cpu_vm_mask is converted into a nodemask of the nodes containing the
+ * cpus in cpu_vm_mask.
+ * - if only one bit is set in cpu_vm_mask & it is the current cpu,
+ * then only the local TLB needs to be flushed. This flushing can be done
+ * using ptc.l. This is the common case & avoids the global spinlock.
+ * - if multiple cpus have loaded the context, then flushing has to be
+ * done with ptc.g/MMRs under protection of the global ptc_lock.
+ * - if the mm has exactly one user (mm_users == 1) the whole mm is flushed
+ * with flush_tlb_mm() instead, again without taking the global lock.
+ * - in the global path the range is walked in steps of 1 << @nbits bytes;
+ * for every step the local node is purged with ia64_ptcga() and every
+ * other node in the nodemask is purged by writing its PTC MMR(s). If the
+ * PIO write status reports a deadlock after a step,
+ * sn2_ptc_deadlock_recovery() is invoked for that step.
+ *
+ * Preemption is disabled for the whole operation and re-enabled on every
+ * return path.
+ */
+
+void
+sn2_global_tlb_purge(unsigned long start, unsigned long end,
+ unsigned long nbits)
+{
+ int i, shub1, cnode, mynasid, cpu, lcpu = 0, nasid, flushed = 0;
+ volatile unsigned long *ptc0, *ptc1;
+ unsigned long flags = 0, data0 = 0, data1 = 0;
+ struct mm_struct *mm = current->active_mm;
+ short nasids[MAX_NUMNODES], nix;
+ nodemask_t nodes_flushed;
+
+ nodes_clear(nodes_flushed);
+ i = 0;
+
+ for_each_cpu_mask(cpu, mm->cpu_vm_mask) {
+ cnode = cpu_to_node(cpu);
+ node_set(cnode, nodes_flushed);
+ lcpu = cpu;
+ i++; /* i counts the cpus in cpu_vm_mask; lcpu is the last one seen */
+ }
+
+ preempt_disable();
+
+ if (likely(i == 1 && lcpu == smp_processor_id())) { /* only this cpu has loaded the context */
+ do {
+ ia64_ptcl(start, nbits << 2);
+ start += (1UL << nbits);
+ } while (start < end);
+ ia64_srlz_i();
+ preempt_enable();
+ return;
+ }
+
+ if (atomic_read(&mm->mm_users) == 1) { /* sole user of the mm: flush all of it */
+ flush_tlb_mm(mm);
+ preempt_enable();
+ return;
+ }
+
+ nix = 0; /* nix ends up as the number of nasids collected below */
+ for_each_node_mask(cnode, nodes_flushed)
+ nasids[nix++] = cnodeid_to_nasid(cnode);
+
+ shub1 = is_shub1();
+ if (shub1) {
+ data0 = (1UL << SH1_PTC_0_A_SHFT) |
+ (nbits << SH1_PTC_0_PS_SHFT) |
+ ((ia64_get_rr(start) >> 8) << SH1_PTC_0_RID_SHFT) |
+ (1UL << SH1_PTC_0_START_SHFT);
+ ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_0);
+ ptc1 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_1);
+ } else {
+ data0 = (1UL << SH2_PTC_A_SHFT) |
+ (nbits << SH2_PTC_PS_SHFT) |
+ (1UL << SH2_PTC_START_SHFT);
+ ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH2_PTC +
+ ((ia64_get_rr(start) >> 8) << SH2_PTC_RID_SHFT) );
+ ptc1 = NULL; /* this path uses a single PTC register; NULL makes later code skip ptc1 */
+ }
+
+
+ mynasid = get_nasid();
+
+ spin_lock_irqsave(&sn2_global_ptc_lock, flags);
+
+ do {
+ if (shub1)
+ data1 = start | (1UL << SH1_PTC_1_START_SHFT);
+ else
+ data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK);
+ for (i = 0; i < nix; i++) {
+ nasid = nasids[i];
+ if (unlikely(nasid == mynasid)) {
+ ia64_ptcga(start, nbits << 2);
+ ia64_srlz_i();
+ } else {
+ ptc0 = CHANGE_NASID(nasid, ptc0); /* retarget the MMR address at this node */
+ if (ptc1)
+ ptc1 = CHANGE_NASID(nasid, ptc1);
+ pio_atomic_phys_write_mmrs(ptc0, data0, ptc1,
+ data1);
+ flushed = 1; /* an MMR write was issued, so the PIO status must be waited on */
+ }
+ }
+
+ if (flushed
+ && (wait_piowc() &
+ SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK)) {
+ sn2_ptc_deadlock_recovery(ptc0, data0, ptc1, data1);
+ }
+
+ start += (1UL << nbits);
+
+ } while (start < end);
+
+ spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);
+
+ preempt_enable();
+}
+
+/*
+ * sn2_ptc_deadlock_recovery
+ *
+ * Recover from a PTC deadlock condition by replaying the TLB flush
+ * transaction one node at a time: for every online node that is
+ * neither headless nor the local node, the PTC register addresses are
+ * retargeted at that node and the assembly-language core routine
+ * performs the (somewhat tricky) recovery sequence.
+ */
+void sn2_ptc_deadlock_recovery(volatile unsigned long *ptc0, unsigned long data0,
+	volatile unsigned long *ptc1, unsigned long data1)
+{
+	extern void sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long,
+		volatile unsigned long *, unsigned long, volatile unsigned long *, unsigned long);
+	volatile unsigned long *status_reg;
+	volatile unsigned long idle_count;
+	int node, self, target_nasid;
+
+	sn2_ptc_deadlock_count++;
+
+	status_reg = pda->pio_write_status_addr;
+	idle_count = pda->pio_write_status_val;
+	self = numa_node_id();
+
+	for_each_online_node(node) {
+		if (!is_headless_node(node) && node != self) {
+			target_nasid = cnodeid_to_nasid(node);
+			ptc0 = CHANGE_NASID(target_nasid, ptc0);
+			if (ptc1)
+				ptc1 = CHANGE_NASID(target_nasid, ptc1);
+			sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1,
+						       status_reg, idle_count);
+		}
+	}
+}
+
+/**
+ * sn_send_IPI_phys - send an IPI to a Nasid and slice
+ * @nasid: nasid to receive the interrupt (may be outside partition)
+ * @physid: physical cpuid to receive the interrupt.
+ * @vector: command to send
+ * @delivery_mode: delivery mechanism
+ *
+ * Sends an IPI (interprocessor interrupt) to the processor specified by
+ * @physid by writing the SH_IPI_INT register of @nasid.  When
+ * enable_shub_wars_1_1() is true the write is done under the global
+ * ptc lock and waited on before the lock is dropped.
+ *
+ * @delivery_mode can be one of the following
+ *
+ * %IA64_IPI_DM_INT - pend an interrupt
+ * %IA64_IPI_DM_PMI - pend a PMI
+ * %IA64_IPI_DM_NMI - pend an NMI
+ * %IA64_IPI_DM_INIT - pend an INIT interrupt
+ */
+void sn_send_IPI_phys(int nasid, long physid, int vector, int delivery_mode)
+{
+	volatile long *ipi_reg;
+	unsigned long flags = 0;
+	long val;
+
+	ipi_reg = (long *)GLOBAL_MMR_PHYS_ADDR(nasid, SH_IPI_INT);
+
+	/* assemble the register image one field at a time */
+	val = (1UL << SH_IPI_INT_SEND_SHFT);
+	val |= (physid << SH_IPI_INT_PID_SHFT);
+	val |= ((long)delivery_mode << SH_IPI_INT_TYPE_SHFT);
+	val |= ((long)vector << SH_IPI_INT_IDX_SHFT);
+	val |= (0x000feeUL << SH_IPI_INT_BASE_SHFT);
+
+	mb();
+	if (enable_shub_wars_1_1())
+		spin_lock_irqsave(&sn2_global_ptc_lock, flags);
+
+	pio_phys_write_mmr(ipi_reg, val);
+
+	if (enable_shub_wars_1_1()) {
+		wait_piowc();
+		spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);
+	}
+}
+
+EXPORT_SYMBOL(sn_send_IPI_phys);
+
+/**
+ * sn2_send_IPI - send an IPI to a processor
+ * @cpuid: target of the IPI
+ * @vector: command to send
+ * @delivery_mode: delivery mechanism
+ * @redirect: redirect the IPI? (not used by this implementation)
+ *
+ * Sends an IPI (InterProcessor Interrupt) to the processor specified by
+ * @cpuid. @vector specifies the command to send, while @delivery_mode can
+ * be one of the following
+ *
+ * %IA64_IPI_DM_INT - pend an interrupt
+ * %IA64_IPI_DM_PMI - pend a PMI
+ * %IA64_IPI_DM_NMI - pend an NMI
+ * %IA64_IPI_DM_INIT - pend an INIT interrupt
+ */
+void sn2_send_IPI(int cpuid, int vector, int delivery_mode, int redirect)
+{
+	long physid = cpu_physical_id(cpuid);
+	int nasid = cpuid_to_nasid(cpuid);
+
+	/* the following is used only when starting cpus at boot time */
+	if (unlikely(nasid == -1))
+		ia64_sn_get_sapic_info(physid, &nasid, NULL, NULL);
+
+	sn_send_IPI_phys(nasid, physid, vector, delivery_mode);
+}
diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
new file mode 100644
index 00000000000..197356460ee
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -0,0 +1,690 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004-2005 Silicon Graphics, Inc. All rights reserved.
+ *
+ * SGI Altix topology and hardware performance monitoring API.
+ * Mark Goodwin <markgw@sgi.com>.
+ *
+ * Creates /proc/sgi_sn/sn_topology (read-only) to export
+ * info about Altix nodes, routers, CPUs and NumaLink
+ * interconnection/topology.
+ *
+ * Also creates a dynamic misc device named "sn_hwperf"
+ * that supports an ioctl interface to call down into SAL
+ * to discover hw objects, topology and to read/write
+ * memory mapped registers, e.g. for performance monitoring.
+ * The "sn_hwperf" device is registered only after the procfs
+ * file is first opened, i.e. only if/when it's needed.
+ *
+ * This API is used by SGI Performance Co-Pilot and other
+ * tools, see http://oss.sgi.com/projects/pcp
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/seq_file.h>
+#include <linux/miscdevice.h>
+#include <linux/cpumask.h>
+#include <linux/smp_lock.h>
+#include <linux/nodemask.h>
+#include <asm/processor.h>
+#include <asm/topology.h>
+#include <asm/smp.h>
+#include <asm/semaphore.h>
+#include <asm/segment.h>
+#include <asm/uaccess.h>
+#include <asm/sal.h>
+#include <asm/sn/io.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/module.h>
+#include <asm/sn/geo.h>
+#include <asm/sn/sn2/sn_hwperf.h>
+
+static void *sn_hwperf_salheap = NULL;
+static int sn_hwperf_obj_cnt = 0;
+static nasid_t sn_hwperf_master_nasid = INVALID_NASID;
+static int sn_hwperf_init(void);
+static DECLARE_MUTEX(sn_hwperf_init_mutex);
+
+/*
+ * Ask SAL to enumerate all hwperf objects.
+ *
+ * @nobj: receives sn_hwperf_obj_cnt (set on every path)
+ * @ret:  receives a vmalloc'ed array of object descriptors on success,
+ *        which the caller must vfree(); NULL on any failure
+ *
+ * Returns the SAL status on success (SN_HWPERF_OP_OK), or a negative
+ * value: the sn_hwperf_init() error, -ENOMEM, or -EINVAL when the SAL
+ * call fails.
+ */
+static int sn_hwperf_enum_objects(int *nobj, struct sn_hwperf_object_info **ret)
+{
+	int e;
+	u64 sz;
+	struct sn_hwperf_object_info *objbuf = NULL;
+
+	if ((e = sn_hwperf_init()) < 0) {
+		printk("sn_hwperf_init failed: err %d\n", e);
+		goto out;
+	}
+
+	sz = sn_hwperf_obj_cnt * sizeof(struct sn_hwperf_object_info);
+	objbuf = vmalloc(sz);
+	if (objbuf == NULL) {
+		printk("sn_hwperf_enum_objects: vmalloc(%d) failed\n", (int)sz);
+		e = -ENOMEM;
+		goto out;
+	}
+
+	e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, SN_HWPERF_ENUM_OBJECTS,
+		0, sz, (u64) objbuf, 0, 0, NULL);
+	if (e != SN_HWPERF_OP_OK) {
+		e = -EINVAL;
+		vfree(objbuf);
+		/* never hand a freed buffer back through *ret */
+		objbuf = NULL;
+	}
+
+out:
+	*nobj = sn_hwperf_obj_cnt;
+	*ret = objbuf;
+	return e;
+}
+
+/*
+ * Parse a location string of the form "<rack><type><slot>#<slab>" and
+ * return the first cnode below numionodes whose geoid has the same
+ * rack, slot and slab.  Returns -1 if the string does not parse or no
+ * cnode matches.
+ */
+static int sn_hwperf_geoid_to_cnode(char *location)
+{
+	int rack, slot, slab;
+	char type;
+	int node;
+
+	if (sscanf(location, "%03d%c%02d#%d", &rack, &type, &slot, &slab) != 4)
+		return -1;
+
+	for (node = 0; node < numionodes; node++) {
+		geoid_t geoid = cnodeid_get_geoid(node);
+		moduleid_t module_id = geo_module(geoid);
+		int node_rack = MODULE_GET_RACK(module_id);
+		int node_slot = MODULE_GET_BPOS(module_id);
+		int node_slab = geo_slab(geoid);
+
+		if (rack == node_rack && slot == node_slot && slab == node_slab)
+			return node;
+	}
+
+	return -1;
+}
+
+/*
+ * Return the cnode matching @obj's location string, or -1 when the
+ * object is not in this partition or no cnode matches.
+ */
+static int sn_hwperf_obj_to_cnode(struct sn_hwperf_object_info * obj)
+{
+	return obj->sn_hwp_this_part ?
+		sn_hwperf_geoid_to_cnode(obj->location) : -1;
+}
+
+/*
+ * Count the non-foreign objects of the same type as @obj that precede
+ * it in the @objs array; that count is @obj's ordinal within its type.
+ */
+static int sn_hwperf_generic_ordinal(struct sn_hwperf_object_info *obj,
+				struct sn_hwperf_object_info *objs)
+{
+	struct sn_hwperf_object_info *cur;
+	int count = 0;
+
+	for (cur = objs; cur != obj; cur++) {
+		if (!SN_HWPERF_FOREIGN(cur) && SN_HWPERF_SAME_OBJTYPE(cur, obj))
+			count++;
+	}
+
+	return count;
+}
+
+static const char *slabname_node = "node"; /* SHub asic */
+static const char *slabname_ionode = "ionode"; /* TIO asic */
+static const char *slabname_router = "router"; /* NL3R or NL4R */
+static const char *slabname_other = "other"; /* unknown asic */
+
+/*
+ * Classify @obj and return its slab name ("node", "ionode", "router"
+ * or "other").  *@ordinal is set to the cnode number for nodes and
+ * ionodes (possibly -1), and to the per-type ordinal for everything
+ * else.
+ */
+static const char *sn_hwperf_get_slabname(struct sn_hwperf_object_info *obj,
+			struct sn_hwperf_object_info *objs, int *ordinal)
+{
+	if (SN_HWPERF_IS_NODE(obj)) {
+		*ordinal = sn_hwperf_obj_to_cnode(obj);
+		return slabname_node;
+	}
+
+	if (SN_HWPERF_IS_IONODE(obj)) {
+		*ordinal = sn_hwperf_obj_to_cnode(obj);
+		return slabname_ionode;
+	}
+
+	*ordinal = sn_hwperf_generic_ordinal(obj, objs);
+	return SN_HWPERF_IS_ROUTER(obj) ? slabname_router : slabname_other;
+}
+
+/*
+ * seq_file ->show routine for the sn_topology file: prints the lines
+ * describing one hwperf object.
+ *
+ * @s: seq_file; s->private points at the array of all objects
+ * @d: the object to print (an element of that array)
+ *
+ * Emits the file header before the first object, then the object line,
+ * and for node/ionode objects that map to a cnode in this partition
+ * the nasid, node distances and one line per online cpu on the node.
+ * If the object has numalink ports, one line per port follows.
+ *
+ * Returns 0 on success, -ENOMEM if the port buffer cannot be allocated
+ * and -EINVAL if SAL fails to enumerate the ports.
+ */
+static int sn_topology_show(struct seq_file *s, void *d)
+{
+	int sz;
+	int pt;
+	int e;
+	int i;
+	int j;
+	const char *slabname;
+	int ordinal;
+	cpumask_t cpumask;
+	char slice;
+	struct cpuinfo_ia64 *c;
+	struct sn_hwperf_port_info *ptdata;
+	struct sn_hwperf_object_info *p;
+	struct sn_hwperf_object_info *obj = d;	/* this object */
+	struct sn_hwperf_object_info *objs = s->private; /* all objects */
+
+	if (obj == objs) {
+		seq_printf(s, "# sn_topology version 1\n");
+		seq_printf(s, "# objtype ordinal location partition"
+			" [attribute value [, ...]]\n");
+	}
+
+	if (SN_HWPERF_FOREIGN(obj)) {
+		/* private in another partition: not interesting */
+		return 0;
+	}
+
+	for (i = 0; obj->name[i]; i++) {
+		if (obj->name[i] == ' ')
+			obj->name[i] = '_';
+	}
+
+	slabname = sn_hwperf_get_slabname(obj, objs, &ordinal);
+	seq_printf(s, "%s %d %s %s asic %s", slabname, ordinal, obj->location,
+		obj->sn_hwp_this_part ? "local" : "shared", obj->name);
+
+	/*
+	 * A node/ionode without a cnode in this partition gets ordinal -1;
+	 * it must not be used to index the nasid, distance or cpumask
+	 * tables, so such objects only get the basic line.
+	 */
+	if (ordinal < 0 ||
+	    (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj)))
+		seq_putc(s, '\n');
+	else {
+		seq_printf(s, ", nasid 0x%x", cnodeid_to_nasid(ordinal));
+		for (i=0; i < numionodes; i++) {
+			seq_printf(s, i ? ":%d" : ", dist %d",
+				node_distance(ordinal, i));
+		}
+		seq_putc(s, '\n');
+
+		/*
+		 * CPUs on this node, if any
+		 */
+		cpumask = node_to_cpumask(ordinal);
+		for_each_online_cpu(i) {
+			if (cpu_isset(i, cpumask)) {
+				slice = 'a' + cpuid_to_slice(i);
+				c = cpu_data(i);
+				seq_printf(s, "cpu %d %s%c local"
+					" freq %luMHz, arch ia64",
+					i, obj->location, slice,
+					c->proc_freq / 1000000);
+				for_each_online_cpu(j) {
+					seq_printf(s, j ? ":%d" : ", dist %d",
+						node_distance(
+						    cpuid_to_cnodeid(i),
+						    cpuid_to_cnodeid(j)));
+				}
+				seq_putc(s, '\n');
+			}
+		}
+	}
+
+	if (obj->ports) {
+		/*
+		 * numalink ports
+		 */
+		sz = obj->ports * sizeof(struct sn_hwperf_port_info);
+		if ((ptdata = vmalloc(sz)) == NULL)
+			return -ENOMEM;
+		e = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
+				      SN_HWPERF_ENUM_PORTS, obj->id, sz,
+				      (u64) ptdata, 0, 0, NULL);
+		if (e != SN_HWPERF_OP_OK) {
+			/* don't leak the port buffer on the error path */
+			vfree(ptdata);
+			return -EINVAL;
+		}
+		/* port ordinals continue on from all preceding objects */
+		for (ordinal=0, p=objs; p != obj; p++) {
+			if (!SN_HWPERF_FOREIGN(p))
+				ordinal += p->ports;
+		}
+		for (pt = 0; pt < obj->ports; pt++) {
+			/* find the object at the far end of this port */
+			for (p = objs, i = 0; i < sn_hwperf_obj_cnt; i++, p++) {
+				if (ptdata[pt].conn_id == p->id) {
+					break;
+				}
+			}
+			seq_printf(s, "numalink %d %s-%d",
+			    ordinal+pt, obj->location, ptdata[pt].port);
+
+			if (i >= sn_hwperf_obj_cnt) {
+				/* no connection */
+				seq_puts(s, " local endpoint disconnected"
+					    ", protocol unknown\n");
+				continue;
+			}
+
+			if (obj->sn_hwp_this_part && p->sn_hwp_this_part)
+				/* both ends local to this partition */
+				seq_puts(s, " local");
+			else if (!obj->sn_hwp_this_part && !p->sn_hwp_this_part)
+				/* both ends of the link in foreign partition */
+				seq_puts(s, " foreign");
+			else
+				/* link straddles a partition */
+				seq_puts(s, " shared");
+
+			/*
+			 * Unlikely, but strictly should query the LLP config
+			 * registers because an NL4R can be configured to run
+			 * NL3 protocol, even when not talking to an NL3 router.
+			 * Ditto for node-node.
+			 */
+			seq_printf(s, " endpoint %s-%d, protocol %s\n",
+				p->location, ptdata[pt].conn_port,
+				(SN_HWPERF_IS_NL3ROUTER(obj) ||
+				SN_HWPERF_IS_NL3ROUTER(p)) ?  "LLP3" : "LLP4");
+		}
+		vfree(ptdata);
+	}
+
+	return 0;
+}
+
+/*
+ * seq_file ->start: return the object at index *@pos in the array held
+ * in s->private, or NULL once *@pos reaches sn_hwperf_obj_cnt.
+ */
+static void *sn_topology_start(struct seq_file *s, loff_t * pos)
+{
+	struct sn_hwperf_object_info *all_objs = s->private;
+
+	if (*pos >= sn_hwperf_obj_cnt)
+		return NULL;
+
+	return (void *)(all_objs + *pos);
+}
+
+/*
+ * seq_file ->next: advance *@pos by one and look the new position up
+ * with sn_topology_start().
+ */
+static void *sn_topology_next(struct seq_file *s, void *v, loff_t * pos)
+{
+	*pos += 1;
+	return sn_topology_start(s, pos);
+}
+
+/*
+ * seq_file ->stop: does nothing.
+ */
+static void sn_topology_stop(struct seq_file *m, void *v)
+{
+}
+
+/*
+ * /proc/sgi_sn/sn_topology, read-only using seq_file
+ */
+static struct seq_operations sn_topology_seq_ops = {
+ .start = sn_topology_start,
+ .next = sn_topology_next,
+ .stop = sn_topology_stop,
+ .show = sn_topology_show
+};
+
+struct sn_hwperf_op_info {
+ u64 op;
+ struct sn_hwperf_ioctl_args *a;
+ void *p;
+ int *v0;
+ int ret;
+};
+
+/*
+ * Issue the SAL hwperf operation described by @info (a struct
+ * sn_hwperf_op_info) and store the SAL status in its ->ret field.
+ * The void * signature lets it be passed as an IPI callback.
+ */
+static void sn_hwperf_call_sal(void *info)
+{
+	struct sn_hwperf_op_info *req = info;
+
+	req->ret = ia64_sn_hwperf_op(sn_hwperf_master_nasid, req->op,
+				     req->a->arg, req->a->sz,
+				     (u64) req->p, 0, 0, req->v0);
+}
+
+/*
+ * Run the SAL hwperf operation in @op_info on the cpu encoded in the
+ * ioctl argument.
+ *
+ * The cpu number and the "use IPI" flag are extracted from
+ * op_info->a->arg, which is then reduced to the object id.  If any cpu
+ * will do, or we are already on the requested cpu, SAL is called
+ * directly; otherwise the call is made either through an
+ * interprocessor interrupt or by temporarily migrating the current
+ * task to the target cpu.
+ *
+ * Returns the SAL status stored by sn_hwperf_call_sal(), or -EINVAL if
+ * the requested cpu is out of range or offline.
+ */
+static int sn_hwperf_op_cpu(struct sn_hwperf_op_info *op_info)
+{
+	u32 cpu;
+	u32 use_ipi;
+	int r = 0;
+	cpumask_t save_allowed;
+
+	cpu = (op_info->a->arg & SN_HWPERF_ARG_CPU_MASK) >> 32;
+	use_ipi = op_info->a->arg & SN_HWPERF_ARG_USE_IPI_MASK;
+	op_info->a->arg &= SN_HWPERF_ARG_OBJID_MASK;
+
+	if (cpu != SN_HWPERF_ARG_ANY_CPU) {
+		/*
+		 * Bound by NR_CPUS, not by the number of online cpus: cpu
+		 * ids need not be contiguous, and cpu_online() must not be
+		 * asked about an id beyond the cpu map.
+		 */
+		if (cpu >= NR_CPUS || !cpu_online(cpu)) {
+			r = -EINVAL;
+			goto out;
+		}
+	}
+
+	if (cpu == SN_HWPERF_ARG_ANY_CPU) {
+		/* don't care which cpu */
+		sn_hwperf_call_sal(op_info);
+	}
+	else if (cpu == get_cpu()) {
+		/* already on correct cpu; stay there for the SAL call */
+		sn_hwperf_call_sal(op_info);
+		put_cpu();
+	}
+	else {
+		/* balance get_cpu() before anything that may sleep */
+		put_cpu();
+		if (use_ipi) {
+			/* use an interprocessor interrupt to call SAL */
+			smp_call_function_single(cpu, sn_hwperf_call_sal,
+				op_info, 1, 1);
+		}
+		else {
+			/* migrate the task before calling SAL */
+			save_allowed = current->cpus_allowed;
+			set_cpus_allowed(current, cpumask_of_cpu(cpu));
+			sn_hwperf_call_sal(op_info);
+			set_cpus_allowed(current, save_allowed);
+		}
+	}
+	r = op_info->ret;
+
+out:
+	return r;
+}
+
+/*
+ * Translate a SAL hwperf status code into a kernel errno value
+ * (0 for SN_HWPERF_OP_OK, negative otherwise). Unknown codes map to
+ * -EINVAL.
+ */
+static int sn_hwperf_map_err(int hwperf_err)
+{
+	switch(hwperf_err) {
+	case SN_HWPERF_OP_OK:
+		return 0;
+
+	case SN_HWPERF_OP_NOMEM:
+		return -ENOMEM;
+
+	case SN_HWPERF_OP_NO_PERM:
+		return -EPERM;
+
+	case SN_HWPERF_OP_IO_ERROR:
+		return -EIO;
+
+	case SN_HWPERF_OP_BUSY:
+	case SN_HWPERF_OP_RECONFIGURE:
+		return -EAGAIN;
+
+	case SN_HWPERF_OP_INVAL:
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * ioctl for "sn_hwperf" misc device
+ *
+ * @arg is a user pointer to a struct sn_hwperf_ioctl_args. Its ptr/sz
+ * members describe a user buffer; a kernel copy of that buffer is
+ * allocated, filled from user space when @op carries
+ * SN_HWPERF_OP_MEM_COPYIN, passed to the operation, and written back
+ * when @op carries SN_HWPERF_OP_MEM_COPYOUT.
+ *
+ * A few operations are answered locally (cpu info, node nasid, object
+ * node), some are routed to a specific cpu via sn_hwperf_op_cpu(), and
+ * everything else is a direct SAL call.
+ *
+ * The kernel lock is dropped on entry and re-taken before returning.
+ * Returns 0 on success or a negative errno.
+ */
+static int
+sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, u64 arg)
+{
+	struct sn_hwperf_ioctl_args a;
+	struct cpuinfo_ia64 *cdata;
+	struct sn_hwperf_object_info *objs;
+	struct sn_hwperf_object_info *cpuobj;
+	struct sn_hwperf_op_info op_info;
+	void *p = NULL;
+	int nobj;
+	char slice;
+	int node;
+	int r;
+	int v0;
+	int i;
+	int j;
+
+	unlock_kernel();
+
+	/* only user requests are allowed here */
+	if ((op & SN_HWPERF_OP_MASK) < 10) {
+		r = -EINVAL;
+		goto error;
+	}
+	r = copy_from_user(&a, (const void __user *)arg,
+		sizeof(struct sn_hwperf_ioctl_args));
+	if (r != 0) {
+		r = -EFAULT;
+		goto error;
+	}
+
+	/*
+	 * Allocate memory to hold a kernel copy of the user buffer. The
+	 * buffer contents are either copied in or out (or both) of user
+	 * space depending on the flags encoded in the requested operation.
+	 */
+	if (a.ptr) {
+		p = vmalloc(a.sz);
+		if (!p) {
+			r = -ENOMEM;
+			goto error;
+		}
+	}
+
+	/*
+	 * Without a user buffer there is no kernel copy. Refuse requests
+	 * that would still move a.sz bytes through it: otherwise the copies
+	 * below, and the handlers that store their result in the buffer,
+	 * would write through a NULL kernel pointer.
+	 * NOTE(review): this assumes every locally handled op that writes
+	 * to the buffer carries SN_HWPERF_OP_MEM_COPYOUT (its result could
+	 * not reach user space otherwise) -- confirm against the op
+	 * definitions.
+	 */
+	if (!p && a.sz != 0 &&
+	    (op & (SN_HWPERF_OP_MEM_COPYIN | SN_HWPERF_OP_MEM_COPYOUT))) {
+		r = -EFAULT;
+		goto error;
+	}
+
+	if (op & SN_HWPERF_OP_MEM_COPYIN) {
+		r = copy_from_user(p, (const void __user *)a.ptr, a.sz);
+		if (r != 0) {
+			r = -EFAULT;
+			goto error;
+		}
+	}
+
+	switch (op) {
+	case SN_HWPERF_GET_CPU_INFO:
+		if (a.sz == sizeof(u64)) {
+			/* special case to get size needed */
+			*(u64 *) p = (u64) num_online_cpus() *
+				sizeof(struct sn_hwperf_object_info);
+		} else
+		if (a.sz < num_online_cpus() * sizeof(struct sn_hwperf_object_info)) {
+			r = -ENOMEM;
+			goto error;
+		} else
+		if ((r = sn_hwperf_enum_objects(&nobj, &objs)) == 0) {
+			memset(p, 0, a.sz);
+			/* fill one record per online cpu, slot = cpu number */
+			for (i = 0; i < nobj; i++) {
+				node = sn_hwperf_obj_to_cnode(objs + i);
+				for_each_online_cpu(j) {
+					if (node != cpu_to_node(j))
+						continue;
+					cpuobj = (struct sn_hwperf_object_info *) p + j;
+					slice = 'a' + cpuid_to_slice(j);
+					cdata = cpu_data(j);
+					cpuobj->id = j;
+					snprintf(cpuobj->name,
+						 sizeof(cpuobj->name),
+						 "CPU %luMHz %s",
+						 cdata->proc_freq / 1000000,
+						 cdata->vendor);
+					snprintf(cpuobj->location,
+						 sizeof(cpuobj->location),
+						 "%s%c", objs[i].location,
+						 slice);
+				}
+			}
+
+			vfree(objs);
+		}
+		break;
+
+	case SN_HWPERF_GET_NODE_NASID:
+		if (a.sz != sizeof(u64) ||
+		   (node = a.arg) < 0 || node >= numionodes) {
+			r = -EINVAL;
+			goto error;
+		}
+		*(u64 *)p = (u64)cnodeid_to_nasid(node);
+		break;
+
+	case SN_HWPERF_GET_OBJ_NODE:
+		if (a.sz != sizeof(u64) || a.arg < 0) {
+			r = -EINVAL;
+			goto error;
+		}
+		if ((r = sn_hwperf_enum_objects(&nobj, &objs)) == 0) {
+			if (a.arg >= nobj) {
+				r = -EINVAL;
+				vfree(objs);
+				goto error;
+			}
+			/* try the slot at index a.arg first, then search by id */
+			if (objs[(i = a.arg)].id != a.arg) {
+				for (i = 0; i < nobj; i++) {
+					if (objs[i].id == a.arg)
+						break;
+				}
+			}
+			if (i == nobj) {
+				r = -EINVAL;
+				vfree(objs);
+				goto error;
+			}
+			*(u64 *)p = (u64)sn_hwperf_obj_to_cnode(objs + i);
+			vfree(objs);
+		}
+		break;
+
+	case SN_HWPERF_GET_MMRS:
+	case SN_HWPERF_SET_MMRS:
+	case SN_HWPERF_OBJECT_DISTANCE:
+		/* these ops may have to run on a specific cpu */
+		op_info.p = p;
+		op_info.a = &a;
+		op_info.v0 = &v0;
+		op_info.op = op;
+		r = sn_hwperf_op_cpu(&op_info);
+		if (r) {
+			r = sn_hwperf_map_err(r);
+			goto error;
+		}
+		break;
+
+	default:
+		/* all other ops are a direct SAL call */
+		r = ia64_sn_hwperf_op(sn_hwperf_master_nasid, op,
+			      a.arg, a.sz, (u64) p, 0, 0, &v0);
+		if (r) {
+			r = sn_hwperf_map_err(r);
+			goto error;
+		}
+		a.v0 = v0;
+		break;
+	}
+
+	if (op & SN_HWPERF_OP_MEM_COPYOUT) {
+		r = copy_to_user((void __user *)a.ptr, p, a.sz);
+		if (r != 0) {
+			r = -EFAULT;
+			goto error;
+		}
+	}
+
+error:
+	/* common exit for success and failure; p may still be NULL here */
+	vfree(p);
+
+	lock_kernel();
+	return r;
+}
+
+/* File operations of the "sn_hwperf" misc device: ioctl only. */
+static struct file_operations sn_hwperf_fops = {
+ .ioctl = sn_hwperf_ioctl,
+};
+
+/*
+ * Misc device registered by sn_hwperf_init(): dynamically assigned minor,
+ * name "sn_hwperf", operations from sn_hwperf_fops.
+ */
+static struct miscdevice sn_hwperf_dev = {
+ MISC_DYNAMIC_MINOR,
+ "sn_hwperf",
+ &sn_hwperf_fops
+};
+
+/*
+ * One-time setup of the hwperf interface, serialized by
+ * sn_hwperf_init_mutex: pick the master nasid, ask SAL for its heap size,
+ * allocate and install that heap, read the object count, and finally
+ * register the "sn_hwperf" misc device.
+ *
+ * Returns 0 on success (or if the heap is already installed), otherwise
+ * a negative errno or the error from misc_register().
+ */
+static int sn_hwperf_init(void)
+{
+ u64 v;
+ int salr;
+ int e = 0;
+
+ /* single threaded, once-only initialization */
+ down(&sn_hwperf_init_mutex);
+ /* a non-NULL heap pointer means an earlier call already got this far */
+ if (sn_hwperf_salheap) {
+ up(&sn_hwperf_init_mutex);
+ return e;
+ }
+
+ /*
+ * The PROM code needs a fixed reference node. For convenience the
+ * same node as the console I/O is used.
+ */
+ sn_hwperf_master_nasid = (nasid_t) ia64_sn_get_console_nasid();
+
+ /*
+ * Request the needed size and install the PROM scratch area.
+ * The PROM keeps various tracking bits in this memory area.
+ */
+ salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
+ (u64) SN_HWPERF_GET_HEAPSIZE, 0,
+ (u64) sizeof(u64), (u64) &v, 0, 0, NULL);
+ if (salr != SN_HWPERF_OP_OK) {
+ e = -EINVAL;
+ goto out;
+ }
+
+ if ((sn_hwperf_salheap = vmalloc(v)) == NULL) {
+ e = -ENOMEM;
+ goto out;
+ }
+ salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
+ SN_HWPERF_INSTALL_HEAP, 0, v,
+ (u64) sn_hwperf_salheap, 0, 0, NULL);
+ if (salr != SN_HWPERF_OP_OK) {
+ e = -EINVAL;
+ goto out;
+ }
+
+ /* v is reused here to receive the object count */
+ salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
+ SN_HWPERF_OBJECT_COUNT, 0,
+ sizeof(u64), (u64) &v, 0, 0, NULL);
+ if (salr != SN_HWPERF_OP_OK) {
+ e = -EINVAL;
+ goto out;
+ }
+ sn_hwperf_obj_cnt = (int)v;
+
+out:
+ /* on any SAL/allocation failure, drop the heap so a later call retries */
+ if (e < 0 && sn_hwperf_salheap) {
+ vfree(sn_hwperf_salheap);
+ sn_hwperf_salheap = NULL;
+ sn_hwperf_obj_cnt = 0;
+ }
+
+ if (!e) {
+ /*
+ * Register a dynamic misc device for ioctl. Platforms
+ * supporting hotplug will create /dev/sn_hwperf, else
+ * the user can look up the minor number in /proc/misc.
+ */
+ /*
+ * NOTE(review): if misc_register() fails, the heap stays
+ * installed, so a later call returns 0 at the early check
+ * above without registering the device -- confirm intended.
+ */
+ if ((e = misc_register(&sn_hwperf_dev)) != 0) {
+ printk(KERN_ERR "sn_hwperf_init: misc register "
+ "for \"sn_hwperf\" failed, err %d\n", e);
+ }
+ }
+
+ up(&sn_hwperf_init_mutex);
+ return e;
+}
+
+/*
+ * open() for /proc/sgi_sn/sn_topology.
+ *
+ * Enumerates the hwperf objects, opens a seq_file on @file and stores the
+ * object array in the seq_file's private pointer; sn_topology_release()
+ * frees it. If seq_open() fails, the array is freed here and the error
+ * is returned, because no seq_file exists to attach it to.
+ *
+ * Returns 0 on success or the error from sn_hwperf_enum_objects() or
+ * seq_open().
+ */
+int sn_topology_open(struct inode *inode, struct file *file)
+{
+	int e;
+	struct seq_file *seq;
+	struct sn_hwperf_object_info *objbuf;
+	int nobj;
+
+	e = sn_hwperf_enum_objects(&nobj, &objbuf);
+	if (e)
+		return e;
+
+	e = seq_open(file, &sn_topology_seq_ops);
+	if (e) {
+		/* file->private_data is not a seq_file on failure */
+		vfree(objbuf);
+		return e;
+	}
+
+	seq = file->private_data;
+	seq->private = objbuf;
+
+	return 0;
+}
+
+/*
+ * release() for /proc/sgi_sn/sn_topology: free the object array kept in
+ * the seq_file's private pointer, then let seq_release() finish.
+ */
+int sn_topology_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq = file->private_data;
+	void *objbuf = seq->private;
+
+	vfree(objbuf);
+
+	return seq_release(inode, file);
+}
diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
new file mode 100644
index 00000000000..6a80fca807b
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
@@ -0,0 +1,149 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+#include <linux/config.h>
+#include <asm/uaccess.h>
+
+#ifdef CONFIG_PROC_FS
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <asm/sn/sn_sal.h>
+
+/* seq_file show: print the value of sn_local_partid() as a decimal line. */
+static int partition_id_show(struct seq_file *s, void *p)
+{
+ seq_printf(s, "%d\n", sn_local_partid());
+ return 0;
+}
+
+/* open(): bind partition_id_show to @file through single_open(). */
+static int partition_id_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, partition_id_show, NULL);
+}
+
+/* seq_file show: print the string from sn_system_serial_number(). */
+static int system_serial_number_show(struct seq_file *s, void *p)
+{
+ seq_printf(s, "%s\n", sn_system_serial_number());
+ return 0;
+}
+
+/* open(): bind system_serial_number_show to @file through single_open(). */
+static int system_serial_number_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, system_serial_number_show, NULL);
+}
+
+/*
+ * seq_file show: print sn_partition_serial_number_val() in hex with a
+ * "0x" prefix.
+ */
+static int licenseID_show(struct seq_file *s, void *p)
+{
+ seq_printf(s, "0x%lx\n", sn_partition_serial_number_val());
+ return 0;
+}
+
+/* open(): bind licenseID_show to @file through single_open(). */
+static int licenseID_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, licenseID_show, NULL);
+}
+
+/*
+ * Enable forced interrupt by default.
+ * When set, the sn interrupt handler writes the force interrupt register on
+ * the bridge chip. The hardware will then send an interrupt message if the
+ * interrupt line is active. This mimics a level sensitive interrupt.
+ *
+ * The flag is reported and changed through the sn_force_interrupt /proc
+ * handlers in this file.
+ */
+int sn_force_interrupt_flag = 1;
+
+/* seq_file show: report whether sn_force_interrupt_flag is set. */
+static int sn_force_interrupt_show(struct seq_file *s, void *p)
+{
+	const char *state;
+
+	if (sn_force_interrupt_flag)
+		state = "enabled";
+	else
+		state = "disabled";
+
+	seq_printf(s, "Force interrupt is %s\n", state);
+	return 0;
+}
+
+/*
+ * write() handler for the sn_force_interrupt /proc entry.
+ *
+ * Only the first byte written is examined: '0' clears
+ * sn_force_interrupt_flag, anything else sets it. A zero-length write
+ * changes nothing, since the caller supplied no byte to read.
+ *
+ * Returns @count on success or -EFAULT if the byte cannot be read.
+ */
+static ssize_t sn_force_interrupt_write_proc(struct file *file,
+		const char __user *buffer, size_t count, loff_t *data)
+{
+	char val;
+
+	/* never read beyond what the caller offered */
+	if (count == 0)
+		return 0;
+
+	if (copy_from_user(&val, buffer, 1))
+		return -EFAULT;
+
+	sn_force_interrupt_flag = (val == '0') ? 0 : 1;
+	return count;
+}
+
+/* open(): bind sn_force_interrupt_show to @file through single_open(). */
+static int sn_force_interrupt_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, sn_force_interrupt_show, NULL);
+}
+
+/* seq_file show: print partition_coherence_id() as a decimal line. */
+static int coherence_id_show(struct seq_file *s, void *p)
+{
+ seq_printf(s, "%d\n", partition_coherence_id());
+
+ return 0;
+}
+
+/* open(): bind coherence_id_show to @file through single_open(). */
+static int coherence_id_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, coherence_id_show, NULL);
+}
+
+/*
+ * Create a read-only (0444) seq_file based /proc entry called @name under
+ * @parent, using @openfunc and @releasefunc as its open and release
+ * methods; read and llseek are seq_read and seq_lseek.
+ *
+ * The file_operations are allocated and filled in before the entry is
+ * created, so an entry is never returned (or left behind) without usable
+ * operations.
+ *
+ * Returns the new entry, or NULL if either allocation fails.
+ */
+static struct proc_dir_entry *sn_procfs_create_entry(
+	const char *name, struct proc_dir_entry *parent,
+	int (*openfunc)(struct inode *, struct file *),
+	int (*releasefunc)(struct inode *, struct file *))
+{
+	struct proc_dir_entry *e;
+	struct file_operations *fops;
+
+	fops = kmalloc(sizeof(*fops), GFP_KERNEL);
+	if (!fops)
+		return NULL;
+
+	memset(fops, 0, sizeof(*fops));
+	fops->open = openfunc;
+	fops->read = seq_read;
+	fops->llseek = seq_lseek;
+	fops->release = releasefunc;
+
+	e = create_proc_entry(name, 0444, parent);
+	if (!e) {
+		kfree(fops);
+		return NULL;
+	}
+	e->proc_fops = fops;
+
+	return e;
+}
+
+/* /proc/sgi_sn/sn_topology uses seq_file, see sn_hwperf.c */
+extern int sn_topology_open(struct inode *, struct file *);
+extern int sn_topology_release(struct inode *, struct file *);
+
+/*
+ * Create the /proc/sgi_sn directory and its entries. Must be called at
+ * most once (a second call trips the BUG_ON). Failure to create the
+ * directory or any single entry is silently tolerated.
+ */
+void register_sn_procfs(void)
+{
+	static struct proc_dir_entry *sgi_proc_dir = NULL;
+	struct proc_dir_entry *e;
+
+	BUG_ON(sgi_proc_dir != NULL);
+	if (!(sgi_proc_dir = proc_mkdir("sgi_sn", NULL)))
+		return;
+
+	sn_procfs_create_entry("partition_id", sgi_proc_dir,
+		partition_id_open, single_release);
+
+	sn_procfs_create_entry("system_serial_number", sgi_proc_dir,
+		system_serial_number_open, single_release);
+
+	sn_procfs_create_entry("licenseID", sgi_proc_dir,
+		licenseID_open, single_release);
+
+	e = sn_procfs_create_entry("sn_force_interrupt", sgi_proc_dir,
+		sn_force_interrupt_open, single_release);
+	/* only hook up write if the entry really has file operations */
+	if (e && e->proc_fops)
+		e->proc_fops->write = sn_force_interrupt_write_proc;
+
+	sn_procfs_create_entry("coherence_id", sgi_proc_dir,
+		coherence_id_open, single_release);
+
+	sn_procfs_create_entry("sn_topology", sgi_proc_dir,
+		sn_topology_open, sn_topology_release);
+}
+
+#endif /* CONFIG_PROC_FS */
diff --git a/arch/ia64/sn/kernel/sn2/timer.c b/arch/ia64/sn/kernel/sn2/timer.c
new file mode 100644
index 00000000000..deb9baf4d47
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/timer.c
@@ -0,0 +1,36 @@
+/*
+ * linux/arch/ia64/sn/kernel/sn2/timer.c
+ *
+ * Copyright (C) 2003 Silicon Graphics, Inc.
+ * Copyright (C) 2003 Hewlett-Packard Co
+ * David Mosberger <davidm@hpl.hp.com>: updated for new timer-interpolation infrastructure
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/time.h>
+#include <linux/interrupt.h>
+
+#include <asm/hw_irq.h>
+#include <asm/system.h>
+
+#include <asm/sn/leds.h>
+#include <asm/sn/shub_mmr.h>
+#include <asm/sn/clksupport.h>
+
+extern unsigned long sn_rtc_cycles_per_second;
+
+/*
+ * Time interpolator description for SN2. The frequency and addr fields
+ * are filled in at run time by sn_timer_init().
+ */
+static struct time_interpolator sn2_interpolator = {
+ .drift = -1,
+ .shift = 10,
+ .mask = (1LL << 55) - 1, /* low 55 bits */
+ .source = TIME_SOURCE_MMIO64
+};
+
+/*
+ * Complete sn2_interpolator with the RTC counter address and the measured
+ * RTC frequency, then register it.
+ */
+void __init sn_timer_init(void)
+{
+	sn2_interpolator.addr = RTC_COUNTER_ADDR;
+	sn2_interpolator.frequency = sn_rtc_cycles_per_second;
+
+	register_time_interpolator(&sn2_interpolator);
+}
diff --git a/arch/ia64/sn/kernel/sn2/timer_interrupt.c b/arch/ia64/sn/kernel/sn2/timer_interrupt.c
new file mode 100644
index 00000000000..cde7375390b
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/timer_interrupt.c
@@ -0,0 +1,63 @@
+/*
+ *
+ *
+ * Copyright (c) 2003 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/NoticeExplan
+ */
+
+#include <linux/interrupt.h>
+#include <asm/sn/pda.h>
+#include <asm/sn/leds.h>
+
+extern void sn_lb_int_war_check(void);
+extern irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs);
+
+#define SN_LB_INT_WAR_INTERVAL 100
+
+/*
+ * Per-tick SN housekeeping: toggle the heartbeat LED every HZ/2 ticks,
+ * apply the SHUB 1.1 workaround write when enabled, and call
+ * sn_lb_int_war_check() once every SN_LB_INT_WAR_INTERVAL ticks.
+ */
+void sn_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+	/* LED blinking */
+	if (pda->hb_count-- == 0) {
+		pda->hb_count = HZ / 2;
+		set_led_bits(pda->hb_state ^=
+			     LED_CPU_HEARTBEAT, LED_CPU_HEARTBEAT);
+	}
+
+	/* Bugfix code for SHUB 1.1 */
+	if (enable_shub_wars_1_1() && pda->pio_shub_war_cam_addr)
+		*pda->pio_shub_war_cam_addr = 0x8000000000000010UL;
+
+	/* the check runs on the tick where the counter is zero */
+	if (pda->sn_lb_int_war_ticks == 0)
+		sn_lb_int_war_check();
+	if (++pda->sn_lb_int_war_ticks >= SN_LB_INT_WAR_INTERVAL)
+		pda->sn_lb_int_war_ticks = 0;
+}
diff --git a/arch/ia64/sn/pci/Makefile b/arch/ia64/sn/pci/Makefile
new file mode 100644
index 00000000000..b5dca0097a8
--- /dev/null
+++ b/arch/ia64/sn/pci/Makefile
@@ -0,0 +1,10 @@
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+#
+# Makefile for the sn pci general routines.
+
+obj-y := pci_dma.o pcibr/
diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c
new file mode 100644
index 00000000000..f680824f819
--- /dev/null
+++ b/arch/ia64/sn/pci/pci_dma.c
@@ -0,0 +1,363 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000,2002-2005 Silicon Graphics, Inc. All rights reserved.
+ *
+ * Routines for PCI DMA mapping. See Documentation/DMA-API.txt for
+ * a description of how these routines should be used.
+ */
+
+#include <linux/module.h>
+#include <asm/dma.h>
+#include <asm/sn/sn_sal.h>
+#include "pci/pcibus_provider_defs.h"
+#include "pci/pcidev.h"
+#include "pci/pcibr_provider.h"
+
+/* Kernel virtual, respectively physical, address of a scatterlist entry. */
+#define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset)
+#define SG_ENT_PHYS_ADDRESS(SG) virt_to_phys(SG_ENT_VIRT_ADDRESS(SG))
+
+/**
+ * sn_dma_supported - test a DMA mask
+ * @dev: device to test
+ * @mask: DMA mask to test
+ *
+ * Report whether the DMA address mask of the given PCI device can be
+ * supported. A driver whose device can only drive the low 24 address bits
+ * while bus mastering would pass 0x00ffffff, for example. Masks smaller
+ * than 0x7fffffff are rejected.
+ *
+ * Returns 1 if the mask is supported, 0 otherwise.
+ */
+int sn_dma_supported(struct device *dev, u64 mask)
+{
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	return (mask >= 0x7fffffff) ? 1 : 0;
+}
+EXPORT_SYMBOL(sn_dma_supported);
+
+/**
+ * sn_dma_set_mask - set the DMA mask
+ * @dev: device to set
+ * @dma_mask: new mask
+ *
+ * Store @dma_mask as @dev's DMA mask when sn_dma_supported() accepts it.
+ *
+ * Returns 1 if the mask was set, 0 if it is not supported.
+ */
+int sn_dma_set_mask(struct device *dev, u64 dma_mask)
+{
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	if (sn_dma_supported(dev, dma_mask)) {
+		*dev->dma_mask = dma_mask;
+		return 1;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(sn_dma_set_mask);
+
+/**
+ * sn_dma_alloc_coherent - allocate memory for coherent DMA
+ * @dev: device to allocate for
+ * @size: size of the region
+ * @dma_handle: DMA (bus) address
+ * @flags: memory allocation flags (not consulted; GFP_ATOMIC is used)
+ *
+ * Allocates zeroed pages covering @size bytes and maps them for the
+ * device with the SN_PCIDMA_CONSISTENT attribute, storing the bus address
+ * in @dma_handle.
+ *
+ * This interface is usually used for "command" streams (e.g. the command
+ * queue for a SCSI controller). See Documentation/DMA-API.txt for
+ * more information.
+ *
+ * Returns the kernel virtual address, or NULL if the pages cannot be
+ * allocated or mapped.
+ */
+void *sn_dma_alloc_coherent(struct device *dev, size_t size,
+			    dma_addr_t * dma_handle, int flags)
+{
+	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev));
+	unsigned long phys_addr;
+	void *cpuaddr;
+
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	/*
+	 * FIXME: We should be doing alloc_pages_node for the node closest
+	 * to the PCI device.
+	 */
+	cpuaddr = (void *)__get_free_pages(GFP_ATOMIC, get_order(size));
+	if (!cpuaddr)
+		return NULL;
+
+	memset(cpuaddr, 0x0, size);
+	phys_addr = __pa(cpuaddr);
+
+	/*
+	 * 64 bit address translations should never fail.
+	 * 32 bit translations can fail if there are insufficient mapping
+	 * resources.
+	 */
+	*dma_handle = pcibr_dma_map(pcidev_info, phys_addr, size,
+				    SN_PCIDMA_CONSISTENT);
+	if (*dma_handle)
+		return cpuaddr;
+
+	/* mapping failed: give the pages back */
+	printk(KERN_ERR "%s: out of ATEs\n", __FUNCTION__);
+	free_pages((unsigned long)cpuaddr, get_order(size));
+	return NULL;
+}
+EXPORT_SYMBOL(sn_dma_alloc_coherent);
+
+/**
+ * sn_dma_free_coherent - free memory associated with coherent DMAable region
+ * @dev: device to free for
+ * @size: size to free
+ * @cpu_addr: kernel virtual address to free
+ * @dma_handle: DMA address associated with this region
+ *
+ * Frees the memory allocated by sn_dma_alloc_coherent(): the DMA mapping
+ * is released first, then the pages are returned.
+ */
+void sn_dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t dma_handle)
+{
+ struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev));
+
+ BUG_ON(dev->bus != &pci_bus_type);
+
+ pcibr_dma_unmap(pcidev_info, dma_handle, 0);
+ free_pages((unsigned long)cpu_addr, get_order(size));
+}
+EXPORT_SYMBOL(sn_dma_free_coherent);
+
+/**
+ * sn_dma_map_single - map a single page for DMA
+ * @dev: device to map for
+ * @cpu_addr: kernel virtual address of the region to map
+ * @size: size of the region
+ * @direction: DMA direction
+ *
+ * Map the region pointed to by @cpu_addr for DMA and return the
+ * DMA address, or 0 (after logging) if no mapping could be set up.
+ *
+ * A one-step mapping call is used rather than a two-step alloc/addr
+ * scheme because there is no way of saving a dmamap handle from the
+ * alloc to free it later.
+ *
+ * TODO: simplify our interface;
+ * 	figure out how to save dmamap handle so can use two step.
+ */
+dma_addr_t sn_dma_map_single(struct device *dev, void *cpu_addr, size_t size,
+			     int direction)
+{
+	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev));
+	unsigned long phys_addr;
+	dma_addr_t dma_addr;
+
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	phys_addr = __pa(cpu_addr);
+	dma_addr = pcibr_dma_map(pcidev_info, phys_addr, size, 0);
+	if (dma_addr)
+		return dma_addr;
+
+	printk(KERN_ERR "%s: out of ATEs\n", __FUNCTION__);
+	return 0;
+}
+EXPORT_SYMBOL(sn_dma_map_single);
+
+/**
+ * sn_dma_unmap_single - unmap a DMA mapped page
+ * @dev: device to sync
+ * @dma_addr: DMA address to sync
+ * @size: size of region
+ * @direction: DMA direction
+ *
+ * This routine is supposed to sync the DMA region specified
+ * by @dma_addr into the coherence domain. On SN, we're always cache
+ * coherent, so we just need to free any ATEs associated with this mapping.
+ */
+void sn_dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+ int direction)
+{
+ struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev));
+
+ BUG_ON(dev->bus != &pci_bus_type);
+ pcibr_dma_unmap(pcidev_info, dma_addr, direction);
+}
+EXPORT_SYMBOL(sn_dma_unmap_single);
+
+/**
+ * sn_dma_unmap_sg - unmap a DMA scatterlist
+ * @dev: device to unmap
+ * @sg: scatterlist to unmap
+ * @nhwentries: number of scatterlist entries
+ * @direction: DMA direction
+ *
+ * Unmap a set of streaming mode DMA translations and clear the DMA
+ * address and length of every entry.
+ */
+void sn_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
+		     int nhwentries, int direction)
+{
+	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev));
+	int n;
+
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	for (n = 0; n < nhwentries; n++) {
+		struct scatterlist *ent = &sg[n];
+
+		pcibr_dma_unmap(pcidev_info, ent->dma_address, direction);
+		ent->dma_address = 0;
+		ent->dma_length = 0;
+	}
+}
+EXPORT_SYMBOL(sn_dma_unmap_sg);
+
+/**
+ * sn_dma_map_sg - map a scatterlist for DMA
+ * @dev: device to map for
+ * @sg: scatterlist to map
+ * @nhwentries: number of entries
+ * @direction: direction of the DMA transaction
+ *
+ * Maps each entry of @sg for DMA. If one entry cannot be mapped, the
+ * entries mapped so far are unmapped again.
+ *
+ * Returns @nhwentries on success, 0 on failure.
+ */
+int sn_dma_map_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
+		  int direction)
+{
+	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev));
+	unsigned long phys_addr;
+	int n;
+
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	/* Give every scatterlist entry its own DMA address. */
+	for (n = 0; n < nhwentries; n++) {
+		struct scatterlist *ent = &sg[n];
+
+		phys_addr = SG_ENT_PHYS_ADDRESS(ent);
+		ent->dma_address = pcibr_dma_map(pcidev_info, phys_addr,
+						 ent->length, 0);
+		if (ent->dma_address) {
+			ent->dma_length = ent->length;
+			continue;
+		}
+
+		printk(KERN_ERR "%s: out of ATEs\n", __FUNCTION__);
+
+		/* Roll back the entries that were mapped before this one. */
+		if (n > 0)
+			sn_dma_unmap_sg(dev, sg, n, direction);
+		return 0;
+	}
+
+	return nhwentries;
+}
+EXPORT_SYMBOL(sn_dma_map_sg);
+
+/* Performs no sync work; only asserts that @dev sits on the PCI bus. */
+void sn_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
+ size_t size, int direction)
+{
+ BUG_ON(dev->bus != &pci_bus_type);
+}
+EXPORT_SYMBOL(sn_dma_sync_single_for_cpu);
+
+/* Performs no sync work; only asserts that @dev sits on the PCI bus. */
+void sn_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
+ size_t size, int direction)
+{
+ BUG_ON(dev->bus != &pci_bus_type);
+}
+EXPORT_SYMBOL(sn_dma_sync_single_for_device);
+
+/* Performs no sync work; only asserts that @dev sits on the PCI bus. */
+void sn_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+ int nelems, int direction)
+{
+ BUG_ON(dev->bus != &pci_bus_type);
+}
+EXPORT_SYMBOL(sn_dma_sync_sg_for_cpu);
+
+/* Performs no sync work; only asserts that @dev sits on the PCI bus. */
+void sn_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+ int nelems, int direction)
+{
+ BUG_ON(dev->bus != &pci_bus_type);
+}
+EXPORT_SYMBOL(sn_dma_sync_sg_for_device);
+
+/*
+ * Report whether @dma_addr denotes a failed mapping.
+ *
+ * The mapping routines in this file (sn_dma_map_single(),
+ * sn_dma_map_sg(), sn_dma_alloc_coherent()) all treat a DMA address of 0
+ * as "out of ATEs", so 0 is the error value callers need to detect.
+ *
+ * Returns non-zero if @dma_addr is the failure value, 0 otherwise.
+ */
+int sn_dma_mapping_error(dma_addr_t dma_addr)
+{
+	return dma_addr == 0;
+}
+EXPORT_SYMBOL(sn_dma_mapping_error);
+
+/*
+ * Return the bus's bs_legacy_mem address with __IA64_UNCACHED_OFFSET
+ * OR'ed in, or ERR_PTR(-ENODEV) if @bus has no bussoft structure.
+ */
+char *sn_pci_get_legacy_mem(struct pci_bus *bus)
+{
+ if (!SN_PCIBUS_BUSSOFT(bus))
+ return ERR_PTR(-ENODEV);
+
+ return (char *)(SN_PCIBUS_BUSSOFT(bus)->bs_legacy_mem | __IA64_UNCACHED_OFFSET);
+}
+
+/*
+ * Read @size bytes from legacy I/O @port of @bus into @val by probing the
+ * uncached address with ia64_sn_probe_mem().
+ *
+ * Returns @size on completion, -ENODEV if @bus has no bussoft structure,
+ * or -EINVAL when the probe returns 2. When the probe returns 1, *@val
+ * is set to all ones and @size is still returned.
+ */
+int sn_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size)
+{
+	unsigned long addr;
+	int ret;
+
+	if (!SN_PCIBUS_BUSSOFT(bus))
+		return -ENODEV;
+
+	addr = SN_PCIBUS_BUSSOFT(bus)->bs_legacy_io | __IA64_UNCACHED_OFFSET;
+	addr += port;
+
+	ret = ia64_sn_probe_mem(addr, (long)size, (void *)val);
+	switch (ret) {
+	case 2:
+		return -EINVAL;
+	case 1:
+		*val = -1;
+		break;
+	default:
+		break;
+	}
+
+	return size;
+}
+
+/*
+ * Write the low @size bytes (1, 2 or 4) of @val to legacy I/O @port of
+ * @bus through the uncached address.
+ *
+ * Returns @size on success, -ENODEV if @bus has no bussoft structure, or
+ * -EINVAL for any other @size.
+ */
+int sn_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size)
+{
+	unsigned long target;
+
+	if (!SN_PCIBUS_BUSSOFT(bus))
+		return -ENODEV;
+
+	/* Put the phys addr in uncached space */
+	target = SN_PCIBUS_BUSSOFT(bus)->bs_legacy_io | __IA64_UNCACHED_OFFSET;
+	target += port;
+
+	switch (size) {
+	case 1:
+		*(volatile u8 *)target = (u8)val;
+		return size;
+	case 2:
+		*(volatile u16 *)target = (u16)val;
+		return size;
+	case 4:
+		*(volatile u32 *)target = (u32)val;
+		return size;
+	default:
+		return -EINVAL;
+	}
+}
diff --git a/arch/ia64/sn/pci/pcibr/Makefile b/arch/ia64/sn/pci/pcibr/Makefile
new file mode 100644
index 00000000000..1850c4a94c4
--- /dev/null
+++ b/arch/ia64/sn/pci/pcibr/Makefile
@@ -0,0 +1,11 @@
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2002-2004 Silicon Graphics, Inc. All Rights Reserved.
+#
+# Makefile for the sn2 io routines.
+
+obj-y += pcibr_dma.o pcibr_reg.o \
+ pcibr_ate.o pcibr_provider.o
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_ate.c b/arch/ia64/sn/pci/pcibr/pcibr_ate.c
new file mode 100644
index 00000000000..9d6854666f9
--- /dev/null
+++ b/arch/ia64/sn/pci/pcibr/pcibr_ate.c
@@ -0,0 +1,188 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2001-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/types.h>
+#include <asm/sn/sn_sal.h>
+#include "pci/pcibus_provider_defs.h"
+#include "pci/pcidev.h"
+#include "pci/pcibr_provider.h"
+
+/* When non-zero, pcibr_ate_free() clears the valid bit of the ATEs it frees. */
+int pcibr_invalidate_ate = 0; /* by default don't invalidate ATE on free */
+
+/*
+ * mark_ate: Store @value in @number consecutive slots of the ate table,
+ * beginning at @start. A value of zero marks the slots free, any other
+ * value marks them in use.
+ */
+static void mark_ate(struct ate_resource *ate_resource, int start, int number,
+		     uint64_t value)
+{
+	uint64_t *slot = &ate_resource->ate[start];
+	int done;
+
+	for (done = 0; done < number; done++)
+		slot[done] = value;
+}
+
+/*
+ * find_free_ate: Find the first free ate index starting from the given
+ * index for the desired consecutive count.
+ *
+ * Returns the index of the first entry of a run of @count free (zero)
+ * entries at or after @start, or -1 if no such run exists.
+ */
+static int find_free_ate(struct ate_resource *ate_resource, int start,
+			 int count)
+{
+
+	uint64_t *ate = ate_resource->ate;
+	int index;
+	int start_free;
+
+	for (index = start; index < ate_resource->num_ate;) {
+		if (!ate[index]) {
+			int i;
+			int free;
+			free = 0;
+			start_free = index;	/* Found start free ate */
+			for (i = start_free; i < ate_resource->num_ate; i++) {
+				if (!ate[i]) {	/* This is free */
+					if (++free == count)
+						return start_free;
+				} else {
+					index = i + 1;
+					break;
+				}
+			}
+			/*
+			 * The free run reached the end of the table without
+			 * being long enough. Nothing further can match, and
+			 * index was not advanced, so stop here instead of
+			 * rescanning the same run forever.
+			 */
+			if (i >= ate_resource->num_ate)
+				return -1;
+		} else
+			index++;	/* Try next ate */
+	}
+
+	return -1;
+}
+
+/*
+ * free_ate_resource: Free the run of ATEs beginning at @start. The run
+ * length is the value stored in the first slot. lowest_free_index is
+ * lowered to @start when that is smaller, or when it was negative.
+ */
+static inline void free_ate_resource(struct ate_resource *ate_resource,
+				     int start)
+{
+	int lowest = ate_resource->lowest_free_index;
+
+	mark_ate(ate_resource, start, ate_resource->ate[start], 0);
+
+	if (lowest < 0 || lowest > start)
+		ate_resource->lowest_free_index = start;
+}
+
+/*
+ * alloc_ate_resource: Allocate the requested number of ATEs.
+ *
+ * Returns the first index of the allocated run, or -1 when the table is
+ * exhausted or holds no run of @ate_needed consecutive free entries.
+ */
+static inline int alloc_ate_resource(struct ate_resource *ate_resource,
+				     int ate_needed)
+{
+	int first;
+
+	/* A negative lowest free index means the table is exhausted. */
+	if (ate_resource->lowest_free_index < 0)
+		return -1;
+
+	/* Look for the required number of free consecutive ates. */
+	first = find_free_ate(ate_resource, ate_resource->lowest_free_index,
+			      ate_needed);
+
+	/* Every slot of the run records the run length. */
+	if (first >= 0)
+		mark_ate(ate_resource, first, ate_needed, ate_needed);
+
+	/* Recompute where the next search should start. */
+	ate_resource->lowest_free_index =
+	    find_free_ate(ate_resource, ate_resource->lowest_free_index, 1);
+
+	return first;
+}
+
+/*
+ * Allocate "count" contiguous Bridge Address Translation Entries
+ * on the specified bridge to be used for PCI to XTALK mappings.
+ * Indices in rm map range from 1..num_entries.  Indices returned
+ * to caller range from 0..num_entries-1.
+ *
+ * The allocation runs under the bridge lock.
+ *
+ * Return the start index on success, -1 on failure.
+ */
+int pcibr_ate_alloc(struct pcibus_info *pcibus_info, int count)
+{
+	uint64_t flag;
+	int first;
+
+	flag = pcibr_lock(pcibus_info);
+	first = alloc_ate_resource(&pcibus_info->pbi_int_ate_resource, count);
+	pcibr_unlock(pcibus_info, flag);
+
+	/* Any negative result is reported as -1. */
+	return (first < 0) ? -1 : first;
+}
+
+/*
+ * Return the address of the internal Address Translation Entry with the
+ * given index. An index at or beyond the size of the internal ATE table
+ * panics.
+ */
+static inline uint64_t *pcibr_ate_addr(struct pcibus_info *pcibus_info,
+				       int ate_index)
+{
+	if (ate_index >= pcibus_info->pbi_int_ate_size)
+		panic("pcibr_ate_addr: invalid ate_index 0x%x", ate_index);
+
+	return pcireg_int_ate_addr(pcibus_info, ate_index);
+}
+
+/*
+ * Update the ate.
+ *
+ * Writes @count consecutive internal ATEs starting at @ate_index. The
+ * first one receives @ate and each following one a value IOPGSIZE larger
+ * than its predecessor. An index outside the internal table panics.
+ */
+void inline
+ate_write(struct pcibus_info *pcibus_info, int ate_index, int count,
+	  volatile uint64_t ate)
+{
+	for (; count > 0; count--, ate_index++, ate += IOPGSIZE) {
+		if (ate_index >= pcibus_info->pbi_int_ate_size)
+			panic("ate_write: invalid ate_index 0x%x", ate_index);
+
+		pcireg_int_ate_set(pcibus_info, ate_index, ate);
+	}
+
+	pcireg_tflush_get(pcibus_info);	/* wait until Bridge PIO complete */
+}
+
+/*
+ * Release the run of ATEs that starts at @index. When
+ * pcibr_invalidate_ate is set, the valid bit of the hardware entries is
+ * cleared first. The bookkeeping update runs under the bridge lock.
+ */
+void pcibr_ate_free(struct pcibus_info *pcibus_info, int index)
+{
+	struct ate_resource *res = &pcibus_info->pbi_int_ate_resource;
+	uint64_t flags;
+
+	if (pcibr_invalidate_ate) {
+		/* For debugging purposes, clear the valid bit in the ATE */
+		volatile uint64_t ate = *pcibr_ate_addr(pcibus_info, index);
+		int count = res->ate[index];
+
+		ate_write(pcibus_info, index, count, (ate & ~PCI32_ATE_V));
+	}
+
+	flags = pcibr_lock(pcibus_info);
+	free_ate_resource(res, index);
+	pcibr_unlock(pcibus_info, flags);
+}
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
new file mode 100644
index 00000000000..b1d66ac065c
--- /dev/null
+++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
@@ -0,0 +1,379 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2001-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/geo.h>
+#include "xtalk/xwidgetdev.h"
+#include "xtalk/hubdev.h"
+#include "pci/pcibus_provider_defs.h"
+#include "pci/pcidev.h"
+#include "pci/tiocp.h"
+#include "pci/pic.h"
+#include "pci/pcibr_provider.h"
+#include "pci/tiocp.h"
+#include "tio.h"
+#include <asm/sn/addrs.h>
+
+extern int sn_ioif_inited;
+
+/* =====================================================================
+ * DMA MANAGEMENT
+ *
+ * The Bridge ASIC provides three methods of doing DMA: via a "direct map"
+ * register available in 32-bit PCI space (which selects a contiguous 2G
+ * address space on some other widget), via "direct" addressing via 64-bit
+ * PCI space (all destination information comes from the PCI address,
+ * including transfer attributes), and via a "mapped" region that allows
+ * a bunch of different small mappings to be established with the PMU.
+ *
+ * For efficiency, we most prefer to use the 32bit direct mapping facility,
+ * since it requires no resource allocations. The advantage of using the
+ * PMU over the 64-bit direct is that single-cycle PCI addressing can be
+ * used; the advantage of using 64-bit direct over PMU addressing is that
+ * we do not have to allocate entries in the PMU.
+ */
+
+/*
+ * Map 'req_size' bytes at physical address 'paddr' through the bridge's
+ * internal ATEs (32-bit "mapped" DMA).
+ *
+ * 'flags' are extra ATE attribute bits; PCI32_ATE_V is always added.
+ * Returns the PCI bus address of the mapping, or 0 when the mode is not
+ * supported on this bus or no ATEs could be allocated.
+ */
+static uint64_t
+pcibr_dmamap_ate32(struct pcidev_info *info,
+		   uint64_t paddr, size_t req_size, uint64_t flags)
+{
+	struct pcidev_info *host_info = info->pdi_host_pcidev_info;
+	struct pcibus_info *pcibus_info =
+	    (struct pcibus_info *)host_info->pdi_pcibus_info;
+	uint8_t internal_device =
+	    (PCI_SLOT(host_info->pdi_host_pcidev_info->
+		      pdi_linux_pcidev->devfn)) - 1;
+	uint64_t ate_flags = flags | PCI32_ATE_V;
+	size_t last_byte;
+	int ate_count;
+	int ate_index;
+	uint64_t ate;
+	uint64_t pci_addr;
+	uint64_t xio_addr;
+	uint64_t offset;
+
+	/* PIC in PCI-X mode does not support 32bit PageMap mode */
+	if (IS_PIC_SOFT(pcibus_info) && IS_PCIX(pcibus_info))
+		return 0;
+
+	/*
+	 * Calculate the number of ATEs needed. Unless MINIMAL_ATE_FLAG()
+	 * holds, budget for the worst case start offset within an I/O page.
+	 */
+	last_byte = req_size - 1;
+	if (!(MINIMAL_ATE_FLAG(paddr, req_size)))
+		last_byte += IOPGSIZE - 1;
+	ate_count = IOPG(last_byte) + 1;	/* round UP */
+
+	/* Reserve that many consecutive ATEs. */
+	ate_index = pcibr_ate_alloc(pcibus_info, ate_count);
+	if (ate_index < 0)
+		return 0;
+
+	/* In PCI-X mode, Prefetch not supported */
+	if (IS_PCIX(pcibus_info))
+		ate_flags &= ~(PCI32_ATE_PREF);
+
+	if (IS_PIC_SOFT(pcibus_info))
+		xio_addr = PHYS_TO_DMA(paddr);
+	else
+		xio_addr = PHYS_TO_TIODMA(paddr);
+
+	/* The ATE holds the page-aligned address; the offset goes in pci_addr. */
+	offset = IOPGOFF(xio_addr);
+	ate = ate_flags | (xio_addr - offset);
+
+	/* If PIC, put the targetid in the ATE */
+	if (IS_PIC_SOFT(pcibus_info))
+		ate |= (pcibus_info->pbi_hub_xid << PIC_ATE_TARGETID_SHFT);
+
+	ate_write(pcibus_info, ate_index, ate_count, ate);
+
+	/*
+	 * Set up the DMA mapped Address.
+	 */
+	pci_addr = PCI32_MAPPED_BASE + offset + IOPGSIZE * ate_index;
+
+	/*
+	 * If swap was set in device in pcibr_endian_set()
+	 * we need to turn swapping on.
+	 */
+	if (pcibus_info->pbi_devreg[internal_device] & PCIBR_DEV_SWAP_DIR)
+		ATE_SWAP_ON(pci_addr);
+
+	return pci_addr;
+}
+
+/*
+ * Build a 64-bit direct-mapped PCI address for physical address 'paddr'.
+ * 'dma_attributes' are attribute bits OR-ed into the translated address.
+ * No bridge resources are allocated.
+ */
+static uint64_t
+pcibr_dmatrans_direct64(struct pcidev_info * info, uint64_t paddr,
+			uint64_t dma_attributes)
+{
+	struct pcibus_info *pcibus_info = (struct pcibus_info *)
+	    ((info->pdi_host_pcidev_info)->pdi_pcibus_info);
+	uint64_t pci_addr;
+
+	/* Translate to Crosstalk View of Physical Address */
+	if (IS_PIC_SOFT(pcibus_info))
+		pci_addr = PHYS_TO_DMA(paddr);
+	else
+		pci_addr = PHYS_TO_TIODMA(paddr);
+	pci_addr |= dma_attributes;
+
+	/* Handle Bus mode: the prefetch attribute is dropped on PCI-X */
+	if (IS_PCIX(pcibus_info))
+		pci_addr &= ~PCI64_ATTR_PREF;
+
+	/* Handle Bridge Chipset differences */
+	if (IS_PIC_SOFT(pcibus_info))
+		pci_addr |= ((uint64_t) pcibus_info->pbi_hub_xid <<
+			     PIC_PCI64_ATTR_TARG_SHFT);
+	else
+		pci_addr |= TIOCP_PCI64_CMDTYPE_MEM;
+
+	/* If PCI mode, func zero uses VCHAN0, every other func uses VCHAN1 */
+	if (!IS_PCIX(pcibus_info) && PCI_FUNC(info->pdi_linux_pcidev->devfn))
+		pci_addr |= PCI64_ATTR_VIRTUAL;
+
+	return pci_addr;
+}
+
+/*
+ * Try to express 'paddr' as a 32-bit direct-mapped PCI address.
+ *
+ * Returns PCI32_DIRECT_BASE | offset when the request lies entirely within
+ * the 2G window that starts at pbi_dir_xbase, and 0 otherwise (including
+ * always on a PCI-X bus). 'flags' is not used.
+ */
+static uint64_t
+pcibr_dmatrans_direct32(struct pcidev_info * info,
+			uint64_t paddr, size_t req_size, uint64_t flags)
+{
+	struct pcidev_info *host_info = info->pdi_host_pcidev_info;
+	struct pcibus_info *pcibus_info =
+	    (struct pcibus_info *)host_info->pdi_pcibus_info;
+	uint64_t window_base;
+	uint64_t xio_addr;
+	uint64_t offset;
+	uint64_t endoff;
+
+	if (IS_PCIX(pcibus_info))
+		return 0;
+
+	if (IS_PIC_SOFT(pcibus_info))
+		xio_addr = PHYS_TO_DMA(paddr);
+	else
+		xio_addr = PHYS_TO_TIODMA(paddr);
+
+	window_base = pcibus_info->pbi_dir_xbase;
+	offset = xio_addr - window_base;
+	endoff = req_size + offset;
+
+	if (req_size > (1ULL << 31))	/* Too Big */
+		return 0;
+	if (xio_addr < window_base)	/* Out of range for mappings */
+		return 0;
+	if (endoff > (1ULL << 31))	/* Too Big */
+		return 0;
+
+	return PCI32_DIRECT_BASE | offset;
+}
+
+/*
+ * Wrapper routine for freeing DMA maps.
+ * Only 32-bit mapped handles own ATEs; Direct 64 and Direct 32 handles
+ * have nothing to release, so they are ignored. 'direction' is not used.
+ */
+void
+pcibr_dma_unmap(struct pcidev_info *pcidev_info, dma_addr_t dma_handle,
+		int direction)
+{
+	struct pcibus_info *pcibus_info =
+	    (struct pcibus_info *)pcidev_info->pdi_pcibus_info;
+	int ate_index;
+
+	if (!IS_PCI32_MAPPED(dma_handle))
+		return;
+
+	/* Recover the first ATE index from the handle with swap turned off. */
+	ate_index = IOPG((ATE_SWAP_OFF(dma_handle) - PCI32_MAPPED_BASE));
+	pcibr_ate_free(pcibus_info, ate_index);
+}
+
+/*
+ * On SN systems there is a race condition between a PIO read response and
+ * DMA's. In rare cases, the read response may beat the DMA, causing the
+ * driver to think that data in memory is complete and meaningful. This code
+ * eliminates that race. This routine is called by the PIO read routines
+ * after doing the read. For PIC this routine then forces a fake interrupt
+ * on another line, which is logically associated with the slot that the PIO
+ * is addressed to. It then spins while watching the memory location that
+ * the interrupt is targeted to. When the interrupt response arrives, we
+ * are sure that the DMA has landed in memory and it is safe for the driver
+ * to proceed. For TIOCP use the Device(x) Write Request Buffer Flush
+ * Bridge register since it ensures the data has entered the coherence domain,
+ * unlike the PIC Device(x) Write Request Buffer Flush register.
+ */
+
+/*
+ * Flush in-flight DMA for the device whose BAR contains the PIO address
+ * 'addr'.
+ *
+ * Does nothing when the I/O infrastructure is not initialised, when the
+ * nasid in 'addr' maps to no compact node, when no flush list is set up for
+ * the widget, or when no device BAR on the widget matches 'addr'.
+ */
+void sn_dma_flush(uint64_t addr)
+{
+	nasid_t nasid;
+	int cnode;
+	int is_tio;
+	int wid_num;
+	int i, j;
+	int bwin;
+	uint64_t flags;
+	struct hubdev_info *hubinfo;
+	volatile struct sn_flush_device_list *p;
+	struct sn_flush_nasid_entry *flush_nasid_list;
+
+	if (!sn_ioif_inited)
+		return;
+
+	/* Look the compact node id up once and reuse it. */
+	nasid = NASID_GET(addr);
+	cnode = nasid_to_cnodeid(nasid);
+	if (cnode == -1)
+		return;
+
+	hubinfo = (NODEPDA(cnode))->pdinfo;
+
+	if (!hubinfo) {
+		BUG();
+	}
+
+	/* Odd nasids are treated as TIO, even ones as hub. */
+	is_tio = (nasid & 1);
+	if (is_tio) {
+		wid_num = TIO_SWIN_WIDGETNUM(addr);
+		bwin = TIO_BWIN_WINDOWNUM(addr);
+	} else {
+		wid_num = SWIN_WIDGETNUM(addr);
+		bwin = BWIN_WINDOWNUM(addr);
+	}
+
+	flush_nasid_list = &hubinfo->hdi_flush_nasid_list;
+	if (flush_nasid_list->widget_p == NULL)
+		return;
+
+	/* For a big-window address the widget number comes from the ITTE. */
+	if (bwin > 0) {
+		uint64_t itte = flush_nasid_list->iio_itte[bwin];
+
+		if (is_tio) {
+			wid_num = (itte >> TIO_ITTE_WIDGET_SHIFT) &
+			    TIO_ITTE_WIDGET_MASK;
+		} else {
+			wid_num = (itte >> IIO_ITTE_WIDGET_SHIFT) &
+			    IIO_ITTE_WIDGET_MASK;
+		}
+	}
+
+	if (flush_nasid_list->widget_p[wid_num] == NULL)
+		return;
+	p = &flush_nasid_list->widget_p[wid_num][0];
+
+	/* find a matching BAR */
+	for (i = 0; i < DEV_PER_WIDGET; i++) {
+		for (j = 0; j < PCI_ROM_RESOURCE; j++) {
+			/* a zero start ends this device's BAR list */
+			if (p->sfdl_bar_list[j].start == 0)
+				break;
+			if (addr >= p->sfdl_bar_list[j].start
+			    && addr <= p->sfdl_bar_list[j].end)
+				break;
+		}
+		if (j < PCI_ROM_RESOURCE && p->sfdl_bar_list[j].start != 0)
+			break;
+		p++;
+	}
+
+	/* if no matching BAR, return without doing anything. */
+	if (i == DEV_PER_WIDGET)
+		return;
+
+	/*
+	 * For TIOCP use the Device(x) Write Request Buffer Flush Bridge
+	 * register since it ensures the data has entered the coherence
+	 * domain, unlike PIC
+	 */
+	if (is_tio) {
+		uint32_t tio_id = REMOTE_HUB_L(nasid, TIO_NODE_ID);
+		uint32_t revnum = XWIDGET_PART_REV_NUM(tio_id);
+
+		/* TIOCP BRINGUP WAR (PV907516): Don't write buffer flush reg */
+		if ((1 << XWIDGET_PART_REV_NUM_REV(revnum)) & PV907516)
+			return;
+
+		pcireg_wrb_flush_get(p->sfdl_pcibus_info,
+				     (p->sfdl_slot - 1));
+		return;
+	}
+
+	spin_lock_irqsave(&((struct sn_flush_device_list *)p)->
+			  sfdl_flush_lock, flags);
+
+	p->sfdl_flush_value = 0;
+
+	/* force an interrupt. */
+	*(volatile uint32_t *)(p->sfdl_force_int_addr) = 1;
+
+	/* wait for the interrupt to come back. */
+	while (*(p->sfdl_flush_addr) != 0x10f) ;
+
+	/* okay, everything is synched up. */
+	spin_unlock_irqrestore((spinlock_t *)&p->sfdl_flush_lock, flags);
+}
+
+/*
+ * Wrapper DMA interface. Called from pci_dma.c routines.
+ *
+ * Chooses a mapping method for 'size' bytes at 'phys_addr':
+ *  - consistent requests (SN_PCIDMA_CONSISTENT set in 'flags') use 64-bit
+ *    direct when coherent_dma_mask is all ones, otherwise an ATE mapping;
+ *  - streaming requests use 64-bit direct when dma_mask is all ones,
+ *    otherwise 32-bit direct with an ATE mapping as fallback.
+ *
+ * Returns the DMA address, or 0 on failure.
+ */
+
+uint64_t
+pcibr_dma_map(struct pcidev_info * pcidev_info, unsigned long phys_addr,
+	      size_t size, unsigned int flags)
+{
+	struct pci_dev *pcidev = pcidev_info->pdi_linux_pcidev;
+	dma_addr_t dma_handle;
+
+	if (flags & SN_PCIDMA_CONSISTENT) {
+		/* sn_pci_alloc_consistent interfaces */
+		if (pcidev->dev.coherent_dma_mask == ~0UL)
+			dma_handle =
+			    pcibr_dmatrans_direct64(pcidev_info, phys_addr,
+						    PCI64_ATTR_BAR);
+		else
+			dma_handle =
+			    (dma_addr_t) pcibr_dmamap_ate32(pcidev_info,
+							    phys_addr, size,
+							    PCI32_ATE_BAR);
+		return dma_handle;
+	}
+
+	/* map_sg/map_single interfaces */
+
+	/* SN cannot support DMA addresses smaller than 32 bits. */
+	if (pcidev->dma_mask < 0x7fffffff)
+		return 0;
+
+	if (pcidev->dma_mask == ~0UL) {
+		/*
+		 * Handle the most common case: 64 bit cards. This
+		 * call should always succeed.
+		 */
+		dma_handle = pcibr_dmatrans_direct64(pcidev_info, phys_addr,
+						     PCI64_ATTR_PREF);
+		return dma_handle;
+	}
+
+	/* Handle 32-63 bit cards via direct mapping */
+	dma_handle = pcibr_dmatrans_direct32(pcidev_info, phys_addr, size, 0);
+	if (!dma_handle) {
+		/*
+		 * It is a 32 bit card and we cannot do direct mapping,
+		 * so we use an ATE.
+		 */
+		dma_handle = pcibr_dmamap_ate32(pcidev_info, phys_addr,
+						size, PCI32_ATE_PREF);
+	}
+
+	return dma_handle;
+}
+
+EXPORT_SYMBOL(sn_dma_flush);
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
new file mode 100644
index 00000000000..92bd278cf7f
--- /dev/null
+++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
@@ -0,0 +1,170 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2001-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <asm/sn/sn_sal.h>
+#include "xtalk/xwidgetdev.h"
+#include <asm/sn/geo.h>
+#include "xtalk/hubdev.h"
+#include "pci/pcibus_provider_defs.h"
+#include "pci/pcidev.h"
+#include "pci/pcibr_provider.h"
+#include <asm/sn/addrs.h>
+
+
+/*
+ * Issue the SN_SAL_IOIF_ERROR_INTERRUPT SAL call for the bus described by
+ * 'soft' (segment 0, persistent bus number from the bus-common data) and
+ * return the v0 word of the SAL result as an int.
+ */
+static int sal_pcibr_error_interrupt(struct pcibus_info *soft)
+{
+	struct ia64_sal_retval sal_result;
+	int segment = 0;
+	uint64_t busnum = soft->pbi_buscommon.bs_persist_busnum;
+
+	sal_result.status = 0;
+	sal_result.v0 = 0;
+
+	SAL_CALL_NOLOCK(sal_result,
+			(u64) SN_SAL_IOIF_ERROR_INTERRUPT,
+			(u64) segment, (u64) busnum, 0, 0, 0, 0, 0);
+
+	return (int)sal_result.v0;
+}
+
+/*
+ * PCI Bridge Error interrupt handler. Gets invoked whenever a PCI
+ * bridge sends an error interrupt. 'arg' is the struct pcibus_info that
+ * was registered with the IRQ. A negative result from the SAL error call
+ * is fatal and panics; otherwise the interrupt is reported as handled.
+ */
+static irqreturn_t
+pcibr_error_intr_handler(int irq, void *arg, struct pt_regs *regs)
+{
+	struct pcibus_info *bus = (struct pcibus_info *)arg;
+	int sal_rc = sal_pcibr_error_interrupt(bus);
+
+	if (sal_rc < 0)
+		panic("pcibr_error_intr_handler(): Fatal Bridge Error");
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Create the kernel's bus soft structure for a PCI bridge from the copy
+ * handed over by the PROM.
+ *
+ * Returns the new struct pcibus_info, or NULL when the ASIC is not a PCI
+ * bridge or when memory for the structure or its ATE map cannot be
+ * allocated.
+ */
+void *
+pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft)
+{
+	int nasid, cnode, j;
+	size_t ate_map_bytes;
+	struct hubdev_info *hubdev_info;
+	struct pcibus_info *soft;
+	struct sn_flush_device_list *sn_flush_device_list;
+
+	if (! IS_PCI_BRIDGE_ASIC(prom_bussoft->bs_asic_type)) {
+		return NULL;
+	}
+
+	/*
+	 * Allocate kernel bus soft and copy from prom.
+	 */
+
+	soft = kmalloc(sizeof(struct pcibus_info), GFP_KERNEL);
+	if (!soft) {
+		return NULL;
+	}
+
+	/*
+	 * NOTE(review): this copies sizeof(struct pcibus_info) bytes from a
+	 * struct pcibus_bussoft pointer; presumably the PROM object really is
+	 * a full pcibus_info -- confirm.
+	 */
+	memcpy(soft, prom_bussoft, sizeof(struct pcibus_info));
+	soft->pbi_buscommon.bs_base =
+	    (((u64) soft->pbi_buscommon.
+	      bs_base << 4) >> 4) | __IA64_UNCACHED_OFFSET;
+
+	/*
+	 * Setup the PMU ATE map. This is done before the IRQ is requested
+	 * and before 'soft' is published in the flush lists, so that an
+	 * allocation failure can be unwound by simply freeing 'soft'.
+	 */
+	ate_map_bytes = soft->pbi_int_ate_size * sizeof(uint64_t);
+	soft->pbi_int_ate_resource.lowest_free_index = 0;
+	soft->pbi_int_ate_resource.ate = kmalloc(ate_map_bytes, GFP_KERNEL);
+	if (!soft->pbi_int_ate_resource.ate) {
+		kfree(soft);
+		return NULL;
+	}
+	memset(soft->pbi_int_ate_resource.ate, 0, ate_map_bytes);
+
+	spin_lock_init(&soft->pbi_lock);
+
+	/*
+	 * register the bridge's error interrupt handler
+	 */
+	if (request_irq(SGI_PCIBR_ERROR, (void *)pcibr_error_intr_handler,
+			SA_SHIRQ, "PCIBR error", (void *)(soft))) {
+		printk(KERN_WARNING
+		       "pcibr cannot allocate interrupt for error handler\n");
+	}
+
+	/*
+	 * Update the Bridge with the "kernel" pagesize
+	 */
+	if (PAGE_SIZE < 16384) {
+		pcireg_control_bit_clr(soft, PCIBR_CTRL_PAGE_SIZE);
+	} else {
+		pcireg_control_bit_set(soft, PCIBR_CTRL_PAGE_SIZE);
+	}
+
+	nasid = NASID_GET(soft->pbi_buscommon.bs_base);
+	cnode = nasid_to_cnodeid(nasid);
+	hubdev_info = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo);
+
+	/*
+	 * Point every flush-list entry of this widget that carries our
+	 * persistent bus number at the new bus soft structure.
+	 */
+	if (hubdev_info->hdi_flush_nasid_list.widget_p) {
+		sn_flush_device_list = hubdev_info->hdi_flush_nasid_list.
+		    widget_p[(int)soft->pbi_buscommon.bs_xid];
+		if (sn_flush_device_list) {
+			for (j = 0; j < DEV_PER_WIDGET;
+			     j++, sn_flush_device_list++) {
+				if (sn_flush_device_list->sfdl_slot == -1)
+					continue;
+				if (sn_flush_device_list->
+				    sfdl_persistent_busnum ==
+				    soft->pbi_buscommon.bs_persist_busnum)
+					sn_flush_device_list->sfdl_pcibus_info =
+					    soft;
+			}
+		}
+	}
+
+	return soft;
+}
+
+/*
+ * Force the bridge interrupt pin recorded in 'sn_irq_info' (irq_int_bit).
+ * Nothing is done when the irq info has no PCI device attached.
+ */
+void pcibr_force_interrupt(struct sn_irq_info *sn_irq_info)
+{
+	int bit = sn_irq_info->irq_int_bit;
+	struct pcidev_info *pcidev_info =
+	    (struct pcidev_info *)sn_irq_info->irq_pciioinfo;
+	struct pcibus_info *pcibus_info;
+
+	if (!pcidev_info)
+		return;
+
+	pcibus_info = (struct pcibus_info *)
+	    pcidev_info->pdi_host_pcidev_info->pdi_pcibus_info;
+	pcireg_force_intr_set(pcibus_info, bit);
+}
+
+/*
+ * Retarget a device's bridge interrupt to the crosstalk address stored in
+ * 'sn_irq_info' (irq_xtalkaddr): the interrupt bit is disabled, the host
+ * address is rewritten, the bit is re-enabled and an interrupt is forced.
+ * Nothing is done when the irq info has no PCI device attached.
+ */
+void pcibr_change_devices_irq(struct sn_irq_info *sn_irq_info)
+{
+	int bit = sn_irq_info->irq_int_bit;
+	uint64_t xtalk_addr = sn_irq_info->irq_xtalkaddr;
+	struct pcidev_info *pcidev_info =
+	    (struct pcidev_info *)sn_irq_info->irq_pciioinfo;
+	struct pcibus_info *pcibus_info;
+
+	if (!pcidev_info)
+		return;
+
+	pcibus_info = (struct pcibus_info *)
+	    pcidev_info->pdi_host_pcidev_info->pdi_pcibus_info;
+
+	/* Disable the device's IRQ */
+	pcireg_intr_enable_bit_clr(pcibus_info, bit);
+
+	/* Change the device's IRQ */
+	pcireg_intr_addr_addr_set(pcibus_info, bit, xtalk_addr);
+
+	/* Re-enable the device's IRQ */
+	pcireg_intr_enable_bit_set(pcibus_info, bit);
+
+	pcibr_force_interrupt(sn_irq_info);
+}
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_reg.c b/arch/ia64/sn/pci/pcibr/pcibr_reg.c
new file mode 100644
index 00000000000..74a74a7d2a1
--- /dev/null
+++ b/arch/ia64/sn/pci/pcibr/pcibr_reg.c
@@ -0,0 +1,282 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include "pci/pcibus_provider_defs.h"
+#include "pci/pcidev.h"
+#include "pci/tiocp.h"
+#include "pci/pic.h"
+#include "pci/pcibr_provider.h"
+
+/*
+ * Overlay of the two bridge register layouts. The accessors in this file
+ * cast the bus base address to this union and select the 'tio' or 'pic'
+ * view according to pbi_bridge_type.
+ */
+union br_ptr {
+ struct tiocp tio;
+ struct pic pic;
+};
+
+/*
+ * Control Register Access -- Read/Write 0000_0020
+ */
+/*
+ * Clear 'bits' in the bridge control register of the given bus.
+ * A NULL pcibus_info is ignored; an unknown bridge type panics.
+ */
+void pcireg_control_bit_clr(struct pcibus_info *pcibus_info, uint64_t bits)
+{
+	union br_ptr *ptr;
+
+	/* Derive the register base only after the NULL check. */
+	if (!pcibus_info)
+		return;
+
+	ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base;
+
+	switch (pcibus_info->pbi_bridge_type) {
+	case PCIBR_BRIDGETYPE_TIOCP:
+		ptr->tio.cp_control &= ~bits;
+		break;
+	case PCIBR_BRIDGETYPE_PIC:
+		ptr->pic.p_wid_control &= ~bits;
+		break;
+	default:
+		panic
+		    ("pcireg_control_bit_clr: unknown bridgetype bridge 0x%p",
+		     (void *)ptr);
+	}
+}
+
+/*
+ * Set 'bits' in the bridge control register of the given bus.
+ * A NULL pcibus_info is ignored; an unknown bridge type panics.
+ */
+void pcireg_control_bit_set(struct pcibus_info *pcibus_info, uint64_t bits)
+{
+	union br_ptr *ptr;
+
+	/* Derive the register base only after the NULL check. */
+	if (!pcibus_info)
+		return;
+
+	ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base;
+
+	switch (pcibus_info->pbi_bridge_type) {
+	case PCIBR_BRIDGETYPE_TIOCP:
+		ptr->tio.cp_control |= bits;
+		break;
+	case PCIBR_BRIDGETYPE_PIC:
+		ptr->pic.p_wid_control |= bits;
+		break;
+	default:
+		panic
+		    ("pcireg_control_bit_set: unknown bridgetype bridge 0x%p",
+		     (void *)ptr);
+	}
+}
+
+/*
+ * PCI/PCIX Target Flush Register Access -- Read Only 0000_0050
+ */
+/*
+ * Read the bridge's target flush register and return the value read.
+ * A non-zero value panics, so a normal return is always 0. With a NULL
+ * pcibus_info no register is read and 0 is returned; an unknown bridge
+ * type panics.
+ */
+uint64_t pcireg_tflush_get(struct pcibus_info *pcibus_info)
+{
+	union br_ptr *ptr;
+	uint64_t ret = 0;
+
+	/* Derive the register base only after the NULL check. */
+	if (pcibus_info) {
+		ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base;
+
+		switch (pcibus_info->pbi_bridge_type) {
+		case PCIBR_BRIDGETYPE_TIOCP:
+			ret = ptr->tio.cp_tflush;
+			break;
+		case PCIBR_BRIDGETYPE_PIC:
+			ret = ptr->pic.p_wid_tflush;
+			break;
+		default:
+			panic
+			    ("pcireg_tflush_get: unknown bridgetype bridge 0x%p",
+			     (void *)ptr);
+		}
+	}
+
+	/* Read of the Target Flush should always return zero */
+	if (ret != 0)
+		panic("pcireg_tflush_get:Target Flush failed\n");
+
+	return ret;
+}
+
+/*
+ * Interrupt Status Register Access -- Read Only 0000_0100
+ */
+/*
+ * Return the contents of the bridge's interrupt status register.
+ * With a NULL pcibus_info no register is read and 0 is returned; an
+ * unknown bridge type panics.
+ */
+uint64_t pcireg_intr_status_get(struct pcibus_info * pcibus_info)
+{
+	union br_ptr *ptr;
+	uint64_t ret = 0;
+
+	/* Derive the register base only after the NULL check. */
+	if (!pcibus_info)
+		return ret;
+
+	ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base;
+
+	switch (pcibus_info->pbi_bridge_type) {
+	case PCIBR_BRIDGETYPE_TIOCP:
+		ret = ptr->tio.cp_int_status;
+		break;
+	case PCIBR_BRIDGETYPE_PIC:
+		ret = ptr->pic.p_int_status;
+		break;
+	default:
+		panic
+		    ("pcireg_intr_status_get: unknown bridgetype bridge 0x%p",
+		     (void *)ptr);
+	}
+
+	return ret;
+}
+
+/*
+ * Interrupt Enable Register Access -- Read/Write 0000_0108
+ */
+/*
+ * Clear 'bits' in the bridge's interrupt enable register.
+ * A NULL pcibus_info is ignored; an unknown bridge type panics.
+ */
+void pcireg_intr_enable_bit_clr(struct pcibus_info *pcibus_info, uint64_t bits)
+{
+	union br_ptr *ptr;
+
+	/* Derive the register base only after the NULL check. */
+	if (!pcibus_info)
+		return;
+
+	ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base;
+
+	switch (pcibus_info->pbi_bridge_type) {
+	case PCIBR_BRIDGETYPE_TIOCP:
+		ptr->tio.cp_int_enable &= ~bits;
+		break;
+	case PCIBR_BRIDGETYPE_PIC:
+		ptr->pic.p_int_enable &= ~bits;
+		break;
+	default:
+		panic
+		    ("pcireg_intr_enable_bit_clr: unknown bridgetype bridge 0x%p",
+		     (void *)ptr);
+	}
+}
+
+/*
+ * Set 'bits' in the bridge's interrupt enable register.
+ * A NULL pcibus_info is ignored; an unknown bridge type panics.
+ */
+void pcireg_intr_enable_bit_set(struct pcibus_info *pcibus_info, uint64_t bits)
+{
+	union br_ptr *ptr;
+
+	/* Derive the register base only after the NULL check. */
+	if (!pcibus_info)
+		return;
+
+	ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base;
+
+	switch (pcibus_info->pbi_bridge_type) {
+	case PCIBR_BRIDGETYPE_TIOCP:
+		ptr->tio.cp_int_enable |= bits;
+		break;
+	case PCIBR_BRIDGETYPE_PIC:
+		ptr->pic.p_int_enable |= bits;
+		break;
+	default:
+		panic
+		    ("pcireg_intr_enable_bit_set: unknown bridgetype bridge 0x%p",
+		     (void *)ptr);
+	}
+}
+
+/*
+ * Intr Host Address Register (int_addr) -- Read/Write 0000_0130 - 0000_0168
+ */
+/*
+ * Replace the host-address field of interrupt address register 'int_n'
+ * with the corresponding bits of 'addr'; bits outside the
+ * *_HOST_INTR_ADDR mask are left as they were.
+ * A NULL pcibus_info is ignored; an unknown bridge type panics.
+ */
+void pcireg_intr_addr_addr_set(struct pcibus_info *pcibus_info, int int_n,
+			       uint64_t addr)
+{
+	union br_ptr *ptr;
+
+	/* Derive the register base only after the NULL check. */
+	if (!pcibus_info)
+		return;
+
+	ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base;
+
+	switch (pcibus_info->pbi_bridge_type) {
+	case PCIBR_BRIDGETYPE_TIOCP:
+		ptr->tio.cp_int_addr[int_n] &= ~TIOCP_HOST_INTR_ADDR;
+		ptr->tio.cp_int_addr[int_n] |=
+		    (addr & TIOCP_HOST_INTR_ADDR);
+		break;
+	case PCIBR_BRIDGETYPE_PIC:
+		ptr->pic.p_int_addr[int_n] &= ~PIC_HOST_INTR_ADDR;
+		ptr->pic.p_int_addr[int_n] |=
+		    (addr & PIC_HOST_INTR_ADDR);
+		break;
+	default:
+		panic
+		    ("pcireg_intr_addr_addr_set: unknown bridgetype bridge 0x%p",
+		     (void *)ptr);
+	}
+}
+
+/*
+ * Force Interrupt Register Access -- Write Only 0000_01C0 - 0000_01F8
+ */
+/*
+ * Write 1 to force-interrupt register 'int_n' of the bridge.
+ * A NULL pcibus_info is ignored; an unknown bridge type panics.
+ */
+void pcireg_force_intr_set(struct pcibus_info *pcibus_info, int int_n)
+{
+	union br_ptr *ptr;
+
+	/* Derive the register base only after the NULL check. */
+	if (!pcibus_info)
+		return;
+
+	ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base;
+
+	switch (pcibus_info->pbi_bridge_type) {
+	case PCIBR_BRIDGETYPE_TIOCP:
+		ptr->tio.cp_force_pin[int_n] = 1;
+		break;
+	case PCIBR_BRIDGETYPE_PIC:
+		ptr->pic.p_force_pin[int_n] = 1;
+		break;
+	default:
+		panic
+		    ("pcireg_force_intr_set: unknown bridgetype bridge 0x%p",
+		     (void *)ptr);
+	}
+}
+
+/*
+ * Device(x) Write Buffer Flush Reg Access -- Read Only 0000_0240 - 0000_0258
+ */
+/*
+ * Read the write request buffer flush register of 'device' and return the
+ * value read. With a NULL pcibus_info no register is read and 0 is
+ * returned; an unknown bridge type panics.
+ */
+uint64_t pcireg_wrb_flush_get(struct pcibus_info *pcibus_info, int device)
+{
+	union br_ptr *ptr;
+	uint64_t ret = 0;
+
+	/* Derive the register base only after the NULL check. */
+	if (!pcibus_info)
+		return ret;
+
+	ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base;
+
+	switch (pcibus_info->pbi_bridge_type) {
+	case PCIBR_BRIDGETYPE_TIOCP:
+		ret = ptr->tio.cp_wr_req_buf[device];
+		break;
+	case PCIBR_BRIDGETYPE_PIC:
+		ret = ptr->pic.p_wr_req_buf[device];
+		break;
+	default:
+		panic("pcireg_wrb_flush_get: unknown bridgetype bridge 0x%p", (void *)ptr);
+	}
+
+	/* Read of the Write Buffer Flush should always return zero */
+	return ret;
+}
+
+/*
+ * Store 'val' into internal ATE RAM entry 'ate_index' of the bridge.
+ * A NULL pcibus_info is ignored; an unknown bridge type panics.
+ */
+void pcireg_int_ate_set(struct pcibus_info *pcibus_info, int ate_index,
+			uint64_t val)
+{
+	union br_ptr *ptr;
+
+	/* Derive the register base only after the NULL check. */
+	if (!pcibus_info)
+		return;
+
+	ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base;
+
+	switch (pcibus_info->pbi_bridge_type) {
+	case PCIBR_BRIDGETYPE_TIOCP:
+		ptr->tio.cp_int_ate_ram[ate_index] = (uint64_t) val;
+		break;
+	case PCIBR_BRIDGETYPE_PIC:
+		ptr->pic.p_int_ate_ram[ate_index] = (uint64_t) val;
+		break;
+	default:
+		panic
+		    ("pcireg_int_ate_set: unknown bridgetype bridge 0x%p",
+		     (void *)ptr);
+	}
+}
+
+/*
+ * Return the address of internal ATE RAM entry 'ate_index' of the bridge.
+ * With a NULL pcibus_info a null pointer is returned; an unknown bridge
+ * type panics.
+ */
+uint64_t *pcireg_int_ate_addr(struct pcibus_info *pcibus_info, int ate_index)
+{
+	union br_ptr *ptr;
+	uint64_t *ret = (uint64_t *) 0;
+
+	/* Derive the register base only after the NULL check. */
+	if (!pcibus_info)
+		return ret;
+
+	ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base;
+
+	switch (pcibus_info->pbi_bridge_type) {
+	case PCIBR_BRIDGETYPE_TIOCP:
+		ret = (uint64_t *) & (ptr->tio.cp_int_ate_ram[ate_index]);
+		break;
+	case PCIBR_BRIDGETYPE_PIC:
+		ret = (uint64_t *) & (ptr->pic.p_int_ate_ram[ate_index]);
+		break;
+	default:
+		panic
+		    ("pcireg_int_ate_addr: unknown bridgetype bridge 0x%p",
+		     (void *)ptr);
+	}
+
+	return ret;
+}