diff options
author | Russell King <rmk+kernel@arm.linux.org.uk> | 2009-12-06 17:00:33 +0000 |
---|---|---|
committer | Russell King <rmk+kernel@arm.linux.org.uk> | 2009-12-06 17:00:33 +0000 |
commit | 3d14b5beba35250c548d3851a2b84fce742d8311 (patch) | |
tree | 065e3d93c3fcbc5ee4c44fa78662393cddbdf6de /arch | |
parent | 0719dc341389882cc834ed18fc9b7fc6006b2b85 (diff) | |
parent | 1bf8e6219552d5dd27012d567ec8c4bb9c2d86b4 (diff) |
Merge branch 'sa1100' into devel
Diffstat (limited to 'arch')
218 files changed, 8257 insertions, 3568 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 7f418bbc261..eef3bbb9707 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -126,4 +126,11 @@ config HAVE_DMA_API_DEBUG config HAVE_DEFAULT_NO_SPIN_MUTEXES bool +config HAVE_HW_BREAKPOINT + bool + depends on HAVE_PERF_EVENTS + select ANON_INODES + select PERF_EVENTS + + source "kernel/gcov/Kconfig" diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h index 815680b585e..b3e888638bb 100644 --- a/arch/alpha/include/asm/thread_info.h +++ b/arch/alpha/include/asm/thread_info.h @@ -61,21 +61,24 @@ register struct thread_info *__current_thread_info __asm__("$8"); /* * Thread information flags: * - these are process state flags and used from assembly - * - pending work-to-be-done flags come first to fit in and immediate operand. + * - pending work-to-be-done flags come first and must be assigned to be + * within bits 0 to 7 to fit in and immediate operand. + * - ALPHA_UAC_SHIFT below must be kept consistent with the unaligned + * control flags. * * TIF_SYSCALL_TRACE is known to be 0 via blbs. */ #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ -#define TIF_SIGPENDING 1 /* signal pending */ -#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ -#define TIF_POLLING_NRFLAG 3 /* poll_idle is polling NEED_RESCHED */ -#define TIF_DIE_IF_KERNEL 4 /* dik recursion lock */ -#define TIF_UAC_NOPRINT 5 /* see sysinfo.h */ -#define TIF_UAC_NOFIX 6 -#define TIF_UAC_SIGBUS 7 -#define TIF_MEMDIE 8 -#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */ -#define TIF_NOTIFY_RESUME 10 /* callback before returning to user */ +#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ +#define TIF_SIGPENDING 2 /* signal pending */ +#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ +#define TIF_POLLING_NRFLAG 8 /* poll_idle is polling NEED_RESCHED */ +#define TIF_DIE_IF_KERNEL 9 /* dik recursion lock */ +#define TIF_UAC_NOPRINT 10 /* see sysinfo.h */ +#define TIF_UAC_NOFIX 11 +#define TIF_UAC_SIGBUS 12 +#define TIF_MEMDIE 13 +#define TIF_RESTORE_SIGMASK 14 /* restore signal mask in do_signal */ #define TIF_FREEZE 16 /* is freezing for suspend */ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) @@ -94,7 +97,7 @@ register struct thread_info *__current_thread_info __asm__("$8"); #define _TIF_ALLWORK_MASK (_TIF_WORK_MASK \ | _TIF_SYSCALL_TRACE) -#define ALPHA_UAC_SHIFT 6 +#define ALPHA_UAC_SHIFT 10 #define ALPHA_UAC_MASK (1 << TIF_UAC_NOPRINT | 1 << TIF_UAC_NOFIX | \ 1 << TIF_UAC_SIGBUS) diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c index 8e059e58b0a..53dd2f1a53a 100644 --- a/arch/alpha/kernel/core_marvel.c +++ b/arch/alpha/kernel/core_marvel.c @@ -1103,6 +1103,8 @@ marvel_agp_info(void) * Allocate the info structure. */ agp = kmalloc(sizeof(*agp), GFP_KERNEL); + if (!agp) + return NULL; /* * Fill it in. diff --git a/arch/alpha/kernel/core_titan.c b/arch/alpha/kernel/core_titan.c index 76686497b1e..219bf271c0b 100644 --- a/arch/alpha/kernel/core_titan.c +++ b/arch/alpha/kernel/core_titan.c @@ -757,6 +757,8 @@ titan_agp_info(void) * Allocate the info structure. */ agp = kmalloc(sizeof(*agp), GFP_KERNEL); + if (!agp) + return NULL; /* * Fill it in. diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index cc783466142..c0de072b830 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -92,7 +92,7 @@ show_interrupts(struct seq_file *p, void *v) for_each_online_cpu(j) seq_printf(p, "%10u ", kstat_irqs_cpu(irq, j)); #endif - seq_printf(p, " %14s", irq_desc[irq].chip->typename); + seq_printf(p, " %14s", irq_desc[irq].chip->name); seq_printf(p, " %c%s", (action->flags & IRQF_DISABLED)?'+':' ', action->name); diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c index 38c805dfc54..cfde865b78e 100644 --- a/arch/alpha/kernel/irq_alpha.c +++ b/arch/alpha/kernel/irq_alpha.c @@ -228,7 +228,7 @@ struct irqaction timer_irqaction = { }; static struct irq_chip rtc_irq_type = { - .typename = "RTC", + .name = "RTC", .startup = rtc_startup, .shutdown = rtc_enable_disable, .enable = rtc_enable_disable, diff --git a/arch/alpha/kernel/irq_i8259.c b/arch/alpha/kernel/irq_i8259.c index 50bfec9b588..83a9ac28089 100644 --- a/arch/alpha/kernel/irq_i8259.c +++ b/arch/alpha/kernel/irq_i8259.c @@ -84,7 +84,7 @@ i8259a_end_irq(unsigned int irq) } struct irq_chip i8259a_irq_type = { - .typename = "XT-PIC", + .name = "XT-PIC", .startup = i8259a_startup_irq, .shutdown = i8259a_disable_irq, .enable = i8259a_enable_irq, diff --git a/arch/alpha/kernel/irq_pyxis.c b/arch/alpha/kernel/irq_pyxis.c index 69199a76ec4..989ce46a0cf 100644 --- a/arch/alpha/kernel/irq_pyxis.c +++ b/arch/alpha/kernel/irq_pyxis.c @@ -71,7 +71,7 @@ pyxis_mask_and_ack_irq(unsigned int irq) } static struct irq_chip pyxis_irq_type = { - .typename = "PYXIS", + .name = "PYXIS", .startup = pyxis_startup_irq, .shutdown = pyxis_disable_irq, .enable = pyxis_enable_irq, diff --git a/arch/alpha/kernel/irq_srm.c b/arch/alpha/kernel/irq_srm.c index 85229369a1f..d63e93e1e8b 100644 --- a/arch/alpha/kernel/irq_srm.c +++ b/arch/alpha/kernel/irq_srm.c @@ -49,7 +49,7 @@ srm_end_irq(unsigned int irq) /* Handle interrupts from the SRM, assuming no additional weirdness. */ static struct irq_chip srm_irq_type = { - .typename = "SRM", + .name = "SRM", .startup = srm_startup_irq, .shutdown = srm_disable_irq, .enable = srm_enable_irq, diff --git a/arch/alpha/kernel/sys_alcor.c b/arch/alpha/kernel/sys_alcor.c index 382035ef739..20a30b8b965 100644 --- a/arch/alpha/kernel/sys_alcor.c +++ b/arch/alpha/kernel/sys_alcor.c @@ -90,7 +90,7 @@ alcor_end_irq(unsigned int irq) } static struct irq_chip alcor_irq_type = { - .typename = "ALCOR", + .name = "ALCOR", .startup = alcor_startup_irq, .shutdown = alcor_disable_irq, .enable = alcor_enable_irq, diff --git a/arch/alpha/kernel/sys_cabriolet.c b/arch/alpha/kernel/sys_cabriolet.c index ed349436732..affd0f3f25d 100644 --- a/arch/alpha/kernel/sys_cabriolet.c +++ b/arch/alpha/kernel/sys_cabriolet.c @@ -72,7 +72,7 @@ cabriolet_end_irq(unsigned int irq) } static struct irq_chip cabriolet_irq_type = { - .typename = "CABRIOLET", + .name = "CABRIOLET", .startup = cabriolet_startup_irq, .shutdown = cabriolet_disable_irq, .enable = cabriolet_enable_irq, diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c index 46e70ece517..d64e1e497e7 100644 --- a/arch/alpha/kernel/sys_dp264.c +++ b/arch/alpha/kernel/sys_dp264.c @@ -199,7 +199,7 @@ clipper_set_affinity(unsigned int irq, const struct cpumask *affinity) } static struct irq_chip dp264_irq_type = { - .typename = "DP264", + .name = "DP264", .startup = dp264_startup_irq, .shutdown = dp264_disable_irq, .enable = dp264_enable_irq, @@ -210,7 +210,7 @@ static struct irq_chip dp264_irq_type = { }; static struct irq_chip clipper_irq_type = { - .typename = "CLIPPER", + .name = "CLIPPER", .startup = clipper_startup_irq, .shutdown = clipper_disable_irq, .enable = clipper_enable_irq, diff --git a/arch/alpha/kernel/sys_eb64p.c b/arch/alpha/kernel/sys_eb64p.c index 660c23ef661..df2090ce5e7 100644 --- a/arch/alpha/kernel/sys_eb64p.c +++ b/arch/alpha/kernel/sys_eb64p.c @@ -70,7 +70,7 @@ eb64p_end_irq(unsigned int irq) } static struct irq_chip eb64p_irq_type = { - .typename = "EB64P", + .name = "EB64P", .startup = eb64p_startup_irq, .shutdown = eb64p_disable_irq, .enable = eb64p_enable_irq, diff --git a/arch/alpha/kernel/sys_eiger.c b/arch/alpha/kernel/sys_eiger.c index b99ea488d84..3ca1dbcf404 100644 --- a/arch/alpha/kernel/sys_eiger.c +++ b/arch/alpha/kernel/sys_eiger.c @@ -81,7 +81,7 @@ eiger_end_irq(unsigned int irq) } static struct irq_chip eiger_irq_type = { - .typename = "EIGER", + .name = "EIGER", .startup = eiger_startup_irq, .shutdown = eiger_disable_irq, .enable = eiger_enable_irq, diff --git a/arch/alpha/kernel/sys_jensen.c b/arch/alpha/kernel/sys_jensen.c index ef0b83a070a..7a7ae36fff9 100644 --- a/arch/alpha/kernel/sys_jensen.c +++ b/arch/alpha/kernel/sys_jensen.c @@ -119,7 +119,7 @@ jensen_local_end(unsigned int irq) } static struct irq_chip jensen_local_irq_type = { - .typename = "LOCAL", + .name = "LOCAL", .startup = jensen_local_startup, .shutdown = jensen_local_shutdown, .enable = jensen_local_enable, diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c index bbfc4f20ca7..0bb3b5c4f69 100644 --- a/arch/alpha/kernel/sys_marvel.c +++ b/arch/alpha/kernel/sys_marvel.c @@ -170,7 +170,7 @@ marvel_irq_noop_return(unsigned int irq) } static struct irq_chip marvel_legacy_irq_type = { - .typename = "LEGACY", + .name = "LEGACY", .startup = marvel_irq_noop_return, .shutdown = marvel_irq_noop, .enable = marvel_irq_noop, @@ -180,7 +180,7 @@ static struct irq_chip marvel_legacy_irq_type = { }; static struct irq_chip io7_lsi_irq_type = { - .typename = "LSI", + .name = "LSI", .startup = io7_startup_irq, .shutdown = io7_disable_irq, .enable = io7_enable_irq, @@ -190,7 +190,7 @@ static struct irq_chip io7_lsi_irq_type = { }; static struct irq_chip io7_msi_irq_type = { - .typename = "MSI", + .name = "MSI", .startup = io7_startup_irq, .shutdown = io7_disable_irq, .enable = io7_enable_irq, diff --git a/arch/alpha/kernel/sys_mikasa.c b/arch/alpha/kernel/sys_mikasa.c index 4e366641a08..ee886516981 100644 --- a/arch/alpha/kernel/sys_mikasa.c +++ b/arch/alpha/kernel/sys_mikasa.c @@ -69,7 +69,7 @@ mikasa_end_irq(unsigned int irq) } static struct irq_chip mikasa_irq_type = { - .typename = "MIKASA", + .name = "MIKASA", .startup = mikasa_startup_irq, .shutdown = mikasa_disable_irq, .enable = mikasa_enable_irq, diff --git a/arch/alpha/kernel/sys_noritake.c b/arch/alpha/kernel/sys_noritake.c index 35753a173ba..86503fe73a8 100644 --- a/arch/alpha/kernel/sys_noritake.c +++ b/arch/alpha/kernel/sys_noritake.c @@ -74,7 +74,7 @@ noritake_end_irq(unsigned int irq) } static struct irq_chip noritake_irq_type = { - .typename = "NORITAKE", + .name = "NORITAKE", .startup = noritake_startup_irq, .shutdown = noritake_disable_irq, .enable = noritake_enable_irq, diff --git a/arch/alpha/kernel/sys_rawhide.c b/arch/alpha/kernel/sys_rawhide.c index f3aec7e085c..26c322bf89e 100644 --- a/arch/alpha/kernel/sys_rawhide.c +++ b/arch/alpha/kernel/sys_rawhide.c @@ -136,7 +136,7 @@ rawhide_end_irq(unsigned int irq) } static struct irq_chip rawhide_irq_type = { - .typename = "RAWHIDE", + .name = "RAWHIDE", .startup = rawhide_startup_irq, .shutdown = rawhide_disable_irq, .enable = rawhide_enable_irq, diff --git a/arch/alpha/kernel/sys_ruffian.c b/arch/alpha/kernel/sys_ruffian.c index d9f9cfeb993..8de1046fe91 100644 --- a/arch/alpha/kernel/sys_ruffian.c +++ b/arch/alpha/kernel/sys_ruffian.c @@ -66,7 +66,7 @@ ruffian_init_irq(void) common_init_isa_dma(); } -#define RUFFIAN_LATCH ((PIT_TICK_RATE + HZ / 2) / HZ) +#define RUFFIAN_LATCH DIV_ROUND_CLOSEST(PIT_TICK_RATE, HZ) static void __init ruffian_init_rtc(void) diff --git a/arch/alpha/kernel/sys_rx164.c b/arch/alpha/kernel/sys_rx164.c index fc924637345..be161129eab 100644 --- a/arch/alpha/kernel/sys_rx164.c +++ b/arch/alpha/kernel/sys_rx164.c @@ -73,7 +73,7 @@ rx164_end_irq(unsigned int irq) } static struct irq_chip rx164_irq_type = { - .typename = "RX164", + .name = "RX164", .startup = rx164_startup_irq, .shutdown = rx164_disable_irq, .enable = rx164_enable_irq, diff --git a/arch/alpha/kernel/sys_sable.c b/arch/alpha/kernel/sys_sable.c index 426eb6906d0..b2abe27a23c 100644 --- a/arch/alpha/kernel/sys_sable.c +++ b/arch/alpha/kernel/sys_sable.c @@ -502,7 +502,7 @@ sable_lynx_mask_and_ack_irq(unsigned int irq) } static struct irq_chip sable_lynx_irq_type = { - .typename = "SABLE/LYNX", + .name = "SABLE/LYNX", .startup = sable_lynx_startup_irq, .shutdown = sable_lynx_disable_irq, .enable = sable_lynx_enable_irq, diff --git a/arch/alpha/kernel/sys_takara.c b/arch/alpha/kernel/sys_takara.c index 830318c2166..230464885b5 100644 --- a/arch/alpha/kernel/sys_takara.c +++ b/arch/alpha/kernel/sys_takara.c @@ -75,7 +75,7 @@ takara_end_irq(unsigned int irq) } static struct irq_chip takara_irq_type = { - .typename = "TAKARA", + .name = "TAKARA", .startup = takara_startup_irq, .shutdown = takara_disable_irq, .enable = takara_enable_irq, diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c index 88978fc60f8..288053342c8 100644 --- a/arch/alpha/kernel/sys_titan.c +++ b/arch/alpha/kernel/sys_titan.c @@ -195,7 +195,7 @@ init_titan_irqs(struct irq_chip * ops, int imin, int imax) } static struct irq_chip titan_irq_type = { - .typename = "TITAN", + .name = "TITAN", .startup = titan_startup_irq, .shutdown = titan_disable_irq, .enable = titan_enable_irq, diff --git a/arch/alpha/kernel/sys_wildfire.c b/arch/alpha/kernel/sys_wildfire.c index e91b4c3838a..62fd972e18e 100644 --- a/arch/alpha/kernel/sys_wildfire.c +++ b/arch/alpha/kernel/sys_wildfire.c @@ -158,7 +158,7 @@ wildfire_end_irq(unsigned int irq) } static struct irq_chip wildfire_irq_type = { - .typename = "WILDFIRE", + .name = "WILDFIRE", .startup = wildfire_startup_irq, .shutdown = wildfire_disable_irq, .enable = wildfire_enable_irq, diff --git a/arch/arm/configs/h3600_defconfig b/arch/arm/configs/h3600_defconfig index f6aed7747d4..efa78e144e5 100644 --- a/arch/arm/configs/h3600_defconfig +++ b/arch/arm/configs/h3600_defconfig @@ -1,86 +1,189 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc4 -# Thu Jun 9 01:59:03 2005 +# Linux kernel version: 2.6.32-rc5 +# Sat Oct 24 00:09:30 2009 # CONFIG_ARM=y -CONFIG_MMU=y -CONFIG_UID16=y +CONFIG_SYS_SUPPORTS_APM_EMULATION=y +CONFIG_GENERIC_GPIO=y +CONFIG_GENERIC_TIME=y +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_STACKTRACE_SUPPORT=y +CONFIG_HAVE_LATENCYTOP_SUPPORT=y +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_TRACE_IRQFLAGS_SUPPORT=y +CONFIG_HARDIRQS_SW_RESEND=y +CONFIG_GENERIC_IRQ_PROBE=y CONFIG_RWSEM_GENERIC_SPINLOCK=y +CONFIG_ARCH_HAS_CPUFREQ=y +CONFIG_GENERIC_HWEIGHT=y CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_GENERIC_IOMAP=y +CONFIG_ARCH_MTD_XIP=y +CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y +CONFIG_VECTORS_BASE=0xffff0000 +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" +CONFIG_CONSTRUCTORS=y # -# Code maturity level options +# General setup # CONFIG_EXPERIMENTAL=y -CONFIG_CLEAN_COMPILE=y CONFIG_BROKEN_ON_SMP=y CONFIG_INIT_ENV_ARG_LIMIT=32 - -# -# General setup -# CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y +CONFIG_SYSVIPC_SYSCTL=y # CONFIG_POSIX_MQUEUE is not set # CONFIG_BSD_PROCESS_ACCT is not set -CONFIG_SYSCTL=y +# CONFIG_TASKSTATS is not set # CONFIG_AUDIT is not set -CONFIG_HOTPLUG=y -CONFIG_KOBJECT_UEVENT=y + +# +# RCU Subsystem +# +CONFIG_TREE_RCU=y +# CONFIG_TREE_PREEMPT_RCU is not set +# CONFIG_RCU_TRACE is not set +CONFIG_RCU_FANOUT=32 +# CONFIG_RCU_FANOUT_EXACT is not set +# CONFIG_TREE_RCU_TRACE is not set # CONFIG_IKCONFIG is not set +CONFIG_LOG_BUF_SHIFT=14 +# CONFIG_GROUP_SCHED is not set +# CONFIG_CGROUPS is not set +# CONFIG_SYSFS_DEPRECATED_V2 is not set +# CONFIG_RELAY is not set +CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_IPC_NS is not set +# CONFIG_USER_NS is not set +# CONFIG_PID_NS is not set +# CONFIG_NET_NS is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +CONFIG_RD_GZIP=y +CONFIG_RD_BZIP2=y +CONFIG_RD_LZMA=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL=y +CONFIG_ANON_INODES=y # CONFIG_EMBEDDED is not set +CONFIG_UID16=y +CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y +CONFIG_ELF_CORE=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y -CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y CONFIG_SHMEM=y -CONFIG_CC_ALIGN_FUNCTIONS=0 -CONFIG_CC_ALIGN_LABELS=0 -CONFIG_CC_ALIGN_LOOPS=0 -CONFIG_CC_ALIGN_JUMPS=0 -# CONFIG_TINY_SHMEM is not set -CONFIG_BASE_SMALL=0 +CONFIG_AIO=y + +# +# Kernel Performance Events And Counters +# +CONFIG_VM_EVENT_COUNTERS=y +CONFIG_SLUB_DEBUG=y +CONFIG_COMPAT_BRK=y +# CONFIG_SLAB is not set +CONFIG_SLUB=y +# CONFIG_SLOB is not set +# CONFIG_PROFILING is not set +CONFIG_HAVE_OPROFILE=y +# CONFIG_KPROBES is not set +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KRETPROBES=y +CONFIG_HAVE_CLK=y # -# Loadable module support +# GCOV-based kernel profiling # +# CONFIG_SLOW_WORK is not set +CONFIG_HAVE_GENERIC_DMA_COHERENT=y +CONFIG_SLABINFO=y +CONFIG_RT_MUTEXES=y +CONFIG_BASE_SMALL=0 CONFIG_MODULES=y +# CONFIG_MODULE_FORCE_LOAD is not set # CONFIG_MODULE_UNLOAD is not set -CONFIG_OBSOLETE_MODPARM=y # CONFIG_MODVERSIONS is not set # CONFIG_MODULE_SRCVERSION_ALL is not set -# CONFIG_KMOD is not set +CONFIG_BLOCK=y +# CONFIG_LBDAF is not set +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_BLK_DEV_INTEGRITY is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +# CONFIG_IOSCHED_AS is not set +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +# CONFIG_DEFAULT_AS is not set +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +CONFIG_DEFAULT_NOOP=y +CONFIG_DEFAULT_IOSCHED="noop" +CONFIG_FREEZER=y # # System Type # -# CONFIG_ARCH_CLPS7500 is not set +CONFIG_MMU=y +# CONFIG_ARCH_AAEC2000 is not set +# CONFIG_ARCH_INTEGRATOR is not set +# CONFIG_ARCH_REALVIEW is not set +# CONFIG_ARCH_VERSATILE is not set +# CONFIG_ARCH_AT91 is not set # CONFIG_ARCH_CLPS711X is not set -# CONFIG_ARCH_CO285 is not set +# CONFIG_ARCH_GEMINI is not set # CONFIG_ARCH_EBSA110 is not set +# CONFIG_ARCH_EP93XX is not set # CONFIG_ARCH_FOOTBRIDGE is not set -# CONFIG_ARCH_INTEGRATOR is not set -# CONFIG_ARCH_IOP3XX is not set -# CONFIG_ARCH_IXP4XX is not set +# CONFIG_ARCH_MXC is not set +# CONFIG_ARCH_STMP3XXX is not set +# CONFIG_ARCH_NETX is not set +# CONFIG_ARCH_H720X is not set +# CONFIG_ARCH_NOMADIK is not set +# CONFIG_ARCH_IOP13XX is not set +# CONFIG_ARCH_IOP32X is not set +# CONFIG_ARCH_IOP33X is not set +# CONFIG_ARCH_IXP23XX is not set # CONFIG_ARCH_IXP2000 is not set +# CONFIG_ARCH_IXP4XX is not set # CONFIG_ARCH_L7200 is not set +# CONFIG_ARCH_KIRKWOOD is not set +# CONFIG_ARCH_LOKI is not set +# CONFIG_ARCH_MV78XX0 is not set +# CONFIG_ARCH_ORION5X is not set +# CONFIG_ARCH_MMP is not set +# CONFIG_ARCH_KS8695 is not set +# CONFIG_ARCH_NS9XXX is not set +# CONFIG_ARCH_W90X900 is not set +# CONFIG_ARCH_PNX4008 is not set # CONFIG_ARCH_PXA is not set +# CONFIG_ARCH_MSM is not set # CONFIG_ARCH_RPC is not set CONFIG_ARCH_SA1100=y # CONFIG_ARCH_S3C2410 is not set +# CONFIG_ARCH_S3C64XX is not set +# CONFIG_ARCH_S5PC1XX is not set # CONFIG_ARCH_SHARK is not set # CONFIG_ARCH_LH7A40X is not set +# CONFIG_ARCH_U300 is not set +# CONFIG_ARCH_DAVINCI is not set # CONFIG_ARCH_OMAP is not set -# CONFIG_ARCH_VERSATILE is not set -# CONFIG_ARCH_IMX is not set -# CONFIG_ARCH_H720X is not set +# CONFIG_ARCH_BCMRING is not set # # SA11x0 Implementations @@ -106,27 +209,31 @@ CONFIG_CPU_32=y CONFIG_CPU_SA1100=y CONFIG_CPU_32v4=y CONFIG_CPU_ABRT_EV4=y +CONFIG_CPU_PABRT_LEGACY=y CONFIG_CPU_CACHE_V4WB=y CONFIG_CPU_CACHE_VIVT=y CONFIG_CPU_TLB_V4WB=y -CONFIG_CPU_MINICACHE=y +CONFIG_CPU_CP15=y +CONFIG_CPU_CP15_MMU=y # # Processor Features # +# CONFIG_CPU_ICACHE_DISABLE is not set +# CONFIG_CPU_DCACHE_DISABLE is not set +CONFIG_ARM_L1_CACHE_SHIFT=5 # # Bus support # CONFIG_ISA=y -CONFIG_ISA_DMA_API=y - -# -# PCCARD (PCMCIA/CardBus) support -# +# CONFIG_PCI_SYSCALL is not set +# CONFIG_ARCH_SUPPORTS_MSI is not set CONFIG_PCCARD=y # CONFIG_PCMCIA_DEBUG is not set CONFIG_PCMCIA=y +CONFIG_PCMCIA_LOAD_CIS=y +CONFIG_PCMCIA_IOCTL=y # # PC-card bridges @@ -138,11 +245,41 @@ CONFIG_PCMCIA_SA1100=y # # Kernel Features # -# CONFIG_SMP is not set +CONFIG_TICK_ONESHOT=y +# CONFIG_NO_HZ is not set +# CONFIG_HIGH_RES_TIMERS is not set +CONFIG_GENERIC_CLOCKEVENTS_BUILD=y +CONFIG_VMSPLIT_3G=y +# CONFIG_VMSPLIT_2G is not set +# CONFIG_VMSPLIT_1G is not set +CONFIG_PAGE_OFFSET=0xC0000000 +CONFIG_PREEMPT_NONE=y +# CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set -CONFIG_DISCONTIGMEM=y +CONFIG_HZ=100 +# CONFIG_AEABI is not set +CONFIG_ARCH_SPARSEMEM_ENABLE=y +CONFIG_ARCH_SPARSEMEM_DEFAULT=y +# CONFIG_ARCH_SELECT_MEMORY_MODEL is not set +# CONFIG_HIGHMEM is not set +CONFIG_SELECT_MEMORY_MODEL=y +# CONFIG_FLATMEM_MANUAL is not set +# CONFIG_DISCONTIGMEM_MANUAL is not set +CONFIG_SPARSEMEM_MANUAL=y +CONFIG_SPARSEMEM=y +CONFIG_HAVE_MEMORY_PRESENT=y +CONFIG_SPARSEMEM_EXTREME=y +CONFIG_SPLIT_PTLOCK_CPUS=4096 +# CONFIG_PHYS_ADDR_T_64BIT is not set +CONFIG_ZONE_DMA_FLAG=0 +CONFIG_VIRT_TO_BUS=y +CONFIG_HAVE_MLOCK=y +CONFIG_HAVE_MLOCKED_PAGE_BIT=y +# CONFIG_KSM is not set +CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 # CONFIG_LEDS is not set CONFIG_ALIGNMENT_TRAP=y +# CONFIG_UACCESS_WITH_MEMCPY is not set # # Boot options @@ -151,22 +288,26 @@ CONFIG_ZBOOT_ROM_TEXT=0x0 CONFIG_ZBOOT_ROM_BSS=0x0 CONFIG_CMDLINE="" # CONFIG_XIP_KERNEL is not set +# CONFIG_KEXEC is not set # -# CPU Frequency scaling +# CPU Power Management # CONFIG_CPU_FREQ=y -CONFIG_CPU_FREQ_TABLE=y # CONFIG_CPU_FREQ_DEBUG is not set -CONFIG_CPU_FREQ_STAT=y -# CONFIG_CPU_FREQ_STAT_DETAILS is not set +# CONFIG_CPU_FREQ_STAT is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y +# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set # CONFIG_CPU_FREQ_GOV_PERFORMANCE is not set # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set CONFIG_CPU_FREQ_GOV_USERSPACE=y # CONFIG_CPU_FREQ_GOV_ONDEMAND is not set +# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set CONFIG_CPU_FREQ_SA1100=y +# CONFIG_CPU_IDLE is not set # # Floating point emulation @@ -183,6 +324,8 @@ CONFIG_FPE_NWFPE=y # Userspace binary formats # CONFIG_BINFMT_ELF=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_HAVE_AOUT=y # CONFIG_BINFMT_AOUT is not set # CONFIG_BINFMT_MISC is not set # CONFIG_ARTHUR is not set @@ -191,8 +334,120 @@ CONFIG_BINFMT_ELF=y # Power management options # CONFIG_PM=y -# CONFIG_PM_LEGACY is not set -# CONFIG_APM is not set +# CONFIG_PM_DEBUG is not set +CONFIG_PM_SLEEP=y +CONFIG_SUSPEND=y +CONFIG_SUSPEND_FREEZER=y +# CONFIG_APM_EMULATION is not set +# CONFIG_PM_RUNTIME is not set +CONFIG_ARCH_SUSPEND_POSSIBLE=y +CONFIG_NET=y + +# +# Networking options +# +# CONFIG_PACKET is not set +CONFIG_UNIX=y +CONFIG_XFRM=y +# CONFIG_XFRM_USER is not set +# CONFIG_XFRM_SUB_POLICY is not set +# CONFIG_XFRM_MIGRATE is not set +# CONFIG_XFRM_STATISTICS is not set +# CONFIG_NET_KEY is not set +CONFIG_INET=y +# CONFIG_IP_MULTICAST is not set +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_FIB_HASH=y +# CONFIG_IP_PNP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_ARPD is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +# CONFIG_INET_TUNNEL is not set +CONFIG_INET_XFRM_MODE_TRANSPORT=y +CONFIG_INET_XFRM_MODE_TUNNEL=y +CONFIG_INET_XFRM_MODE_BEET=y +CONFIG_INET_LRO=y +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y +# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_CUBIC=y +CONFIG_DEFAULT_TCP_CONG="cubic" +# CONFIG_TCP_MD5SIG is not set +# CONFIG_IPV6 is not set +# CONFIG_NETWORK_SECMARK is not set +# CONFIG_NETFILTER is not set +# CONFIG_IP_DCCP is not set +# CONFIG_IP_SCTP is not set +# CONFIG_RDS is not set +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_NET_DSA is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_PHONET is not set +# CONFIG_IEEE802154 is not set +# CONFIG_NET_SCHED is not set +# CONFIG_DCB is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_HAMRADIO is not set +# CONFIG_CAN is not set +CONFIG_IRDA=m + +# +# IrDA protocols +# +CONFIG_IRLAN=m +CONFIG_IRNET=m +CONFIG_IRCOMM=m +# CONFIG_IRDA_ULTRA is not set + +# +# IrDA options +# +# CONFIG_IRDA_CACHE_LAST_LSAP is not set +# CONFIG_IRDA_FAST_RR is not set +# CONFIG_IRDA_DEBUG is not set + +# +# Infrared-port device drivers +# + +# +# SIR device drivers +# +# CONFIG_IRTTY_SIR is not set + +# +# Dongle support +# + +# +# FIR device drivers +# +CONFIG_SA1100_FIR=m +# CONFIG_BT is not set +# CONFIG_AF_RXRPC is not set +# CONFIG_WIRELESS is not set +# CONFIG_WIMAX is not set +# CONFIG_RFKILL is not set +# CONFIG_NET_9P is not set # # Device Drivers @@ -201,15 +456,17 @@ CONFIG_PM=y # # Generic Driver Options # +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y -# CONFIG_FW_LOADER is not set - -# -# Memory Technology Devices (MTD) -# +CONFIG_FW_LOADER=y +CONFIG_FIRMWARE_IN_KERNEL=y +CONFIG_EXTRA_FIRMWARE="" +# CONFIG_SYS_HYPERVISOR is not set +# CONFIG_CONNECTOR is not set CONFIG_MTD=y # CONFIG_MTD_DEBUG is not set +# CONFIG_MTD_TESTS is not set # CONFIG_MTD_CONCAT is not set CONFIG_MTD_PARTITIONS=y CONFIG_MTD_REDBOOT_PARTS=y @@ -218,15 +475,20 @@ CONFIG_MTD_REDBOOT_DIRECTORY_BLOCK=-1 # CONFIG_MTD_REDBOOT_PARTS_READONLY is not set # CONFIG_MTD_CMDLINE_PARTS is not set # CONFIG_MTD_AFS_PARTS is not set +# CONFIG_MTD_AR7_PARTS is not set # # User Modules And Translation Layers # CONFIG_MTD_CHAR=y +CONFIG_MTD_BLKDEVS=y CONFIG_MTD_BLOCK=y # CONFIG_FTL is not set # CONFIG_NFTL is not set # CONFIG_INFTL is not set +# CONFIG_RFD_FTL is not set +# CONFIG_SSFDC is not set +# CONFIG_MTD_OOPS is not set # # RAM/ROM/Flash chip drivers @@ -249,6 +511,7 @@ CONFIG_MTD_MAP_BANK_WIDTH_4=y CONFIG_MTD_CFI_I2=y # CONFIG_MTD_CFI_I4 is not set # CONFIG_MTD_CFI_I8 is not set +# CONFIG_MTD_OTP is not set CONFIG_MTD_CFI_INTELEXT=y # CONFIG_MTD_CFI_AMDSTD is not set # CONFIG_MTD_CFI_STAA is not set @@ -265,7 +528,7 @@ CONFIG_MTD_CFI_UTIL=y # CONFIG_MTD_PHYSMAP is not set # CONFIG_MTD_ARM_INTEGRATOR is not set CONFIG_MTD_SA1100=y -# CONFIG_MTD_EDB7312 is not set +# CONFIG_MTD_PLATRAM is not set # # Self-contained MTD device drivers @@ -273,7 +536,6 @@ CONFIG_MTD_SA1100=y # CONFIG_MTD_SLRAM is not set # CONFIG_MTD_PHRAM is not set # CONFIG_MTD_MTDRAM is not set -# CONFIG_MTD_BLKMTD is not set # CONFIG_MTD_BLOCK2MTD is not set # @@ -282,26 +544,21 @@ CONFIG_MTD_SA1100=y # CONFIG_MTD_DOC2000 is not set # CONFIG_MTD_DOC2001 is not set # CONFIG_MTD_DOC2001PLUS is not set - -# -# NAND Flash Device Drivers -# # CONFIG_MTD_NAND is not set +# CONFIG_MTD_ONENAND is not set # -# Parallel port support +# LPDDR flash memory drivers # -# CONFIG_PARPORT is not set +# CONFIG_MTD_LPDDR is not set # -# Plug and Play support +# UBI - Unsorted block images # +# CONFIG_MTD_UBI is not set +# CONFIG_PARPORT is not set # CONFIG_PNP is not set - -# -# Block devices -# -# CONFIG_BLK_DEV_XD is not set +CONFIG_BLK_DEV=y # CONFIG_BLK_DEV_COW_COMMON is not set CONFIG_BLK_DEV_LOOP=m # CONFIG_BLK_DEV_CRYPTOLOOP is not set @@ -309,212 +566,58 @@ CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=8192 -CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" +# CONFIG_BLK_DEV_XIP is not set # CONFIG_CDROM_PKTCDVD is not set - -# -# IO Schedulers -# -CONFIG_IOSCHED_NOOP=y -CONFIG_IOSCHED_AS=y -CONFIG_IOSCHED_DEADLINE=y -CONFIG_IOSCHED_CFQ=y # CONFIG_ATA_OVER_ETH is not set +# CONFIG_MG_DISK is not set +# CONFIG_MISC_DEVICES is not set +CONFIG_HAVE_IDE=y +CONFIG_IDE=y # -# ATA/ATAPI/MFM/RLL support -# -CONFIG_IDE=m -CONFIG_BLK_DEV_IDE=m - -# -# Please see Documentation/ide.txt for help/info on IDE drives +# Please see Documentation/ide/ide.txt for help/info on IDE drives # # CONFIG_BLK_DEV_IDE_SATA is not set -CONFIG_BLK_DEV_IDEDISK=m -# CONFIG_IDEDISK_MULTI_MODE is not set -# CONFIG_BLK_DEV_IDECS is not set -CONFIG_BLK_DEV_IDECD=m +CONFIG_IDE_GD=y +CONFIG_IDE_GD_ATA=y +# CONFIG_IDE_GD_ATAPI is not set +CONFIG_BLK_DEV_IDECS=y +# CONFIG_BLK_DEV_IDECD is not set # CONFIG_BLK_DEV_IDETAPE is not set -# CONFIG_BLK_DEV_IDEFLOPPY is not set # CONFIG_IDE_TASK_IOCTL is not set +CONFIG_IDE_PROC_FS=y # # IDE chipset support/bugfixes # -CONFIG_IDE_GENERIC=m -# CONFIG_IDE_ARM is not set -# CONFIG_IDE_CHIPSETS is not set +# CONFIG_BLK_DEV_PLATFORM is not set # CONFIG_BLK_DEV_IDEDMA is not set -# CONFIG_IDEDMA_AUTO is not set -# CONFIG_BLK_DEV_HD is not set # # SCSI device support # +# CONFIG_RAID_ATTRS is not set # CONFIG_SCSI is not set - -# -# Multi-device support (RAID and LVM) -# +# CONFIG_SCSI_DMA is not set +# CONFIG_SCSI_NETLINK is not set +# CONFIG_ATA is not set # CONFIG_MD is not set - -# -# Fusion MPT device support -# - -# -# IEEE 1394 (FireWire) support -# - -# -# I2O device support -# - -# -# Networking support -# -CONFIG_NET=y - -# -# Networking options -# -# CONFIG_PACKET is not set -CONFIG_UNIX=y -# CONFIG_NET_KEY is not set -CONFIG_INET=y -# CONFIG_IP_MULTICAST is not set -# CONFIG_IP_ADVANCED_ROUTER is not set -# CONFIG_IP_PNP is not set -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set -# CONFIG_ARPD is not set -# CONFIG_SYN_COOKIES is not set -# CONFIG_INET_AH is not set -# CONFIG_INET_ESP is not set -# CONFIG_INET_IPCOMP is not set -# CONFIG_INET_TUNNEL is not set -# CONFIG_IP_TCPDIAG is not set -# CONFIG_IP_TCPDIAG_IPV6 is not set -# CONFIG_IPV6 is not set -# CONFIG_NETFILTER is not set - -# -# SCTP Configuration (EXPERIMENTAL) -# -# CONFIG_IP_SCTP is not set -# CONFIG_ATM is not set -# CONFIG_BRIDGE is not set -# CONFIG_VLAN_8021Q is not set -# CONFIG_DECNET is not set -# CONFIG_LLC2 is not set -# CONFIG_IPX is not set -# CONFIG_ATALK is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_NET_DIVERT is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set -# CONFIG_NET_CLS_ROUTE is not set - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set -# CONFIG_NETPOLL is not set -# CONFIG_NET_POLL_CONTROLLER is not set -# CONFIG_HAMRADIO is not set -CONFIG_IRDA=m - -# -# IrDA protocols -# -CONFIG_IRLAN=m -CONFIG_IRNET=m -CONFIG_IRCOMM=m -# CONFIG_IRDA_ULTRA is not set - -# -# IrDA options -# -# CONFIG_IRDA_CACHE_LAST_LSAP is not set -# CONFIG_IRDA_FAST_RR is not set -# CONFIG_IRDA_DEBUG is not set - -# -# Infrared-port device drivers -# - -# -# SIR device drivers -# -# CONFIG_IRTTY_SIR is not set - -# -# Dongle support -# - -# -# Old SIR device drivers -# -# CONFIG_IRPORT_SIR is not set - -# -# Old Serial dongle support -# - -# -# FIR device drivers -# -# CONFIG_NSC_FIR is not set -# CONFIG_WINBOND_FIR is not set -# CONFIG_SMC_IRCC_FIR is not set -# CONFIG_ALI_FIR is not set -CONFIG_SA1100_FIR=m -# CONFIG_VIA_FIR is not set -# CONFIG_BT is not set CONFIG_NETDEVICES=y # CONFIG_DUMMY is not set # CONFIG_BONDING is not set +# CONFIG_MACVLAN is not set # CONFIG_EQUALIZER is not set # CONFIG_TUN is not set - -# -# ARCnet devices -# +# CONFIG_VETH is not set # CONFIG_ARCNET is not set - -# -# Ethernet (10 or 100Mbit) -# # CONFIG_NET_ETHERNET is not set - -# -# Ethernet (1000 Mbit) -# - -# -# Ethernet (10000 Mbit) -# - -# -# Token Ring devices -# +# CONFIG_NETDEV_1000 is not set +# CONFIG_NETDEV_10000 is not set # CONFIG_TR is not set +# CONFIG_WLAN is not set # -# Wireless LAN (non-hamradio) -# -# CONFIG_NET_RADIO is not set - -# -# PCMCIA network device support +# Enable WiMAX (Networking options) to see the WiMAX drivers # CONFIG_NET_PCMCIA=y # CONFIG_PCMCIA_3C589 is not set @@ -525,10 +628,6 @@ CONFIG_PCMCIA_PCNET=y # CONFIG_PCMCIA_SMC91C92 is not set # CONFIG_PCMCIA_XIRC2PS is not set # CONFIG_PCMCIA_AXNET is not set - -# -# Wan interfaces -# # CONFIG_WAN is not set CONFIG_PPP=m # CONFIG_PPP_MULTILINK is not set @@ -537,20 +636,23 @@ CONFIG_PPP_ASYNC=m # CONFIG_PPP_SYNC_TTY is not set CONFIG_PPP_DEFLATE=m CONFIG_PPP_BSDCOMP=m +# CONFIG_PPP_MPPE is not set # CONFIG_PPPOE is not set +# CONFIG_PPPOL2TP is not set # CONFIG_SLIP is not set -# CONFIG_SHAPER is not set +CONFIG_SLHC=m # CONFIG_NETCONSOLE is not set - -# -# ISDN subsystem -# +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set # CONFIG_ISDN is not set +# CONFIG_PHONE is not set # # Input device support # CONFIG_INPUT=y +# CONFIG_INPUT_FF_MEMLESS is not set +# CONFIG_INPUT_POLLDEV is not set # # Userland interfaces @@ -560,7 +662,6 @@ CONFIG_INPUT_MOUSEDEV_PSAUX=y CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # CONFIG_INPUT_JOYDEV is not set -# CONFIG_INPUT_TSDEV is not set # CONFIG_INPUT_EVDEV is not set # CONFIG_INPUT_EVBUG is not set @@ -568,47 +669,42 @@ CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # Input Device Drivers # CONFIG_INPUT_KEYBOARD=y -CONFIG_KEYBOARD_ATKBD=y -# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_ATKBD is not set # CONFIG_KEYBOARD_LKKBD is not set -# CONFIG_KEYBOARD_XTKBD is not set +CONFIG_KEYBOARD_GPIO=y +# CONFIG_KEYBOARD_MATRIX is not set # CONFIG_KEYBOARD_NEWTON is not set -CONFIG_INPUT_MOUSE=y -CONFIG_MOUSE_PS2=y -# CONFIG_MOUSE_SERIAL is not set -# CONFIG_MOUSE_INPORT is not set -# CONFIG_MOUSE_LOGIBM is not set -# CONFIG_MOUSE_PC110PAD is not set -# CONFIG_MOUSE_VSXXXAA is not set +# CONFIG_KEYBOARD_OPENCORES is not set +# CONFIG_KEYBOARD_STOWAWAY is not set +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_INPUT_MOUSE is not set # CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TABLET is not set # CONFIG_INPUT_TOUCHSCREEN is not set # CONFIG_INPUT_MISC is not set # # Hardware I/O ports # -CONFIG_SERIO=y -CONFIG_SERIO_SERPORT=y -CONFIG_SERIO_LIBPS2=y -# CONFIG_SERIO_RAW is not set +# CONFIG_SERIO is not set # CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y # # Character devices # CONFIG_VT=y +CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_VT_CONSOLE=y CONFIG_HW_CONSOLE=y +# CONFIG_VT_HW_CONSOLE_BINDING is not set +CONFIG_DEVKMEM=y # CONFIG_SERIAL_NONSTANDARD is not set # # Serial drivers # -CONFIG_SERIAL_8250=m -# CONFIG_SERIAL_8250_CS is not set -CONFIG_SERIAL_8250_NR_UARTS=4 -# CONFIG_SERIAL_8250_EXTENDED is not set +# CONFIG_SERIAL_8250 is not set # # Non-8250 serial port support @@ -618,71 +714,125 @@ CONFIG_SERIAL_SA1100_CONSOLE=y CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y CONFIG_UNIX98_PTYS=y +# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 +# CONFIG_IPMI_HANDLER is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set # -# IPMI +# PCMCIA character devices # -# CONFIG_IPMI_HANDLER is not set +# CONFIG_SYNCLINK_CS is not set +# CONFIG_CARDMAN_4000 is not set +# CONFIG_CARDMAN_4040 is not set +# CONFIG_IPWIRELESS is not set +# CONFIG_RAW_DRIVER is not set +# CONFIG_TCG_TPM is not set +CONFIG_DEVPORT=y +# CONFIG_I2C is not set +# CONFIG_SPI is not set # -# Watchdog Cards +# PPS support # -# CONFIG_WATCHDOG is not set -# CONFIG_NVRAM is not set -# CONFIG_RTC is not set -# CONFIG_DTLK is not set -# CONFIG_R3964 is not set +# CONFIG_PPS is not set +CONFIG_ARCH_REQUIRE_GPIOLIB=y +CONFIG_GPIOLIB=y +# CONFIG_GPIO_SYSFS is not set # -# Ftape, the floppy tape device driver +# Memory mapped GPIO expanders: # -# CONFIG_DRM is not set # -# PCMCIA character devices +# I2C GPIO expanders: # -# CONFIG_SYNCLINK_CS is not set -# CONFIG_RAW_DRIVER is not set # -# TPM devices +# PCI GPIO expanders: # # -# I2C support +# SPI GPIO expanders: # -# CONFIG_I2C is not set # -# Misc devices +# AC97 GPIO expanders: # +# CONFIG_W1 is not set +# CONFIG_POWER_SUPPLY is not set +# CONFIG_HWMON is not set +# CONFIG_THERMAL is not set +# CONFIG_WATCHDOG is not set +CONFIG_SSB_POSSIBLE=y # -# Multimedia devices +# Sonics Silicon Backplane # -# CONFIG_VIDEO_DEV is not set +# CONFIG_SSB is not set # -# Digital Video Broadcasting Devices +# Multifunction device drivers # -# CONFIG_DVB is not set +# CONFIG_MFD_CORE is not set +# CONFIG_MFD_SM501 is not set +# CONFIG_MFD_ASIC3 is not set +CONFIG_HTC_EGPIO=y +# CONFIG_HTC_PASIC3 is not set +# CONFIG_MFD_TMIO is not set +# CONFIG_MFD_T7L66XB is not set +# CONFIG_MFD_TC6387XB is not set +# CONFIG_MFD_TC6393XB is not set + +# +# Multimedia Capabilities Port drivers +# +# CONFIG_MCP_SA11X0 is not set +# CONFIG_REGULATOR is not set +# CONFIG_MEDIA_SUPPORT is not set # # Graphics support # +# CONFIG_VGASTATE is not set +# CONFIG_VIDEO_OUTPUT_CONTROL is not set CONFIG_FB=y +# CONFIG_FIRMWARE_EDID is not set +# CONFIG_FB_DDC is not set +# CONFIG_FB_BOOT_VESA_SUPPORT is not set CONFIG_FB_CFB_FILLRECT=y CONFIG_FB_CFB_COPYAREA=y CONFIG_FB_CFB_IMAGEBLIT=y -CONFIG_FB_SOFT_CURSOR=y +# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set +# CONFIG_FB_SYS_FILLRECT is not set +# CONFIG_FB_SYS_COPYAREA is not set +# CONFIG_FB_SYS_IMAGEBLIT is not set +# CONFIG_FB_FOREIGN_ENDIAN is not set +# CONFIG_FB_SYS_FOPS is not set +# CONFIG_FB_SVGALIB is not set # CONFIG_FB_MACMODES is not set +# CONFIG_FB_BACKLIGHT is not set # CONFIG_FB_MODE_HELPERS is not set # CONFIG_FB_TILEBLITTING is not set + +# +# Frame buffer hardware drivers +# CONFIG_FB_SA1100=y # CONFIG_FB_S1D13XXX is not set # CONFIG_FB_VIRTUAL is not set +# CONFIG_FB_METRONOME is not set +# CONFIG_FB_MB862XX is not set +# CONFIG_FB_BROADSHEET is not set +# CONFIG_BACKLIGHT_LCD_SUPPORT is not set + +# +# Display device support +# +# CONFIG_DISPLAY_SUPPORT is not set # # Console display driver support @@ -691,65 +841,54 @@ CONFIG_FB_SA1100=y # CONFIG_MDA_CONSOLE is not set CONFIG_DUMMY_CONSOLE=y # CONFIG_FRAMEBUFFER_CONSOLE is not set - -# -# Logo configuration -# # CONFIG_LOGO is not set -# CONFIG_BACKLIGHT_LCD_SUPPORT is not set - -# -# Sound -# -CONFIG_SOUND=y - -# -# Advanced Linux Sound Architecture -# -# CONFIG_SND is not set - -# -# Open Sound System -# -# CONFIG_SOUND_PRIME is not set - -# -# USB support -# -CONFIG_USB_ARCH_HAS_HCD=y -# CONFIG_USB_ARCH_HAS_OHCI is not set -# CONFIG_USB is not set +# CONFIG_SOUND is not set +# CONFIG_HID_SUPPORT is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_MMC is not set +# CONFIG_MEMSTICK is not set +# CONFIG_NEW_LEDS is not set +# CONFIG_ACCESSIBILITY is not set +CONFIG_RTC_LIB=y +# CONFIG_RTC_CLASS is not set +# CONFIG_DMADEVICES is not set +# CONFIG_AUXDISPLAY is not set +# CONFIG_UIO is not set # -# USB Gadget Support +# TI VLYNQ # -# CONFIG_USB_GADGET is not set - -# -# MMC/SD Card support -# -# CONFIG_MMC is not set +# CONFIG_STAGING is not set # # File systems # CONFIG_EXT2_FS=y # CONFIG_EXT2_FS_XATTR is not set +# CONFIG_EXT2_FS_XIP is not set # CONFIG_EXT3_FS is not set -# CONFIG_JBD is not set +# CONFIG_EXT4_FS is not set # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set - -# -# XFS support -# +# CONFIG_FS_POSIX_ACL is not set # CONFIG_XFS_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_ROMFS_FS is not set -# CONFIG_QUOTA is not set +# CONFIG_OCFS2_FS is not set +# CONFIG_BTRFS_FS is not set +# CONFIG_NILFS2_FS is not set +CONFIG_FILE_LOCKING=y +CONFIG_FSNOTIFY=y CONFIG_DNOTIFY=y +# CONFIG_INOTIFY is not set +CONFIG_INOTIFY_USER=y +# CONFIG_QUOTA is not set # CONFIG_AUTOFS_FS is not set # CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set + +# +# Caches +# +# CONFIG_FSCACHE is not set # # CD-ROM/DVD Filesystems @@ -771,16 +910,13 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" # Pseudo filesystems # CONFIG_PROC_FS=y +CONFIG_PROC_SYSCTL=y +CONFIG_PROC_PAGE_MONITOR=y CONFIG_SYSFS=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVPTS_FS_XATTR is not set # CONFIG_TMPFS is not set # CONFIG_HUGETLB_PAGE is not set -CONFIG_RAMFS=y - -# -# Miscellaneous filesystems -# +# CONFIG_CONFIGFS_FS is not set +CONFIG_MISC_FILESYSTEMS=y # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set # CONFIG_HFS_FS is not set @@ -788,34 +924,37 @@ CONFIG_RAMFS=y # CONFIG_BEFS_FS is not set # CONFIG_BFS_FS is not set # CONFIG_EFS_FS is not set -# CONFIG_JFFS_FS is not set CONFIG_JFFS2_FS=y CONFIG_JFFS2_FS_DEBUG=0 -# CONFIG_JFFS2_FS_NAND is not set -# CONFIG_JFFS2_FS_NOR_ECC is not set +CONFIG_JFFS2_FS_WRITEBUFFER=y +# CONFIG_JFFS2_FS_WBUF_VERIFY is not set +# CONFIG_JFFS2_SUMMARY is not set +# CONFIG_JFFS2_FS_XATTR is not set # CONFIG_JFFS2_COMPRESSION_OPTIONS is not set CONFIG_JFFS2_ZLIB=y +# CONFIG_JFFS2_LZO is not set CONFIG_JFFS2_RTIME=y # CONFIG_JFFS2_RUBIN is not set CONFIG_CRAMFS=m +# CONFIG_SQUASHFS is not set # CONFIG_VXFS_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_OMFS_FS is not set # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set - -# -# Network File Systems -# +CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y # CONFIG_NFS_V3 is not set # CONFIG_NFS_V4 is not set -# CONFIG_NFS_DIRECTIO is not set CONFIG_NFSD=m # CONFIG_NFSD_V3 is not set -CONFIG_NFSD_TCP=y +# CONFIG_NFSD_V4 is not set CONFIG_LOCKD=y CONFIG_EXPORTFS=m +CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y # CONFIG_RPCSEC_GSS_KRB5 is not set # CONFIG_RPCSEC_GSS_SPKM3 is not set @@ -831,10 +970,6 @@ CONFIG_SMB_FS=m # # CONFIG_PARTITION_ADVANCED is not set CONFIG_MSDOS_PARTITION=y - -# -# Native Language Support -# CONFIG_NLS=y CONFIG_NLS_DEFAULT="iso8859-1" # CONFIG_NLS_CODEPAGE_437 is not set @@ -875,20 +1010,34 @@ CONFIG_NLS_DEFAULT="iso8859-1" # CONFIG_NLS_KOI8_R is not set # CONFIG_NLS_KOI8_U is not set # CONFIG_NLS_UTF8 is not set - -# -# Profiling support -# -# CONFIG_PROFILING is not set +# CONFIG_DLM is not set # # Kernel hacking # # CONFIG_PRINTK_TIME is not set +CONFIG_ENABLE_WARN_DEPRECATED=y +CONFIG_ENABLE_MUST_CHECK=y +CONFIG_FRAME_WARN=1024 +# CONFIG_MAGIC_SYSRQ is not set +# CONFIG_STRIP_ASM_SYMS is not set +# CONFIG_UNUSED_SYMBOLS is not set +# CONFIG_DEBUG_FS is not set +# CONFIG_HEADERS_CHECK is not set # CONFIG_DEBUG_KERNEL is not set -CONFIG_LOG_BUF_SHIFT=14 +# CONFIG_SLUB_DEBUG_ON is not set +# CONFIG_SLUB_STATS is not set CONFIG_DEBUG_BUGVERBOSE=y +CONFIG_DEBUG_MEMORY_INIT=y CONFIG_FRAME_POINTER=y +# CONFIG_RCU_CPU_STALL_DETECTOR is not set +# CONFIG_LATENCYTOP is not set +# CONFIG_SYSCTL_SYSCALL_CHECK is not set +CONFIG_HAVE_FUNCTION_TRACER=y +CONFIG_TRACING_SUPPORT=y +# CONFIG_FTRACE is not set +# CONFIG_SAMPLES is not set +CONFIG_HAVE_ARCH_KGDB=y # CONFIG_DEBUG_USER is not set # @@ -896,21 +1045,120 @@ CONFIG_FRAME_POINTER=y # # CONFIG_KEYS is not set # CONFIG_SECURITY is not set +# CONFIG_SECURITYFS is not set +# CONFIG_SECURITY_FILE_CAPABILITIES is not set +CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +# CONFIG_CRYPTO_FIPS is not set +CONFIG_CRYPTO_ALGAPI=m +CONFIG_CRYPTO_ALGAPI2=m +CONFIG_CRYPTO_RNG=m +CONFIG_CRYPTO_RNG2=m +# CONFIG_CRYPTO_MANAGER is not set +# CONFIG_CRYPTO_MANAGER2 is not set +# CONFIG_CRYPTO_GF128MUL is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_CRYPTD is not set +# CONFIG_CRYPTO_AUTHENC is not set +# CONFIG_CRYPTO_TEST is not set + +# +# Authenticated Encryption with Associated Data +# +# CONFIG_CRYPTO_CCM is not set +# CONFIG_CRYPTO_GCM is not set +# CONFIG_CRYPTO_SEQIV is not set + +# +# Block modes +# +# CONFIG_CRYPTO_CBC is not set +# CONFIG_CRYPTO_CTR is not set +# CONFIG_CRYPTO_CTS is not set +# CONFIG_CRYPTO_ECB is not set +# CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_PCBC is not set +# CONFIG_CRYPTO_XTS is not set + +# +# Hash modes +# +# CONFIG_CRYPTO_HMAC is not set +# CONFIG_CRYPTO_XCBC is not set +# CONFIG_CRYPTO_VMAC is not set + +# +# Digest +# +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_GHASH is not set +# CONFIG_CRYPTO_MD4 is not set +# CONFIG_CRYPTO_MD5 is not set +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_RMD128 is not set +# CONFIG_CRYPTO_RMD160 is not set +# CONFIG_CRYPTO_RMD256 is not set +# CONFIG_CRYPTO_RMD320 is not set +# CONFIG_CRYPTO_SHA1 is not set +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_WP512 is not set + +# +# Ciphers +# +CONFIG_CRYPTO_AES=m +# CONFIG_CRYPTO_ANUBIS is not set +# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +# CONFIG_CRYPTO_DES is not set +# CONFIG_CRYPTO_FCRYPT is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_SALSA20 is not set +# CONFIG_CRYPTO_SEED is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_TWOFISH is not set # -# Cryptographic options +# Compression # -# CONFIG_CRYPTO is not set +# CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_ZLIB is not set +# CONFIG_CRYPTO_LZO is not set # -# Hardware crypto devices +# Random Number Generation # +CONFIG_CRYPTO_ANSI_CPRNG=m +CONFIG_CRYPTO_HW=y +# CONFIG_BINARY_PRINTF is not set # # Library routines # +CONFIG_BITREVERSE=y +CONFIG_GENERIC_FIND_LAST_BIT=y CONFIG_CRC_CCITT=m +# CONFIG_CRC16 is not set +# CONFIG_CRC_T10DIF is not set +# CONFIG_CRC_ITU_T is not set CONFIG_CRC32=y +# CONFIG_CRC7 is not set # CONFIG_LIBCRC32C is not set CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=y +CONFIG_DECOMPRESS_GZIP=y +CONFIG_DECOMPRESS_BZIP2=y +CONFIG_DECOMPRESS_LZMA=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y +CONFIG_NLATTR=y diff --git a/arch/arm/mach-sa1100/Kconfig b/arch/arm/mach-sa1100/Kconfig index 4e5c07f4e45..03a7f3857c5 100644 --- a/arch/arm/mach-sa1100/Kconfig +++ b/arch/arm/mach-sa1100/Kconfig @@ -53,23 +53,23 @@ config SA1100_COLLIE config SA1100_H3100 bool "Compaq iPAQ H3100" + select HTC_EGPIO help Say Y here if you intend to run this kernel on the Compaq iPAQ H3100 handheld computer. Information about this machine and the Linux port to this machine can be found at: <http://www.handhelds.org/Compaq/index.html#iPAQ_H3100> - <http://www.compaq.com/products/handhelds/pocketpc/> config SA1100_H3600 bool "Compaq iPAQ H3600/H3700" + select HTC_EGPIO help Say Y here if you intend to run this kernel on the Compaq iPAQ H3600 handheld computer. Information about this machine and the Linux port to this machine can be found at: <http://www.handhelds.org/Compaq/index.html#iPAQ_H3600> - <http://www.compaq.com/products/handhelds/pocketpc/> config SA1100_BADGE4 bool "HP Labs BadgePAD 4" diff --git a/arch/arm/mach-sa1100/Makefile b/arch/arm/mach-sa1100/Makefile index bb7b8198d0c..89349c1dd7a 100644 --- a/arch/arm/mach-sa1100/Makefile +++ b/arch/arm/mach-sa1100/Makefile @@ -25,8 +25,8 @@ led-$(CONFIG_SA1100_CERF) += leds-cerf.o obj-$(CONFIG_SA1100_COLLIE) += collie.o -obj-$(CONFIG_SA1100_H3100) += h3600.o -obj-$(CONFIG_SA1100_H3600) += h3600.o +obj-$(CONFIG_SA1100_H3100) += h3100.o h3xxx.o +obj-$(CONFIG_SA1100_H3600) += h3600.o h3xxx.o obj-$(CONFIG_SA1100_HACKKIT) += hackkit.o led-$(CONFIG_SA1100_HACKKIT) += leds-hackkit.o diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c index 55e64477a87..169e5b87dbf 100644 --- a/arch/arm/mach-sa1100/assabet.c +++ b/arch/arm/mach-sa1100/assabet.c @@ -249,10 +249,10 @@ static void __init assabet_init(void) #endif } - sa11x0_set_flash_data(&assabet_flash_data, assabet_flash_resources, - ARRAY_SIZE(assabet_flash_resources)); - sa11x0_set_irda_data(&assabet_irda_data); - sa11x0_set_mcp_data(&assabet_mcp_data); + sa11x0_register_mtd(&assabet_flash_data, assabet_flash_resources, + ARRAY_SIZE(assabet_flash_resources)); + sa11x0_register_irda(&assabet_irda_data); + sa11x0_register_mcp(&assabet_mcp_data); } /* diff --git a/arch/arm/mach-sa1100/badge4.c b/arch/arm/mach-sa1100/badge4.c index ab5883b39dd..051ec0f0023 100644 --- a/arch/arm/mach-sa1100/badge4.c +++ b/arch/arm/mach-sa1100/badge4.c @@ -212,7 +212,7 @@ static int __init badge4_init(void) /* maybe turn on 5v0 from the start */ badge4_set_5V(BADGE4_5V_INITIALLY, five_v_on); - sa11x0_set_flash_data(&badge4_flash_data, &badge4_flash_resource, 1); + sa11x0_register_mtd(&badge4_flash_data, &badge4_flash_resource, 1); return 0; } diff --git a/arch/arm/mach-sa1100/cerf.c b/arch/arm/mach-sa1100/cerf.c index fd3ad9cfc91..bc950ef418a 100644 --- a/arch/arm/mach-sa1100/cerf.c +++ b/arch/arm/mach-sa1100/cerf.c @@ -129,8 +129,8 @@ static struct mcp_plat_data cerf_mcp_data = { static void __init cerf_init(void) { platform_add_devices(cerf_devices, ARRAY_SIZE(cerf_devices)); - sa11x0_set_flash_data(&cerf_flash_data, &cerf_flash_resource, 1); - sa11x0_set_mcp_data(&cerf_mcp_data); + sa11x0_register_mtd(&cerf_flash_data, &cerf_flash_resource, 1); + sa11x0_register_mcp(&cerf_mcp_data); } MACHINE_START(CERF, "Intrinsyc CerfBoard/CerfCube") diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c index bbf2ebcc306..9982c5c28ed 100644 --- a/arch/arm/mach-sa1100/collie.c +++ b/arch/arm/mach-sa1100/collie.c @@ -26,6 +26,7 @@ #include <linux/mtd/partitions.h> #include <linux/timer.h> #include <linux/gpio.h> +#include <linux/pda_power.h> #include <mach/hardware.h> #include <asm/mach-types.h> @@ -56,6 +57,7 @@ static struct resource collie_scoop_resources[] = { static struct scoop_config collie_scoop_setup = { .io_dir = COLLIE_SCOOP_IO_DIR, .io_out = COLLIE_SCOOP_IO_OUT, + .gpio_base = COLLIE_SCOOP_GPIO_BASE, }; struct platform_device colliescoop_device = { @@ -85,6 +87,70 @@ static struct scoop_pcmcia_config collie_pcmcia_config = { static struct mcp_plat_data collie_mcp_data = { .mccr0 = MCCR0_ADM | MCCR0_ExtClk, .sclk_rate = 9216000, + .gpio_base = COLLIE_TC35143_GPIO_BASE, +}; + +/* + * Collie AC IN + */ +static int collie_power_init(struct device *dev) +{ + int ret = gpio_request(COLLIE_GPIO_AC_IN, "ac in"); + if (ret) + goto err_gpio_req; + + ret = gpio_direction_input(COLLIE_GPIO_AC_IN); + if (ret) + goto err_gpio_in; + + return 0; + +err_gpio_in: + gpio_free(COLLIE_GPIO_AC_IN); +err_gpio_req: + return ret; +} + +static void collie_power_exit(struct device *dev) +{ + gpio_free(COLLIE_GPIO_AC_IN); +} + +static int collie_power_ac_online(void) +{ + return gpio_get_value(COLLIE_GPIO_AC_IN) == 2; +} + +static char *collie_ac_supplied_to[] = { + "main-battery", + "backup-battery", +}; + +static struct pda_power_pdata collie_power_data = { + .init = collie_power_init, + .is_ac_online = collie_power_ac_online, + .exit = collie_power_exit, + .supplied_to = collie_ac_supplied_to, + .num_supplicants = ARRAY_SIZE(collie_ac_supplied_to), +}; + +static struct resource collie_power_resource[] = { + { + .name = "ac", + .start = gpio_to_irq(COLLIE_GPIO_AC_IN), + .end = gpio_to_irq(COLLIE_GPIO_AC_IN), + .flags = IORESOURCE_IRQ | + IORESOURCE_IRQ_HIGHEDGE | + IORESOURCE_IRQ_LOWEDGE, + }, +}; + +static struct platform_device collie_power_device = { + .name = "pda-power", + .id = -1, + .dev.platform_data = &collie_power_data, + .resource = collie_power_resource, + .num_resources = ARRAY_SIZE(collie_power_resource), }; #ifdef CONFIG_SHARP_LOCOMO @@ -178,6 +244,7 @@ struct platform_device collie_locomo_device = { static struct platform_device *devices[] __initdata = { &collie_locomo_device, &colliescoop_device, + &collie_power_device, }; static struct mtd_partition collie_partitions[] = { @@ -248,22 +315,24 @@ static void __init collie_init(void) GPDR = GPIO_LDD8 | GPIO_LDD9 | GPIO_LDD10 | GPIO_LDD11 | GPIO_LDD12 | GPIO_LDD13 | GPIO_LDD14 | GPIO_LDD15 | GPIO_SSP_TXD | GPIO_SSP_SCLK | GPIO_SSP_SFRM | GPIO_SDLC_SCLK | - COLLIE_GPIO_UCB1x00_RESET | COLLIE_GPIO_nMIC_ON | - COLLIE_GPIO_nREMOCON_ON | GPIO_32_768kHz; + _COLLIE_GPIO_UCB1x00_RESET | _COLLIE_GPIO_nMIC_ON | + _COLLIE_GPIO_nREMOCON_ON | GPIO_32_768kHz; PPDR = PPC_LDD0 | PPC_LDD1 | PPC_LDD2 | PPC_LDD3 | PPC_LDD4 | PPC_LDD5 | PPC_LDD6 | PPC_LDD7 | PPC_L_PCLK | PPC_L_LCLK | PPC_L_FCLK | PPC_L_BIAS | PPC_TXD1 | PPC_TXD2 | PPC_TXD3 | PPC_TXD4 | PPC_SCLK | PPC_SFRM; - PWER = COLLIE_GPIO_AC_IN | COLLIE_GPIO_CO | COLLIE_GPIO_ON_KEY | - COLLIE_GPIO_WAKEUP | COLLIE_GPIO_nREMOCON_INT | PWER_RTC; + PWER = _COLLIE_GPIO_AC_IN | _COLLIE_GPIO_CO | _COLLIE_GPIO_ON_KEY | + _COLLIE_GPIO_WAKEUP | _COLLIE_GPIO_nREMOCON_INT | PWER_RTC; - PGSR = COLLIE_GPIO_nREMOCON_ON; + PGSR = _COLLIE_GPIO_nREMOCON_ON; PSDR = PPC_RXD1 | PPC_RXD2 | PPC_RXD3 | PPC_RXD4; PCFR = PCFR_OPDE; + GPSR |= _COLLIE_GPIO_UCB1x00_RESET; + platform_scoop_config = &collie_pcmcia_config; @@ -272,9 +341,9 @@ static void __init collie_init(void) printk(KERN_WARNING "collie: Unable to register LoCoMo device\n"); } - sa11x0_set_flash_data(&collie_flash_data, collie_flash_resources, - ARRAY_SIZE(collie_flash_resources)); - sa11x0_set_mcp_data(&collie_mcp_data); + sa11x0_register_mtd(&collie_flash_data, collie_flash_resources, + ARRAY_SIZE(collie_flash_resources)); + sa11x0_register_mcp(&collie_mcp_data); sharpsl_save_param(); } diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c index 23cfdd59395..9faea1511c1 100644 --- a/arch/arm/mach-sa1100/generic.c +++ b/arch/arm/mach-sa1100/generic.c @@ -162,6 +162,17 @@ static void sa1100_power_off(void) PMCR = PMCR_SF; } +static void sa11x0_register_device(struct platform_device *dev, void *data) +{ + int err; + dev->dev.platform_data = data; + err = platform_device_register(dev); + if (err) + printk(KERN_ERR "Unable to register device %s: %d\n", + dev->name, err); +} + + static struct resource sa11x0udc_resources[] = { [0] = { .start = 0x80000000, @@ -234,9 +245,9 @@ static struct platform_device sa11x0mcp_device = { .resource = sa11x0mcp_resources, }; -void sa11x0_set_mcp_data(struct mcp_plat_data *data) +void sa11x0_register_mcp(struct mcp_plat_data *data) { - sa11x0mcp_device.dev.platform_data = data; + sa11x0_register_device(&sa11x0mcp_device, data); } static struct resource sa11x0ssp_resources[] = { @@ -293,13 +304,13 @@ static struct platform_device sa11x0mtd_device = { .id = -1, }; -void sa11x0_set_flash_data(struct flash_platform_data *flash, - struct resource *res, int nr) +void sa11x0_register_mtd(struct flash_platform_data *flash, + struct resource *res, int nr) { flash->name = "sa1100"; - sa11x0mtd_device.dev.platform_data = flash; sa11x0mtd_device.resource = res; sa11x0mtd_device.num_resources = nr; + sa11x0_register_device(&sa11x0mtd_device, flash); } static struct resource sa11x0ir_resources[] = { @@ -329,9 +340,9 @@ static struct platform_device sa11x0ir_device = { .resource = sa11x0ir_resources, }; -void sa11x0_set_irda_data(struct irda_platform_data *irda) +void sa11x0_register_irda(struct irda_platform_data *irda) { - sa11x0ir_device.dev.platform_data = irda; + sa11x0_register_device(&sa11x0ir_device, irda); } static struct platform_device sa11x0rtc_device = { @@ -343,21 +354,15 @@ static struct platform_device *sa11x0_devices[] __initdata = { &sa11x0udc_device, &sa11x0uart1_device, &sa11x0uart3_device, - &sa11x0mcp_device, &sa11x0ssp_device, &sa11x0pcmcia_device, &sa11x0fb_device, - &sa11x0mtd_device, &sa11x0rtc_device, }; static int __init sa1100_init(void) { pm_power_off = sa1100_power_off; - - if (sa11x0ir_device.dev.platform_data) - platform_device_register(&sa11x0ir_device); - return platform_add_devices(sa11x0_devices, ARRAY_SIZE(sa11x0_devices)); } diff --git a/arch/arm/mach-sa1100/generic.h b/arch/arm/mach-sa1100/generic.h index 793c2e6c991..ec03f187c52 100644 --- a/arch/arm/mach-sa1100/generic.h +++ b/arch/arm/mach-sa1100/generic.h @@ -32,14 +32,11 @@ extern unsigned int sa11x0_ppcr_to_freq(unsigned int idx); struct flash_platform_data; struct resource; -extern void sa11x0_set_flash_data(struct flash_platform_data *flash, - struct resource *res, int nr); - -struct sa11x0_ssp_plat_ops; -extern void sa11x0_set_ssp_data(struct sa11x0_ssp_plat_ops *ops); +void sa11x0_register_mtd(struct flash_platform_data *flash, + struct resource *res, int nr); struct irda_platform_data; -void sa11x0_set_irda_data(struct irda_platform_data *irda); +void sa11x0_register_irda(struct irda_platform_data *irda); struct mcp_plat_data; -void sa11x0_set_mcp_data(struct mcp_plat_data *data); +void sa11x0_register_mcp(struct mcp_plat_data *data); diff --git a/arch/arm/mach-sa1100/h3100.c b/arch/arm/mach-sa1100/h3100.c new file mode 100644 index 00000000000..0c7cea0dc01 --- /dev/null +++ b/arch/arm/mach-sa1100/h3100.c @@ -0,0 +1,95 @@ +/* + * Support for Compaq iPAQ H3100 handheld computer + * + * Copyright (c) 2000,1 Compaq Computer Corporation. (Author: Jamey Hicks) + * Copyright (c) 2009 Dmitry Artamonow <mad_soft@inbox.ru> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/gpio.h> + +#include <asm/mach-types.h> +#include <asm/mach/arch.h> +#include <asm/mach/irda.h> + +#include <mach/h3xxx.h> + +#include "generic.h" + +/* + * helper for sa1100fb + */ +static void h3100_lcd_power(int enable) +{ + if (!gpio_request(H3XXX_EGPIO_LCD_ON, "LCD ON")) { + gpio_set_value(H3100_GPIO_LCD_3V_ON, enable); + gpio_direction_output(H3XXX_EGPIO_LCD_ON, enable); + gpio_free(H3XXX_EGPIO_LCD_ON); + } else { + pr_err("%s: can't request H3XXX_EGPIO_LCD_ON\n", __func__); + } +} + + +static void __init h3100_map_io(void) +{ + h3xxx_map_io(); + + sa1100fb_lcd_power = h3100_lcd_power; + + /* Older bootldrs put GPIO2-9 in alternate mode on the + assumption that they are used for video */ + GAFR &= ~0x000001fb; +} + +/* + * This turns the IRDA power on or off on the Compaq H3100 + */ +static int h3100_irda_set_power(struct device *dev, unsigned int state) +{ + gpio_set_value(H3100_GPIO_IR_ON, state); + return 0; +} + +static void h3100_irda_set_speed(struct device *dev, unsigned int speed) +{ + gpio_set_value(H3100_GPIO_IR_FSEL, !(speed < 4000000)); +} + +static struct irda_platform_data h3100_irda_data = { + .set_power = h3100_irda_set_power, + .set_speed = h3100_irda_set_speed, +}; + +static struct gpio_default_state h3100_default_gpio[] = { + { H3100_GPIO_IR_ON, GPIO_MODE_OUT0, "IrDA power" }, + { H3100_GPIO_IR_FSEL, GPIO_MODE_OUT0, "IrDA fsel" }, + { H3XXX_GPIO_COM_DCD, GPIO_MODE_IN, "COM DCD" }, + { H3XXX_GPIO_COM_CTS, GPIO_MODE_IN, "COM CTS" }, + { H3XXX_GPIO_COM_RTS, GPIO_MODE_OUT0, "COM RTS" }, + { H3100_GPIO_LCD_3V_ON, GPIO_MODE_OUT0, "LCD 3v" }, +}; + +static void __init h3100_mach_init(void) +{ + h3xxx_init_gpio(h3100_default_gpio, ARRAY_SIZE(h3100_default_gpio)); + h3xxx_mach_init(); + sa11x0_register_irda(&h3100_irda_data); +} + +MACHINE_START(H3100, "Compaq iPAQ H3100") + .phys_io = 0x80000000, + .io_pg_offst = ((0xf8000000) >> 18) & 0xfffc, + .boot_params = 0xc0000100, + .map_io = h3100_map_io, + .init_irq = sa1100_init_irq, + .timer = &sa1100_timer, + .init_machine = h3100_mach_init, +MACHINE_END + diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c index 0eb2f159578..af3b71459f8 100644 --- a/arch/arm/mach-sa1100/h3600.c +++ b/arch/arm/mach-sa1100/h3600.c @@ -1,421 +1,127 @@ /* - * Hardware definitions for Compaq iPAQ H3xxx Handheld Computers + * Support for Compaq iPAQ H3600 handheld computer * - * Copyright 2000,1 Compaq Computer Corporation. + * Copyright (c) 2000,1 Compaq Computer Corporation. (Author: Jamey Hicks) + * Copyright (c) 2009 Dmitry Artamonow <mad_soft@inbox.ru> * - * Use consistent with the GNU GPL is permitted, - * provided that this copyright notice is - * preserved in its entirety in all copies and derived works. - * - * COMPAQ COMPUTER CORPORATION MAKES NO WARRANTIES, EXPRESSED OR IMPLIED, - * AS TO THE USEFULNESS OR CORRECTNESS OF THIS CODE OR ITS - * FITNESS FOR ANY PARTICULAR PURPOSE. - * - * Author: Jamey Hicks. - * - * History: - * - * 2001-10-?? Andrew Christian Added support for iPAQ H3800 - * and abstracted EGPIO interface. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. * */ -#include <linux/module.h> + #include <linux/init.h> #include <linux/kernel.h> -#include <linux/tty.h> -#include <linux/pm.h> -#include <linux/device.h> -#include <linux/mtd/mtd.h> -#include <linux/mtd/partitions.h> -#include <linux/serial_core.h> +#include <linux/gpio.h> -#include <asm/irq.h> -#include <mach/hardware.h> #include <asm/mach-types.h> -#include <asm/setup.h> - -#include <asm/mach/irq.h> #include <asm/mach/arch.h> -#include <asm/mach/flash.h> #include <asm/mach/irda.h> -#include <asm/mach/map.h> -#include <asm/mach/serial_sa1100.h> -#include <mach/h3600.h> -#include <mach/h3600_gpio.h> +#include <mach/h3xxx.h> #include "generic.h" -void (*assign_h3600_egpio)(enum ipaq_egpio_type x, int level); -EXPORT_SYMBOL(assign_h3600_egpio); - -static struct mtd_partition h3xxx_partitions[] = { - { - .name = "H3XXX boot firmware", - .size = 0x00040000, - .offset = 0, - .mask_flags = MTD_WRITEABLE, /* force read-only */ - }, { - .name = "H3XXX rootfs", - .size = MTDPART_SIZ_FULL, - .offset = 0x00040000, - } -}; - -static void h3xxx_set_vpp(int vpp) -{ - assign_h3600_egpio(IPAQ_EGPIO_VPP_ON, vpp); -} - -static struct flash_platform_data h3xxx_flash_data = { - .map_name = "cfi_probe", - .set_vpp = h3xxx_set_vpp, - .parts = h3xxx_partitions, - .nr_parts = ARRAY_SIZE(h3xxx_partitions), -}; - -static struct resource h3xxx_flash_resource = { - .start = SA1100_CS0_PHYS, - .end = SA1100_CS0_PHYS + SZ_32M - 1, - .flags = IORESOURCE_MEM, -}; - /* - * This turns the IRDA power on or off on the Compaq H3600 - */ -static int h3600_irda_set_power(struct device *dev, unsigned int state) -{ - assign_h3600_egpio( IPAQ_EGPIO_IR_ON, state ); - - return 0; -} - -static void h3600_irda_set_speed(struct device *dev, unsigned int speed) -{ - assign_h3600_egpio(IPAQ_EGPIO_IR_FSEL, !(speed < 4000000)); -} - -static struct irda_platform_data h3600_irda_data = { - .set_power = h3600_irda_set_power, - .set_speed = h3600_irda_set_speed, -}; - -static void h3xxx_mach_init(void) -{ - sa11x0_set_flash_data(&h3xxx_flash_data, &h3xxx_flash_resource, 1); - sa11x0_set_irda_data(&h3600_irda_data); -} - -/* - * low-level UART features + * helper for sa1100fb */ - -static void h3600_uart_set_mctrl(struct uart_port *port, u_int mctrl) +static void h3600_lcd_power(int enable) { - if (port->mapbase == _Ser3UTCR0) { - if (mctrl & TIOCM_RTS) - GPCR = GPIO_H3600_COM_RTS; - else - GPSR = GPIO_H3600_COM_RTS; + if (gpio_request(H3XXX_EGPIO_LCD_ON, "LCD power")) { + pr_err("%s: can't request H3XXX_EGPIO_LCD_ON\n", __func__); + goto err1; } -} - -static u_int h3600_uart_get_mctrl(struct uart_port *port) -{ - u_int ret = TIOCM_CD | TIOCM_CTS | TIOCM_DSR; - - if (port->mapbase == _Ser3UTCR0) { - int gplr = GPLR; - /* DCD and CTS bits are inverted in GPLR by RS232 transceiver */ - if (gplr & GPIO_H3600_COM_DCD) - ret &= ~TIOCM_CD; - if (gplr & GPIO_H3600_COM_CTS) - ret &= ~TIOCM_CTS; + if (gpio_request(H3600_EGPIO_LCD_PCI, "LCD control")) { + pr_err("%s: can't request H3XXX_EGPIO_LCD_PCI\n", __func__); + goto err2; + } + if (gpio_request(H3600_EGPIO_LCD_5V_ON, "LCD 5v")) { + pr_err("%s: can't request H3XXX_EGPIO_LCD_5V_ON\n", __func__); + goto err3; + } + if (gpio_request(H3600_EGPIO_LVDD_ON, "LCD 9v/-6.5v")) { + pr_err("%s: can't request H3600_EGPIO_LVDD_ON\n", __func__); + goto err4; } - return ret; -} + gpio_direction_output(H3XXX_EGPIO_LCD_ON, enable); + gpio_direction_output(H3600_EGPIO_LCD_PCI, enable); + gpio_direction_output(H3600_EGPIO_LCD_5V_ON, enable); + gpio_direction_output(H3600_EGPIO_LVDD_ON, enable); -static void h3600_uart_pm(struct uart_port *port, u_int state, u_int oldstate) -{ - if (port->mapbase == _Ser2UTCR0) { /* TODO: REMOVE THIS */ - assign_h3600_egpio(IPAQ_EGPIO_IR_ON, !state); - } else if (port->mapbase == _Ser3UTCR0) { - assign_h3600_egpio(IPAQ_EGPIO_RS232_ON, !state); - } + gpio_free(H3600_EGPIO_LVDD_ON); +err4: gpio_free(H3600_EGPIO_LCD_5V_ON); +err3: gpio_free(H3600_EGPIO_LCD_PCI); +err2: gpio_free(H3XXX_EGPIO_LCD_ON); +err1: return; } -/* - * Enable/Disable wake up events for this serial port. - * Obviously, we only support this on the normal COM port. - */ -static int h3600_uart_set_wake(struct uart_port *port, u_int enable) +static void __init h3600_map_io(void) { - int err = -EINVAL; + h3xxx_map_io(); - if (port->mapbase == _Ser3UTCR0) { - if (enable) - PWER |= PWER_GPIO23 | PWER_GPIO25; /* DCD and CTS */ - else - PWER &= ~(PWER_GPIO23 | PWER_GPIO25); /* DCD and CTS */ - err = 0; - } - return err; + sa1100fb_lcd_power = h3600_lcd_power; } -static struct sa1100_port_fns h3600_port_fns __initdata = { - .set_mctrl = h3600_uart_set_mctrl, - .get_mctrl = h3600_uart_get_mctrl, - .pm = h3600_uart_pm, - .set_wake = h3600_uart_set_wake, -}; - /* - * helper for sa1100fb + * This turns the IRDA power on or off on the Compaq H3600 */ -static void h3xxx_lcd_power(int enable) +static int h3600_irda_set_power(struct device *dev, unsigned int state) { - assign_h3600_egpio(IPAQ_EGPIO_LCD_POWER, enable); + gpio_set_value(H3600_EGPIO_IR_ON, state); + return 0; } -static struct map_desc h3600_io_desc[] __initdata = { - { /* static memory bank 2 CS#2 */ - .virtual = H3600_BANK_2_VIRT, - .pfn = __phys_to_pfn(SA1100_CS2_PHYS), - .length = 0x02800000, - .type = MT_DEVICE - }, { /* static memory bank 4 CS#4 */ - .virtual = H3600_BANK_4_VIRT, - .pfn = __phys_to_pfn(SA1100_CS4_PHYS), - .length = 0x00800000, - .type = MT_DEVICE - }, { /* EGPIO 0 CS#5 */ - .virtual = H3600_EGPIO_VIRT, - .pfn = __phys_to_pfn(H3600_EGPIO_PHYS), - .length = 0x01000000, - .type = MT_DEVICE - } -}; - -/* - * Common map_io initialization - */ - -static void __init h3xxx_map_io(void) +static void h3600_irda_set_speed(struct device *dev, unsigned int speed) { - sa1100_map_io(); - iotable_init(h3600_io_desc, ARRAY_SIZE(h3600_io_desc)); - - sa1100_register_uart_fns(&h3600_port_fns); - sa1100_register_uart(0, 3); /* Common serial port */ -// sa1100_register_uart(1, 1); /* Microcontroller on 3100/3600 */ - - /* Ensure those pins are outputs and driving low */ - PPDR |= PPC_TXD4 | PPC_SCLK | PPC_SFRM; - PPSR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM); - - /* Configure suspend conditions */ - PGSR = 0; - PWER = PWER_GPIO0 | PWER_RTC; - PCFR = PCFR_OPDE; - PSDR = 0; - - sa1100fb_lcd_power = h3xxx_lcd_power; + gpio_set_value(H3600_EGPIO_IR_FSEL, !(speed < 4000000)); } -/************************* H3100 *************************/ - -#ifdef CONFIG_SA1100_H3100 - -#define H3100_EGPIO (*(volatile unsigned int *)H3600_EGPIO_VIRT) -static unsigned int h3100_egpio = 0; - -static void h3100_control_egpio(enum ipaq_egpio_type x, int setp) +static int h3600_irda_startup(struct device *dev) { - unsigned int egpio = 0; - long gpio = 0; - unsigned long flags; - - switch (x) { - case IPAQ_EGPIO_LCD_POWER: - egpio |= EGPIO_H3600_LCD_ON; - gpio |= GPIO_H3100_LCD_3V_ON; - break; - case IPAQ_EGPIO_LCD_ENABLE: - break; - case IPAQ_EGPIO_CODEC_NRESET: - egpio |= EGPIO_H3600_CODEC_NRESET; - break; - case IPAQ_EGPIO_AUDIO_ON: - gpio |= GPIO_H3100_AUD_PWR_ON - | GPIO_H3100_AUD_ON; - break; - case IPAQ_EGPIO_QMUTE: - gpio |= GPIO_H3100_QMUTE; - break; - case IPAQ_EGPIO_OPT_NVRAM_ON: - egpio |= EGPIO_H3600_OPT_NVRAM_ON; - break; - case IPAQ_EGPIO_OPT_ON: - egpio |= EGPIO_H3600_OPT_ON; - break; - case IPAQ_EGPIO_CARD_RESET: - egpio |= EGPIO_H3600_CARD_RESET; - break; - case IPAQ_EGPIO_OPT_RESET: - egpio |= EGPIO_H3600_OPT_RESET; - break; - case IPAQ_EGPIO_IR_ON: - gpio |= GPIO_H3100_IR_ON; - break; - case IPAQ_EGPIO_IR_FSEL: - gpio |= GPIO_H3100_IR_FSEL; - break; - case IPAQ_EGPIO_RS232_ON: - egpio |= EGPIO_H3600_RS232_ON; - break; - case IPAQ_EGPIO_VPP_ON: - egpio |= EGPIO_H3600_VPP_ON; - break; - } + int err = gpio_request(H3600_EGPIO_IR_ON, "IrDA power"); + if (err) + goto err1; + err = gpio_direction_output(H3600_EGPIO_IR_ON, 0); + if (err) + goto err2; + err = gpio_request(H3600_EGPIO_IR_FSEL, "IrDA fsel"); + if (err) + goto err2; + err = gpio_direction_output(H3600_EGPIO_IR_FSEL, 0); + if (err) + goto err3; + return 0; - if (egpio || gpio) { - local_irq_save(flags); - if (setp) { - h3100_egpio |= egpio; - GPSR = gpio; - } else { - h3100_egpio &= ~egpio; - GPCR = gpio; - } - H3100_EGPIO = h3100_egpio; - local_irq_restore(flags); - } +err3: gpio_free(H3600_EGPIO_IR_FSEL); +err2: gpio_free(H3600_EGPIO_IR_ON); +err1: return err; } -#define H3100_DIRECT_EGPIO (GPIO_H3100_BT_ON \ - | GPIO_H3100_GPIO3 \ - | GPIO_H3100_QMUTE \ - | GPIO_H3100_LCD_3V_ON \ - | GPIO_H3100_AUD_ON \ - | GPIO_H3100_AUD_PWR_ON \ - | GPIO_H3100_IR_ON \ - | GPIO_H3100_IR_FSEL) - -static void __init h3100_map_io(void) +static void h3600_irda_shutdown(struct device *dev) { - h3xxx_map_io(); - - /* Initialize h3100-specific values here */ - GPCR = 0x0fffffff; /* All outputs are set low by default */ - GPDR = GPIO_H3600_COM_RTS | GPIO_H3600_L3_CLOCK | - GPIO_H3600_L3_MODE | GPIO_H3600_L3_DATA | - GPIO_H3600_CLK_SET1 | GPIO_H3600_CLK_SET0 | - H3100_DIRECT_EGPIO; - - /* Older bootldrs put GPIO2-9 in alternate mode on the - assumption that they are used for video */ - GAFR &= ~H3100_DIRECT_EGPIO; - - H3100_EGPIO = h3100_egpio; - assign_h3600_egpio = h3100_control_egpio; + gpio_free(H3600_EGPIO_IR_ON); + gpio_free(H3600_EGPIO_IR_FSEL); } -MACHINE_START(H3100, "Compaq iPAQ H3100") - .phys_io = 0x80000000, - .io_pg_offst = ((0xf8000000) >> 18) & 0xfffc, - .boot_params = 0xc0000100, - .map_io = h3100_map_io, - .init_irq = sa1100_init_irq, - .timer = &sa1100_timer, - .init_machine = h3xxx_mach_init, -MACHINE_END - -#endif /* CONFIG_SA1100_H3100 */ - -/************************* H3600 *************************/ - -#ifdef CONFIG_SA1100_H3600 - -#define H3600_EGPIO (*(volatile unsigned int *)H3600_EGPIO_VIRT) -static unsigned int h3600_egpio = EGPIO_H3600_RS232_ON; - -static void h3600_control_egpio(enum ipaq_egpio_type x, int setp) -{ - unsigned int egpio = 0; - unsigned long flags; - - switch (x) { - case IPAQ_EGPIO_LCD_POWER: - egpio |= EGPIO_H3600_LCD_ON | - EGPIO_H3600_LCD_PCI | - EGPIO_H3600_LCD_5V_ON | - EGPIO_H3600_LVDD_ON; - break; - case IPAQ_EGPIO_LCD_ENABLE: - break; - case IPAQ_EGPIO_CODEC_NRESET: - egpio |= EGPIO_H3600_CODEC_NRESET; - break; - case IPAQ_EGPIO_AUDIO_ON: - egpio |= EGPIO_H3600_AUD_AMP_ON | - EGPIO_H3600_AUD_PWR_ON; - break; - case IPAQ_EGPIO_QMUTE: - egpio |= EGPIO_H3600_QMUTE; - break; - case IPAQ_EGPIO_OPT_NVRAM_ON: - egpio |= EGPIO_H3600_OPT_NVRAM_ON; - break; - case IPAQ_EGPIO_OPT_ON: - egpio |= EGPIO_H3600_OPT_ON; - break; - case IPAQ_EGPIO_CARD_RESET: - egpio |= EGPIO_H3600_CARD_RESET; - break; - case IPAQ_EGPIO_OPT_RESET: - egpio |= EGPIO_H3600_OPT_RESET; - break; - case IPAQ_EGPIO_IR_ON: - egpio |= EGPIO_H3600_IR_ON; - break; - case IPAQ_EGPIO_IR_FSEL: - egpio |= EGPIO_H3600_IR_FSEL; - break; - case IPAQ_EGPIO_RS232_ON: - egpio |= EGPIO_H3600_RS232_ON; - break; - case IPAQ_EGPIO_VPP_ON: - egpio |= EGPIO_H3600_VPP_ON; - break; - } +static struct irda_platform_data h3600_irda_data = { + .set_power = h3600_irda_set_power, + .set_speed = h3600_irda_set_speed, + .startup = h3600_irda_startup, + .shutdown = h3600_irda_shutdown, +}; - if (egpio) { - local_irq_save(flags); - if (setp) - h3600_egpio |= egpio; - else - h3600_egpio &= ~egpio; - H3600_EGPIO = h3600_egpio; - local_irq_restore(flags); - } -} +static struct gpio_default_state h3600_default_gpio[] = { + { H3XXX_GPIO_COM_DCD, GPIO_MODE_IN, "COM DCD" }, + { H3XXX_GPIO_COM_CTS, GPIO_MODE_IN, "COM CTS" }, + { H3XXX_GPIO_COM_RTS, GPIO_MODE_OUT0, "COM RTS" }, +}; -static void __init h3600_map_io(void) +static void __init h3600_mach_init(void) { - h3xxx_map_io(); - - /* Initialize h3600-specific values here */ - - GPCR = 0x0fffffff; /* All outputs are set low by default */ - GPDR = GPIO_H3600_COM_RTS | GPIO_H3600_L3_CLOCK | - GPIO_H3600_L3_MODE | GPIO_H3600_L3_DATA | - GPIO_H3600_CLK_SET1 | GPIO_H3600_CLK_SET0 | - GPIO_LDD15 | GPIO_LDD14 | GPIO_LDD13 | GPIO_LDD12 | - GPIO_LDD11 | GPIO_LDD10 | GPIO_LDD9 | GPIO_LDD8; - - H3600_EGPIO = h3600_egpio; /* Maintains across sleep? */ - assign_h3600_egpio = h3600_control_egpio; + h3xxx_init_gpio(h3600_default_gpio, ARRAY_SIZE(h3600_default_gpio)); + h3xxx_mach_init(); + sa11x0_register_irda(&h3600_irda_data); } MACHINE_START(H3600, "Compaq iPAQ H3600") @@ -425,8 +131,6 @@ MACHINE_START(H3600, "Compaq iPAQ H3600") .map_io = h3600_map_io, .init_irq = sa1100_init_irq, .timer = &sa1100_timer, - .init_machine = h3xxx_mach_init, + .init_machine = h3600_mach_init, MACHINE_END -#endif /* CONFIG_SA1100_H3600 */ - diff --git a/arch/arm/mach-sa1100/h3xxx.c b/arch/arm/mach-sa1100/h3xxx.c new file mode 100644 index 00000000000..b0784c974c2 --- /dev/null +++ b/arch/arm/mach-sa1100/h3xxx.c @@ -0,0 +1,313 @@ +/* + * Support for Compaq iPAQ H3100 and H3600 handheld computers (common code) + * + * Copyright (c) 2000,1 Compaq Computer Corporation. (Author: Jamey Hicks) + * Copyright (c) 2009 Dmitry Artamonow <mad_soft@inbox.ru> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/kernel.h> +#include <linux/gpio.h> +#include <linux/gpio_keys.h> +#include <linux/input.h> +#include <linux/mfd/htc-egpio.h> +#include <linux/mtd/mtd.h> +#include <linux/mtd/partitions.h> +#include <linux/platform_device.h> +#include <linux/serial_core.h> + +#include <asm/mach/flash.h> +#include <asm/mach/map.h> +#include <asm/mach/serial_sa1100.h> + +#include <mach/h3xxx.h> + +#include "generic.h" + +void h3xxx_init_gpio(struct gpio_default_state *s, size_t n) +{ + while (n--) { + const char *name = s->name; + int err; + + if (!name) + name = "[init]"; + err = gpio_request(s->gpio, name); + if (err) { + printk(KERN_ERR "gpio%u: unable to request: %d\n", + s->gpio, err); + continue; + } + if (s->mode >= 0) { + err = gpio_direction_output(s->gpio, s->mode); + } else { + err = gpio_direction_input(s->gpio); + } + if (err) { + printk(KERN_ERR "gpio%u: unable to set direction: %d\n", + s->gpio, err); + continue; + } + if (!s->name) + gpio_free(s->gpio); + s++; + } +} + + +/* + * H3xxx flash support + */ +static struct mtd_partition h3xxx_partitions[] = { + { + .name = "H3XXX boot firmware", + .size = 0x00040000, + .offset = 0, + .mask_flags = MTD_WRITEABLE, /* force read-only */ + }, { + .name = "H3XXX rootfs", + .size = MTDPART_SIZ_FULL, + .offset = 0x00040000, + } +}; + +static void h3xxx_set_vpp(int vpp) +{ + gpio_set_value(H3XXX_EGPIO_VPP_ON, vpp); +} + +static int h3xxx_flash_init(void) +{ + int err = gpio_request(H3XXX_EGPIO_VPP_ON, "Flash Vpp"); + if (err) { + pr_err("%s: can't request H3XXX_EGPIO_VPP_ON\n", __func__); + return err; + } + + err = gpio_direction_output(H3XXX_EGPIO_VPP_ON, 0); + if (err) + gpio_free(H3XXX_EGPIO_VPP_ON); + + return err; +} + +static void h3xxx_flash_exit(void) +{ + gpio_free(H3XXX_EGPIO_VPP_ON); +} + +static struct flash_platform_data h3xxx_flash_data = { + .map_name = "cfi_probe", + .set_vpp = h3xxx_set_vpp, + .init = h3xxx_flash_init, + .exit = h3xxx_flash_exit, + .parts = h3xxx_partitions, + .nr_parts = ARRAY_SIZE(h3xxx_partitions), +}; + +static struct resource h3xxx_flash_resource = { + .start = SA1100_CS0_PHYS, + .end = SA1100_CS0_PHYS + SZ_32M - 1, + .flags = IORESOURCE_MEM, +}; + + +/* + * H3xxx uart support + */ +static void h3xxx_uart_set_mctrl(struct uart_port *port, u_int mctrl) +{ + if (port->mapbase == _Ser3UTCR0) { + gpio_set_value(H3XXX_GPIO_COM_RTS, !(mctrl & TIOCM_RTS)); + } +} + +static u_int h3xxx_uart_get_mctrl(struct uart_port *port) +{ + u_int ret = TIOCM_CD | TIOCM_CTS | TIOCM_DSR; + + if (port->mapbase == _Ser3UTCR0) { + /* + * DCD and CTS bits are inverted in GPLR by RS232 transceiver + */ + if (gpio_get_value(H3XXX_GPIO_COM_DCD)) + ret &= ~TIOCM_CD; + if (gpio_get_value(H3XXX_GPIO_COM_CTS)) + ret &= ~TIOCM_CTS; + } + + return ret; +} + +static void h3xxx_uart_pm(struct uart_port *port, u_int state, u_int oldstate) +{ + if (port->mapbase == _Ser3UTCR0) { + if (!gpio_request(H3XXX_EGPIO_RS232_ON, "RS232 transceiver")) { + gpio_direction_output(H3XXX_EGPIO_RS232_ON, !state); + gpio_free(H3XXX_EGPIO_RS232_ON); + } else { + pr_err("%s: can't request H3XXX_EGPIO_RS232_ON\n", + __func__); + } + } +} + +/* + * Enable/Disable wake up events for this serial port. + * Obviously, we only support this on the normal COM port. + */ +static int h3xxx_uart_set_wake(struct uart_port *port, u_int enable) +{ + int err = -EINVAL; + + if (port->mapbase == _Ser3UTCR0) { + if (enable) + PWER |= PWER_GPIO23 | PWER_GPIO25; /* DCD and CTS */ + else + PWER &= ~(PWER_GPIO23 | PWER_GPIO25); /* DCD and CTS */ + err = 0; + } + return err; +} + +static struct sa1100_port_fns h3xxx_port_fns __initdata = { + .set_mctrl = h3xxx_uart_set_mctrl, + .get_mctrl = h3xxx_uart_get_mctrl, + .pm = h3xxx_uart_pm, + .set_wake = h3xxx_uart_set_wake, +}; + +/* + * EGPIO + */ + +static struct resource egpio_resources[] = { + [0] = { + .start = H3600_EGPIO_PHYS, + .end = H3600_EGPIO_PHYS + 0x4 - 1, + .flags = IORESOURCE_MEM, + }, +}; + +static struct htc_egpio_chip egpio_chips[] = { + [0] = { + .reg_start = 0, + .gpio_base = H3XXX_EGPIO_BASE, + .num_gpios = 16, + .direction = HTC_EGPIO_OUTPUT, + .initial_values = 0x0080, /* H3XXX_EGPIO_RS232_ON */ + }, +}; + +static struct htc_egpio_platform_data egpio_info = { + .reg_width = 16, + .bus_width = 16, + .chip = egpio_chips, + .num_chips = ARRAY_SIZE(egpio_chips), +}; + +static struct platform_device h3xxx_egpio = { + .name = "htc-egpio", + .id = -1, + .resource = egpio_resources, + .num_resources = ARRAY_SIZE(egpio_resources), + .dev = { + .platform_data = &egpio_info, + }, +}; + +/* + * GPIO keys + */ + +static struct gpio_keys_button h3xxx_button_table[] = { + { + .code = KEY_POWER, + .gpio = H3XXX_GPIO_PWR_BUTTON, + .desc = "Power Button", + .active_low = 1, + .type = EV_KEY, + .wakeup = 1, + }, { + .code = KEY_ENTER, + .gpio = H3XXX_GPIO_ACTION_BUTTON, + .active_low = 1, + .desc = "Action button", + .type = EV_KEY, + .wakeup = 0, + }, +}; + +static struct gpio_keys_platform_data h3xxx_keys_data = { + .buttons = h3xxx_button_table, + .nbuttons = ARRAY_SIZE(h3xxx_button_table), +}; + +static struct platform_device h3xxx_keys = { + .name = "gpio-keys", + .id = -1, + .dev = { + .platform_data = &h3xxx_keys_data, + }, +}; + +static struct platform_device *h3xxx_devices[] = { + &h3xxx_egpio, + &h3xxx_keys, +}; + +void __init h3xxx_mach_init(void) +{ + sa1100_register_uart_fns(&h3xxx_port_fns); + sa11x0_register_mtd(&h3xxx_flash_data, &h3xxx_flash_resource, 1); + platform_add_devices(h3xxx_devices, ARRAY_SIZE(h3xxx_devices)); +} + +static struct map_desc h3600_io_desc[] __initdata = { + { /* static memory bank 2 CS#2 */ + .virtual = H3600_BANK_2_VIRT, + .pfn = __phys_to_pfn(SA1100_CS2_PHYS), + .length = 0x02800000, + .type = MT_DEVICE + }, { /* static memory bank 4 CS#4 */ + .virtual = H3600_BANK_4_VIRT, + .pfn = __phys_to_pfn(SA1100_CS4_PHYS), + .length = 0x00800000, + .type = MT_DEVICE + }, { /* EGPIO 0 CS#5 */ + .virtual = H3600_EGPIO_VIRT, + .pfn = __phys_to_pfn(H3600_EGPIO_PHYS), + .length = 0x01000000, + .type = MT_DEVICE + } +}; + +/* + * Common map_io initialization + */ + +void __init h3xxx_map_io(void) +{ + sa1100_map_io(); + iotable_init(h3600_io_desc, ARRAY_SIZE(h3600_io_desc)); + + sa1100_register_uart(0, 3); /* Common serial port */ +// sa1100_register_uart(1, 1); /* Microcontroller on 3100/3600 */ + + /* Ensure those pins are outputs and driving low */ + PPDR |= PPC_TXD4 | PPC_SCLK | PPC_SFRM; + PPSR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM); + + /* Configure suspend conditions */ + PGSR = 0; + PCFR = PCFR_OPDE; + PSDR = 0; + + GPCR = 0x0fffffff; /* All outputs are set low by default */ + GPDR = 0; /* Configure all GPIOs as input */ +} + diff --git a/arch/arm/mach-sa1100/hackkit.c b/arch/arm/mach-sa1100/hackkit.c index e7056c0b562..51568dfc8e9 100644 --- a/arch/arm/mach-sa1100/hackkit.c +++ b/arch/arm/mach-sa1100/hackkit.c @@ -187,7 +187,7 @@ static struct resource hackkit_flash_resource = { static void __init hackkit_init(void) { - sa11x0_set_flash_data(&hackkit_flash_data, &hackkit_flash_resource, 1); + sa11x0_register_mtd(&hackkit_flash_data, &hackkit_flash_resource, 1); } /********************************************************************** diff --git a/arch/arm/mach-sa1100/include/mach/collie.h b/arch/arm/mach-sa1100/include/mach/collie.h index 9efb569cdb6..71a0b3fdcc8 100644 --- a/arch/arm/mach-sa1100/include/mach/collie.h +++ b/arch/arm/mach-sa1100/include/mach/collie.h @@ -25,29 +25,39 @@ #define COLLIE_GPIO_VPEN (COLLIE_SCOOP_GPIO_BASE + 7) #define COLLIE_SCP_LB_VOL_CHG SCOOP_GPCR_PA19 -#define COLLIE_SCOOP_IO_DIR ( COLLIE_SCP_MUTE_L | COLLIE_SCP_MUTE_R | \ +#define COLLIE_SCOOP_IO_DIR (COLLIE_SCP_MUTE_L | COLLIE_SCP_MUTE_R | \ COLLIE_SCP_5VON | COLLIE_SCP_AMP_ON | \ - COLLIE_SCP_LB_VOL_CHG ) -#define COLLIE_SCOOP_IO_OUT ( COLLIE_SCP_MUTE_L | COLLIE_SCP_MUTE_R ) + COLLIE_SCP_LB_VOL_CHG) +#define COLLIE_SCOOP_IO_OUT (COLLIE_SCP_MUTE_L | COLLIE_SCP_MUTE_R) -/* GPIOs for which the generic definition doesn't say much */ +/* GPIOs for gpiolib */ -#define COLLIE_GPIO_ON_KEY GPIO_GPIO (0) -#define COLLIE_GPIO_AC_IN GPIO_GPIO (1) -#define COLLIE_GPIO_SDIO_INT GPIO_GPIO (11) -#define COLLIE_GPIO_CF_IRQ GPIO_GPIO (14) -#define COLLIE_GPIO_nREMOCON_INT GPIO_GPIO (15) -#define COLLIE_GPIO_UCB1x00_RESET GPIO_GPIO (16) -#define COLLIE_GPIO_nMIC_ON GPIO_GPIO (17) -#define COLLIE_GPIO_nREMOCON_ON GPIO_GPIO (18) -#define COLLIE_GPIO_CO GPIO_GPIO (20) -#define COLLIE_GPIO_MCP_CLK GPIO_GPIO (21) -#define COLLIE_GPIO_CF_CD GPIO_GPIO (22) -#define COLLIE_GPIO_UCB1x00_IRQ GPIO_GPIO (23) -#define COLLIE_GPIO_WAKEUP GPIO_GPIO (24) -#define COLLIE_GPIO_GA_INT GPIO_GPIO (25) -#define COLLIE_GPIO_MAIN_BAT_LOW GPIO_GPIO (26) +#define COLLIE_GPIO_ON_KEY (0) +#define COLLIE_GPIO_AC_IN (1) +#define COLLIE_GPIO_SDIO_INT (11) +#define COLLIE_GPIO_CF_IRQ (14) +#define COLLIE_GPIO_nREMOCON_INT (15) +#define COLLIE_GPIO_UCB1x00_RESET (16) +#define COLLIE_GPIO_nMIC_ON (17) +#define COLLIE_GPIO_nREMOCON_ON (18) +#define COLLIE_GPIO_CO (20) +#define COLLIE_GPIO_MCP_CLK (21) +#define COLLIE_GPIO_CF_CD (22) +#define COLLIE_GPIO_UCB1x00_IRQ (23) +#define COLLIE_GPIO_WAKEUP (24) +#define COLLIE_GPIO_GA_INT (25) +#define COLLIE_GPIO_MAIN_BAT_LOW (26) +/* GPIO definitions for direct register access */ + +#define _COLLIE_GPIO_ON_KEY GPIO_GPIO(0) +#define _COLLIE_GPIO_AC_IN GPIO_GPIO(1) +#define _COLLIE_GPIO_nREMOCON_INT GPIO_GPIO(15) +#define _COLLIE_GPIO_UCB1x00_RESET GPIO_GPIO(16) +#define _COLLIE_GPIO_nMIC_ON GPIO_GPIO(17) +#define _COLLIE_GPIO_nREMOCON_ON GPIO_GPIO(18) +#define _COLLIE_GPIO_CO GPIO_GPIO(20) +#define _COLLIE_GPIO_WAKEUP GPIO_GPIO(24) /* Interrupts */ #define COLLIE_IRQ_GPIO_ON_KEY IRQ_GPIO0 @@ -70,19 +80,20 @@ #define COLLIE_LCM_IRQ_GPIO_nSD_WP IRQ_LOCOMO_GPIO14 /* GPIO's on the TC35143AF (Toshiba Analog Frontend) */ -#define COLLIE_TC35143_GPIO_VERSION0 UCB_IO_0 /* GPIO0=Version */ -#define COLLIE_TC35143_GPIO_TBL_CHK UCB_IO_1 /* GPIO1=TBL_CHK */ -#define COLLIE_TC35143_GPIO_VPEN_ON UCB_IO_2 /* GPIO2=VPNE_ON */ -#define COLLIE_TC35143_GPIO_IR_ON UCB_IO_3 /* GPIO3=IR_ON */ -#define COLLIE_TC35143_GPIO_AMP_ON UCB_IO_4 /* GPIO4=AMP_ON */ -#define COLLIE_TC35143_GPIO_VERSION1 UCB_IO_5 /* GPIO5=Version */ -#define COLLIE_TC35143_GPIO_FS8KLPF UCB_IO_5 /* GPIO5=fs 8k LPF */ -#define COLLIE_TC35143_GPIO_BUZZER_BIAS UCB_IO_6 /* GPIO6=BUZZER BIAS */ -#define COLLIE_TC35143_GPIO_MBAT_ON UCB_IO_7 /* GPIO7=MBAT_ON */ -#define COLLIE_TC35143_GPIO_BBAT_ON UCB_IO_8 /* GPIO8=BBAT_ON */ -#define COLLIE_TC35143_GPIO_TMP_ON UCB_IO_9 /* GPIO9=TMP_ON */ -#define COLLIE_TC35143_GPIO_IN ( UCB_IO_0 | UCB_IO_2 | UCB_IO_5 ) -#define COLLIE_TC35143_GPIO_OUT ( UCB_IO_1 | UCB_IO_3 | UCB_IO_4 | UCB_IO_6 | \ - UCB_IO_7 | UCB_IO_8 | UCB_IO_9 ) +#define COLLIE_TC35143_GPIO_BASE (GPIO_MAX + 13) +#define COLLIE_TC35143_GPIO_VERSION0 UCB_IO_0 +#define COLLIE_TC35143_GPIO_TBL_CHK UCB_IO_1 +#define COLLIE_TC35143_GPIO_VPEN_ON UCB_IO_2 +#define COLLIE_TC35143_GPIO_IR_ON UCB_IO_3 +#define COLLIE_TC35143_GPIO_AMP_ON UCB_IO_4 +#define COLLIE_TC35143_GPIO_VERSION1 UCB_IO_5 +#define COLLIE_TC35143_GPIO_FS8KLPF UCB_IO_5 +#define COLLIE_TC35143_GPIO_BUZZER_BIAS UCB_IO_6 +#define COLLIE_GPIO_MBAT_ON (COLLIE_TC35143_GPIO_BASE + 7) +#define COLLIE_GPIO_BBAT_ON (COLLIE_TC35143_GPIO_BASE + 8) +#define COLLIE_GPIO_TMP_ON (COLLIE_TC35143_GPIO_BASE + 9) +#define COLLIE_TC35143_GPIO_IN (UCB_IO_0 | UCB_IO_2 | UCB_IO_5) +#define COLLIE_TC35143_GPIO_OUT (UCB_IO_1 | UCB_IO_3 | UCB_IO_4 \ + | UCB_IO_6) #endif diff --git a/arch/arm/mach-sa1100/include/mach/gpio.h b/arch/arm/mach-sa1100/include/mach/gpio.h index 582a0c92da5..7befc104e9a 100644 --- a/arch/arm/mach-sa1100/include/mach/gpio.h +++ b/arch/arm/mach-sa1100/include/mach/gpio.h @@ -49,20 +49,9 @@ static inline void gpio_set_value(unsigned gpio, int value) #define gpio_cansleep __gpio_cansleep -static inline unsigned gpio_to_irq(unsigned gpio) -{ - if (gpio < 11) - return IRQ_GPIO0 + gpio; - else - return IRQ_GPIO11 - 11 + gpio; -} - -static inline unsigned irq_to_gpio(unsigned irq) -{ - if (irq < IRQ_GPIO11_27) - return irq - IRQ_GPIO0; - else - return irq - IRQ_GPIO11 + 11; -} +#define gpio_to_irq(gpio) ((gpio < 11) ? (IRQ_GPIO0 + gpio) : \ + (IRQ_GPIO11 - 11 + gpio)) +#define irq_to_gpio(irq) ((irq < IRQ_GPIO11_27) ? (irq - IRQ_GPIO0) : \ + (irq - IRQ_GPIO11 + 11)) #endif diff --git a/arch/arm/mach-sa1100/include/mach/h3600.h b/arch/arm/mach-sa1100/include/mach/h3600.h deleted file mode 100644 index 2827faa4742..00000000000 --- a/arch/arm/mach-sa1100/include/mach/h3600.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * - * Definitions for H3600 Handheld Computer - * - * Copyright 2000 Compaq Computer Corporation. - * - * Use consistent with the GNU GPL is permitted, - * provided that this copyright notice is - * preserved in its entirety in all copies and derived works. - * - * COMPAQ COMPUTER CORPORATION MAKES NO WARRANTIES, EXPRESSED OR IMPLIED, - * AS TO THE USEFULNESS OR CORRECTNESS OF THIS CODE OR ITS - * FITNESS FOR ANY PARTICULAR PURPOSE. - * - * Author: Jamey Hicks. - * - * History: - * - * 2001-10-?? Andrew Christian Added support for iPAQ H3800 - * - */ - -#ifndef _INCLUDE_H3600_H_ -#define _INCLUDE_H3600_H_ - -typedef int __bitwise pm_request_t; - -#define PM_SUSPEND ((__force pm_request_t) 1) /* enter D1-D3 */ -#define PM_RESUME ((__force pm_request_t) 2) /* enter D0 */ - -/* generalized support for H3xxx series Compaq Pocket PC's */ -#define machine_is_h3xxx() (machine_is_h3100() || machine_is_h3600()) - -/* Physical memory regions corresponding to chip selects */ -#define H3600_EGPIO_PHYS (SA1100_CS5_PHYS + 0x01000000) -#define H3600_BANK_2_PHYS SA1100_CS2_PHYS -#define H3600_BANK_4_PHYS SA1100_CS4_PHYS - -/* Virtual memory regions corresponding to chip selects 2 & 4 (used on sleeves) */ -#define H3600_EGPIO_VIRT 0xf0000000 -#define H3600_BANK_2_VIRT 0xf1000000 -#define H3600_BANK_4_VIRT 0xf3800000 - -/* - Machine-independent GPIO definitions - --- these are common across all current iPAQ platforms -*/ - -#define GPIO_H3600_NPOWER_BUTTON GPIO_GPIO (0) /* Also known as the "off button" */ - -#define GPIO_H3600_PCMCIA_CD1 GPIO_GPIO (10) -#define GPIO_H3600_PCMCIA_IRQ1 GPIO_GPIO (11) - -/* UDA1341 L3 Interface */ -#define GPIO_H3600_L3_DATA GPIO_GPIO (14) -#define GPIO_H3600_L3_MODE GPIO_GPIO (15) -#define GPIO_H3600_L3_CLOCK GPIO_GPIO (16) - -#define GPIO_H3600_PCMCIA_CD0 GPIO_GPIO (17) -#define GPIO_H3600_SYS_CLK GPIO_GPIO (19) -#define GPIO_H3600_PCMCIA_IRQ0 GPIO_GPIO (21) - -#define GPIO_H3600_COM_DCD GPIO_GPIO (23) -#define GPIO_H3600_OPT_IRQ GPIO_GPIO (24) -#define GPIO_H3600_COM_CTS GPIO_GPIO (25) -#define GPIO_H3600_COM_RTS GPIO_GPIO (26) - -#define IRQ_GPIO_H3600_NPOWER_BUTTON IRQ_GPIO0 -#define IRQ_GPIO_H3600_PCMCIA_CD1 IRQ_GPIO10 -#define IRQ_GPIO_H3600_PCMCIA_IRQ1 IRQ_GPIO11 -#define IRQ_GPIO_H3600_PCMCIA_CD0 IRQ_GPIO17 -#define IRQ_GPIO_H3600_PCMCIA_IRQ0 IRQ_GPIO21 -#define IRQ_GPIO_H3600_COM_DCD IRQ_GPIO23 -#define IRQ_GPIO_H3600_OPT_IRQ IRQ_GPIO24 -#define IRQ_GPIO_H3600_COM_CTS IRQ_GPIO25 - - -#ifndef __ASSEMBLY__ - -enum ipaq_egpio_type { - IPAQ_EGPIO_LCD_POWER, /* Power to the LCD panel */ - IPAQ_EGPIO_CODEC_NRESET, /* Clear to reset the audio codec (remember to return high) */ - IPAQ_EGPIO_AUDIO_ON, /* Audio power */ - IPAQ_EGPIO_QMUTE, /* Audio muting */ - IPAQ_EGPIO_OPT_NVRAM_ON, /* Non-volatile RAM on extension sleeves (SPI interface) */ - IPAQ_EGPIO_OPT_ON, /* Power to extension sleeves */ - IPAQ_EGPIO_CARD_RESET, /* Reset PCMCIA cards on extension sleeve (???) */ - IPAQ_EGPIO_OPT_RESET, /* Reset option pack (???) */ - IPAQ_EGPIO_IR_ON, /* IR sensor/emitter power */ - IPAQ_EGPIO_IR_FSEL, /* IR speed selection 1->fast, 0->slow */ - IPAQ_EGPIO_RS232_ON, /* Maxim RS232 chip power */ - IPAQ_EGPIO_VPP_ON, /* Turn on power to flash programming */ - IPAQ_EGPIO_LCD_ENABLE, /* Enable/disable LCD controller */ -}; - -extern void (*assign_h3600_egpio)(enum ipaq_egpio_type x, int level); - -#endif /* ASSEMBLY */ - -#endif /* _INCLUDE_H3600_H_ */ diff --git a/arch/arm/mach-sa1100/include/mach/h3600_gpio.h b/arch/arm/mach-sa1100/include/mach/h3600_gpio.h deleted file mode 100644 index a36ca76d018..00000000000 --- a/arch/arm/mach-sa1100/include/mach/h3600_gpio.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * - * Definitions for H3600 Handheld Computer - * - * Copyright 2000 Compaq Computer Corporation. - * - * Use consistent with the GNU GPL is permitted, - * provided that this copyright notice is - * preserved in its entirety in all copies and derived works. - * - * COMPAQ COMPUTER CORPORATION MAKES NO WARRANTIES, EXPRESSED OR IMPLIED, - * AS TO THE USEFULNESS OR CORRECTNESS OF THIS CODE OR ITS - * FITNESS FOR ANY PARTICULAR PURPOSE. - * - * Author: Jamey Hicks. - * - * History: - * - * 2001-10-?? Andrew Christian Added support for iPAQ H3800 - * - */ - -#ifndef _INCLUDE_H3600_GPIO_H_ -#define _INCLUDE_H3600_GPIO_H_ - -/* - * GPIO lines that are common across ALL iPAQ models are in "h3600.h" - * This file contains machine-specific definitions - */ - -#define GPIO_H3600_SUSPEND GPIO_GPIO (0) -/* GPIO[2:9] used by LCD on H3600/3800, used as GPIO on H3100 */ -#define GPIO_H3100_BT_ON GPIO_GPIO (2) -#define GPIO_H3100_GPIO3 GPIO_GPIO (3) -#define GPIO_H3100_QMUTE GPIO_GPIO (4) -#define GPIO_H3100_LCD_3V_ON GPIO_GPIO (5) -#define GPIO_H3100_AUD_ON GPIO_GPIO (6) -#define GPIO_H3100_AUD_PWR_ON GPIO_GPIO (7) -#define GPIO_H3100_IR_ON GPIO_GPIO (8) -#define GPIO_H3100_IR_FSEL GPIO_GPIO (9) - -/* for H3600, audio sample rate clock generator */ -#define GPIO_H3600_CLK_SET0 GPIO_GPIO (12) -#define GPIO_H3600_CLK_SET1 GPIO_GPIO (13) - -#define GPIO_H3600_ACTION_BUTTON GPIO_GPIO (18) -#define GPIO_H3600_SOFT_RESET GPIO_GPIO (20) /* Also known as BATT_FAULT */ -#define GPIO_H3600_OPT_LOCK GPIO_GPIO (22) -#define GPIO_H3600_OPT_DET GPIO_GPIO (27) - -/****************************************************/ - -#define IRQ_GPIO_H3600_ACTION_BUTTON IRQ_GPIO18 -#define IRQ_GPIO_H3600_OPT_DET IRQ_GPIO27 - -/* H3100 / 3600 EGPIO pins */ -#define EGPIO_H3600_VPP_ON (1 << 0) -#define EGPIO_H3600_CARD_RESET (1 << 1) /* reset the attached pcmcia/compactflash card. active high. */ -#define EGPIO_H3600_OPT_RESET (1 << 2) /* reset the attached option pack. active high. */ -#define EGPIO_H3600_CODEC_NRESET (1 << 3) /* reset the onboard UDA1341. active low. */ -#define EGPIO_H3600_OPT_NVRAM_ON (1 << 4) /* apply power to optionpack nvram, active high. */ -#define EGPIO_H3600_OPT_ON (1 << 5) /* full power to option pack. active high. */ -#define EGPIO_H3600_LCD_ON (1 << 6) /* enable 3.3V to LCD. active high. */ -#define EGPIO_H3600_RS232_ON (1 << 7) /* UART3 transceiver force on. Active high. */ - -/* H3600 only EGPIO pins */ -#define EGPIO_H3600_LCD_PCI (1 << 8) /* LCD control IC enable. active high. */ -#define EGPIO_H3600_IR_ON (1 << 9) /* apply power to IR module. active high. */ -#define EGPIO_H3600_AUD_AMP_ON (1 << 10) /* apply power to audio power amp. active high. */ -#define EGPIO_H3600_AUD_PWR_ON (1 << 11) /* apply power to reset of audio circuit. active high. */ -#define EGPIO_H3600_QMUTE (1 << 12) /* mute control for onboard UDA1341. active high. */ -#define EGPIO_H3600_IR_FSEL (1 << 13) /* IR speed select: 1->fast, 0->slow */ -#define EGPIO_H3600_LCD_5V_ON (1 << 14) /* enable 5V to LCD. active high. */ -#define EGPIO_H3600_LVDD_ON (1 << 15) /* enable 9V and -6.5V to LCD. */ - - -#endif /* _INCLUDE_H3600_GPIO_H_ */ diff --git a/arch/arm/mach-sa1100/include/mach/h3xxx.h b/arch/arm/mach-sa1100/include/mach/h3xxx.h new file mode 100644 index 00000000000..7d9df16f04a --- /dev/null +++ b/arch/arm/mach-sa1100/include/mach/h3xxx.h @@ -0,0 +1,94 @@ +/* + * Definitions for Compaq iPAQ H3100 and H3600 handheld computers + * + * (c) 2000 Compaq Computer Corporation. (Author: Jamey Hicks) + * (c) 2009 Dmitry Artamonow <mad_soft@inbox.ru> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#ifndef _INCLUDE_H3XXX_H_ +#define _INCLUDE_H3XXX_H_ + +/* Physical memory regions corresponding to chip selects */ +#define H3600_EGPIO_PHYS (SA1100_CS5_PHYS + 0x01000000) +#define H3600_BANK_2_PHYS SA1100_CS2_PHYS +#define H3600_BANK_4_PHYS SA1100_CS4_PHYS + +/* Virtual memory regions corresponding to chip selects 2 & 4 (used on sleeves) */ +#define H3600_EGPIO_VIRT 0xf0000000 +#define H3600_BANK_2_VIRT 0xf1000000 +#define H3600_BANK_4_VIRT 0xf3800000 + +/* + * gpiolib numbers for all iPAQs + */ +#define H3XXX_GPIO_PWR_BUTTON 0 +#define H3XXX_GPIO_PCMCIA_CD1 10 +#define H3XXX_GPIO_PCMCIA_IRQ1 11 +#define H3XXX_GPIO_PCMCIA_CD0 17 +#define H3XXX_GPIO_ACTION_BUTTON 18 +#define H3XXX_GPIO_SYS_CLK 19 +#define H3XXX_GPIO_PCMCIA_IRQ0 21 +#define H3XXX_GPIO_COM_DCD 23 +#define H3XXX_GPIO_OPTION 24 +#define H3XXX_GPIO_COM_CTS 25 +#define H3XXX_GPIO_COM_RTS 26 + +/* machine-specific gpios */ + +#define H3100_GPIO_BT_ON 2 +#define H3100_GPIO_QMUTE 4 +#define H3100_GPIO_LCD_3V_ON 5 +#define H3100_GPIO_AUD_ON 6 +#define H3100_GPIO_AUD_PWR_ON 7 +#define H3100_GPIO_IR_ON 8 +#define H3100_GPIO_IR_FSEL 9 + +#define H3600_GPIO_CLK_SET0 12 /* audio sample rate clock generator */ +#define H3600_GPIO_CLK_SET1 13 +#define H3600_GPIO_SOFT_RESET 20 /* also known as BATT_FAULT */ +#define H3600_GPIO_OPT_LOCK 22 +#define H3600_GPIO_OPT_DET 27 + + +/* H3100 / 3600 EGPIO pins */ +#define H3XXX_EGPIO_BASE (GPIO_MAX + 1) + +#define H3XXX_EGPIO_VPP_ON (H3XXX_EGPIO_BASE + 0) +#define H3XXX_EGPIO_CARD_RESET (H3XXX_EGPIO_BASE + 1) /* reset the attached pcmcia/compactflash card. active high. */ +#define H3XXX_EGPIO_OPT_RESET (H3XXX_EGPIO_BASE + 2) /* reset the attached option pack. active high. */ +#define H3XXX_EGPIO_CODEC_NRESET (H3XXX_EGPIO_BASE + 3) /* reset the onboard UDA1341. active low. */ +#define H3XXX_EGPIO_OPT_NVRAM_ON (H3XXX_EGPIO_BASE + 4) /* apply power to optionpack nvram, active high. */ +#define H3XXX_EGPIO_OPT_ON (H3XXX_EGPIO_BASE + 5) /* full power to option pack. active high. */ +#define H3XXX_EGPIO_LCD_ON (H3XXX_EGPIO_BASE + 6) /* enable 3.3V to LCD. active high. */ +#define H3XXX_EGPIO_RS232_ON (H3XXX_EGPIO_BASE + 7) /* UART3 transceiver force on. Active high. */ + +/* H3600 only EGPIO pins */ +#define H3600_EGPIO_LCD_PCI (H3XXX_EGPIO_BASE + 8) /* LCD control IC enable. active high. */ +#define H3600_EGPIO_IR_ON (H3XXX_EGPIO_BASE + 9) /* apply power to IR module. active high. */ +#define H3600_EGPIO_AUD_AMP_ON (H3XXX_EGPIO_BASE + 10) /* apply power to audio power amp. active high. */ +#define H3600_EGPIO_AUD_PWR_ON (H3XXX_EGPIO_BASE + 11) /* apply power to reset of audio circuit. active high. */ +#define H3600_EGPIO_QMUTE (H3XXX_EGPIO_BASE + 12) /* mute control for onboard UDA1341. active high. */ +#define H3600_EGPIO_IR_FSEL (H3XXX_EGPIO_BASE + 13) /* IR speed select: 1->fast, 0->slow */ +#define H3600_EGPIO_LCD_5V_ON (H3XXX_EGPIO_BASE + 14) /* enable 5V to LCD. active high. */ +#define H3600_EGPIO_LVDD_ON (H3XXX_EGPIO_BASE + 15) /* enable 9V and -6.5V to LCD. */ + +struct gpio_default_state { + int gpio; + int mode; + const char *name; +}; + +#define GPIO_MODE_IN -1 +#define GPIO_MODE_OUT0 0 +#define GPIO_MODE_OUT1 1 + +void h3xxx_init_gpio(struct gpio_default_state *s, size_t n); +void __init h3xxx_map_io(void); +void __init h3xxx_mach_init(void); + +#endif /* _INCLUDE_H3XXX_H_ */ diff --git a/arch/arm/mach-sa1100/include/mach/mcp.h b/arch/arm/mach-sa1100/include/mach/mcp.h index fb8b09a57ad..ed1a331508a 100644 --- a/arch/arm/mach-sa1100/include/mach/mcp.h +++ b/arch/arm/mach-sa1100/include/mach/mcp.h @@ -16,6 +16,7 @@ struct mcp_plat_data { u32 mccr0; u32 mccr1; unsigned int sclk_rate; + int gpio_base; }; #endif diff --git a/arch/arm/mach-sa1100/jornada720.c b/arch/arm/mach-sa1100/jornada720.c index fd776bb666c..13ebd2d99bf 100644 --- a/arch/arm/mach-sa1100/jornada720.c +++ b/arch/arm/mach-sa1100/jornada720.c @@ -354,7 +354,7 @@ static struct resource jornada720_flash_resource = { static void __init jornada720_mach_init(void) { - sa11x0_set_flash_data(&jornada720_flash_data, &jornada720_flash_resource, 1); + sa11x0_register_mtd(&jornada720_flash_data, &jornada720_flash_resource, 1); } MACHINE_START(JORNADA720, "HP Jornada 720") diff --git a/arch/arm/mach-sa1100/lart.c b/arch/arm/mach-sa1100/lart.c index 1f940df0e5a..68069d6dc07 100644 --- a/arch/arm/mach-sa1100/lart.c +++ b/arch/arm/mach-sa1100/lart.c @@ -28,7 +28,7 @@ static struct mcp_plat_data lart_mcp_data = { static void __init lart_init(void) { - sa11x0_set_mcp_data(&lart_mcp_data); + sa11x0_register_mcp(&lart_mcp_data); } static struct map_desc lart_io_desc[] __initdata = { diff --git a/arch/arm/mach-sa1100/pleb.c b/arch/arm/mach-sa1100/pleb.c index e1458bc1868..1ccd6018d3a 100644 --- a/arch/arm/mach-sa1100/pleb.c +++ b/arch/arm/mach-sa1100/pleb.c @@ -109,7 +109,7 @@ static struct flash_platform_data pleb_flash_data = { static void __init pleb_init(void) { - sa11x0_set_flash_data(&pleb_flash_data, pleb_flash_resources, + sa11x0_register_mtd(&pleb_flash_data, pleb_flash_resources, ARRAY_SIZE(pleb_flash_resources)); diff --git a/arch/arm/mach-sa1100/shannon.c b/arch/arm/mach-sa1100/shannon.c index ddd917d1083..85e82bb73d7 100644 --- a/arch/arm/mach-sa1100/shannon.c +++ b/arch/arm/mach-sa1100/shannon.c @@ -59,8 +59,8 @@ static struct mcp_plat_data shannon_mcp_data = { static void __init shannon_init(void) { - sa11x0_set_flash_data(&shannon_flash_data, &shannon_flash_resource, 1); - sa11x0_set_mcp_data(&shannon_mcp_data); + sa11x0_register_mtd(&shannon_flash_data, &shannon_flash_resource, 1); + sa11x0_register_mcp(&shannon_mcp_data); } static void __init shannon_map_io(void) diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c index 3c74534f7fe..49cfd64663a 100644 --- a/arch/arm/mach-sa1100/simpad.c +++ b/arch/arm/mach-sa1100/simpad.c @@ -166,9 +166,9 @@ static void __init simpad_map_io(void) PCFR = 0; PSDR = 0; - sa11x0_set_flash_data(&simpad_flash_data, simpad_flash_resources, + sa11x0_register_mtd(&simpad_flash_data, simpad_flash_resources, ARRAY_SIZE(simpad_flash_resources)); - sa11x0_set_mcp_data(&simpad_mcp_data); + sa11x0_register_mcp(&simpad_mcp_data); } static void simpad_power_off(void) diff --git a/arch/avr32/include/asm/bug.h b/arch/avr32/include/asm/bug.h index 331d45bab18..2aa373cc61b 100644 --- a/arch/avr32/include/asm/bug.h +++ b/arch/avr32/include/asm/bug.h @@ -52,7 +52,7 @@ #define BUG() \ do { \ _BUG_OR_WARN(0); \ - for (;;); \ + unreachable(); \ } while (0) #define WARN_ON(condition) \ diff --git a/arch/ia64/include/asm/swiotlb.h b/arch/ia64/include/asm/swiotlb.h index dcbaea7ce12..f0acde68aae 100644 --- a/arch/ia64/include/asm/swiotlb.h +++ b/arch/ia64/include/asm/swiotlb.h @@ -4,8 +4,6 @@ #include <linux/dma-mapping.h> #include <linux/swiotlb.h> -extern int swiotlb_force; - #ifdef CONFIG_SWIOTLB extern int swiotlb; extern void pci_swiotlb_init(void); diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c index 285aae8431c..53292abf846 100644 --- a/arch/ia64/kernel/pci-swiotlb.c +++ b/arch/ia64/kernel/pci-swiotlb.c @@ -41,7 +41,7 @@ struct dma_map_ops swiotlb_dma_ops = { void __init swiotlb_dma_init(void) { dma_ops = &swiotlb_dma_ops; - swiotlb_init(); + swiotlb_init(1); } void __init pci_swiotlb_init(void) @@ -51,7 +51,7 @@ void __init pci_swiotlb_init(void) swiotlb = 1; printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n"); machvec_init("dig"); - swiotlb_init(); + swiotlb_init(1); dma_ops = &swiotlb_dma_ops; #else panic("Unable to find Intel IOMMU"); diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 1aad0d9f507..fd7620f025f 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -358,7 +358,14 @@ config SGI_IP22 select SWAP_IO_SPACE select SYS_HAS_CPU_R4X00 select SYS_HAS_CPU_R5000 - select SYS_HAS_EARLY_PRINTK + # + # Disable EARLY_PRINTK for now since it leads to overwritten prom + # memory during early boot on some machines. + # + # See http://www.linux-mips.org/cgi-bin/mesg.cgi?a=linux-mips&i=20091119164009.GA15038%40deprecation.cyrius.com + # for a more details discussion + # + # select SYS_HAS_EARLY_PRINTK select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN @@ -410,7 +417,14 @@ config SGI_IP28 select SGI_HAS_ZILOG select SWAP_IO_SPACE select SYS_HAS_CPU_R10000 - select SYS_HAS_EARLY_PRINTK + # + # Disable EARLY_PRINTK for now since it leads to overwritten prom + # memory during early boot on some machines. + # + # See http://www.linux-mips.org/cgi-bin/mesg.cgi?a=linux-mips&i=20091119164009.GA15038%40deprecation.cyrius.com + # for a more details discussion + # + # select SYS_HAS_EARLY_PRINTK select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN help @@ -1439,6 +1453,7 @@ choice config PAGE_SIZE_4KB bool "4kB" + depends on !CPU_LOONGSON2 help This option select the standard 4kB Linux page size. On some R3000-family processors this is the only available page size. Using @@ -1763,7 +1778,7 @@ config SYS_SUPPORTS_SMARTMIPS config ARCH_FLATMEM_ENABLE def_bool y - depends on !NUMA + depends on !NUMA && !CPU_LOONGSON2 config ARCH_DISCONTIGMEM_ENABLE bool diff --git a/arch/mips/include/asm/bug.h b/arch/mips/include/asm/bug.h index 6cf29c26e87..540c98a810d 100644 --- a/arch/mips/include/asm/bug.h +++ b/arch/mips/include/asm/bug.h @@ -11,9 +11,7 @@ static inline void __noreturn BUG(void) { __asm__ __volatile__("break %0" : : "i" (BRK_BUG)); - /* Fool GCC into thinking the function doesn't return. */ - while (1) - ; + unreachable(); } #define HAVE_ARCH_BUG diff --git a/arch/mips/include/asm/mman.h b/arch/mips/include/asm/mman.h index a2250f390a2..c892bfb3e2c 100644 --- a/arch/mips/include/asm/mman.h +++ b/arch/mips/include/asm/mman.h @@ -75,6 +75,7 @@ #define MADV_MERGEABLE 12 /* KSM may merge identical pages */ #define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */ +#define MADV_HWPOISON 100 /* poison a page for testing */ /* compatibility flags */ #define MAP_FILE 0 diff --git a/arch/mips/include/asm/system.h b/arch/mips/include/asm/system.h index fcf5f98d90c..83b5509e09e 100644 --- a/arch/mips/include/asm/system.h +++ b/arch/mips/include/asm/system.h @@ -12,6 +12,7 @@ #ifndef _ASM_SYSTEM_H #define _ASM_SYSTEM_H +#include <linux/kernel.h> #include <linux/types.h> #include <linux/irqflags.h> @@ -193,10 +194,6 @@ extern __u64 __xchg_u64_unsupported_on_32bit_kernels(volatile __u64 * m, __u64 v #define __xchg_u64 __xchg_u64_unsupported_on_32bit_kernels #endif -/* This function doesn't exist, so you'll get a linker error - if something tries to do an invalid xchg(). */ -extern void __xchg_called_with_bad_pointer(void); - static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) { switch (size) { @@ -205,11 +202,17 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz case 8: return __xchg_u64(ptr, x); } - __xchg_called_with_bad_pointer(); + return x; } -#define xchg(ptr, x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))) +#define xchg(ptr, x) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) & ~0xc); \ + \ + ((__typeof__(*(ptr))) \ + __xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))); \ +}) extern void set_handler(unsigned long offset, void *addr, unsigned long len); extern void set_uncached_handler(unsigned long offset, void *addr, unsigned long len); diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 3fe1fcfa2e7..fe0d7980560 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -306,6 +306,7 @@ static inline int mips_atomic_set(struct pt_regs *regs, if (cpu_has_llsc && R10000_LLSC_WAR) { __asm__ __volatile__ ( + " .set mips3 \n" " li %[err], 0 \n" "1: ll %[old], (%[addr]) \n" " move %[tmp], %[new] \n" @@ -320,6 +321,7 @@ static inline int mips_atomic_set(struct pt_regs *regs, " "STR(PTR)" 1b, 4b \n" " "STR(PTR)" 2b, 4b \n" " .previous \n" + " .set mips0 \n" : [old] "=&r" (old), [err] "=&r" (err), [tmp] "=&r" (tmp) @@ -329,6 +331,7 @@ static inline int mips_atomic_set(struct pt_regs *regs, : "memory"); } else if (cpu_has_llsc) { __asm__ __volatile__ ( + " .set mips3 \n" " li %[err], 0 \n" "1: ll %[old], (%[addr]) \n" " move %[tmp], %[new] \n" @@ -347,6 +350,7 @@ static inline int mips_atomic_set(struct pt_regs *regs, " "STR(PTR)" 1b, 5b \n" " "STR(PTR)" 2b, 5b \n" " .previous \n" + " .set mips0 \n" : [old] "=&r" (old), [err] "=&r" (err), [tmp] "=&r" (tmp) diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c index 9f40e1ff9b4..041fc1afc3f 100644 --- a/arch/mips/rb532/devices.c +++ b/arch/mips/rb532/devices.c @@ -110,7 +110,6 @@ static struct korina_device korina_dev0_data = { static struct platform_device korina_dev0 = { .id = -1, .name = "korina", - .dev.driver_data = &korina_dev0_data, .resource = korina_dev0_res, .num_resources = ARRAY_SIZE(korina_dev0_res), }; @@ -332,6 +331,8 @@ static int __init plat_setup_devices(void) /* set the uart clock to the current cpu frequency */ rb532_uart_res[0].uartclk = idt_cpu_freq; + dev_set_drvdata(&korina_dev0.dev, &korina_dev0_data); + return platform_add_devices(rb532_devs, ARRAY_SIZE(rb532_devs)); } diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 3b100518539..bf3382f1904 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -46,7 +46,7 @@ config DEBUG_STACK_USAGE config HCALL_STATS bool "Hypervisor call instrumentation" - depends on PPC_PSERIES && DEBUG_FS + depends on PPC_PSERIES && DEBUG_FS && TRACEPOINTS help Adds code to keep track of the number of hypervisor calls made and the amount of time spent in hypervisor calls. Wall time spent in diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index f1889abb89b..c568329723b 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -1683,7 +1683,7 @@ CONFIG_HAVE_ARCH_KGDB=y CONFIG_DEBUG_STACKOVERFLOW=y # CONFIG_DEBUG_STACK_USAGE is not set # CONFIG_DEBUG_PAGEALLOC is not set -CONFIG_HCALL_STATS=y +# CONFIG_HCALL_STATS is not set # CONFIG_CODE_PATCHING_SELFTEST is not set # CONFIG_FTR_FIXUP_SELFTEST is not set # CONFIG_MSI_BITMAP_SELFTEST is not set diff --git a/arch/powerpc/include/asm/emulated_ops.h b/arch/powerpc/include/asm/emulated_ops.h index 9154e852673..f0fb4fc1f6e 100644 --- a/arch/powerpc/include/asm/emulated_ops.h +++ b/arch/powerpc/include/asm/emulated_ops.h @@ -19,6 +19,7 @@ #define _ASM_POWERPC_EMULATED_OPS_H #include <asm/atomic.h> +#include <linux/perf_event.h> #ifdef CONFIG_PPC_EMULATED_STATS @@ -57,7 +58,7 @@ extern u32 ppc_warn_emulated; extern void ppc_warn_emulated_print(const char *type); -#define PPC_WARN_EMULATED(type) \ +#define __PPC_WARN_EMULATED(type) \ do { \ atomic_inc(&ppc_emulated.type.val); \ if (ppc_warn_emulated) \ @@ -66,8 +67,22 @@ extern void ppc_warn_emulated_print(const char *type); #else /* !CONFIG_PPC_EMULATED_STATS */ -#define PPC_WARN_EMULATED(type) do { } while (0) +#define __PPC_WARN_EMULATED(type) do { } while (0) #endif /* !CONFIG_PPC_EMULATED_STATS */ +#define PPC_WARN_EMULATED(type, regs) \ + do { \ + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, \ + 1, 0, regs, 0); \ + __PPC_WARN_EMULATED(type); \ + } while (0) + +#define PPC_WARN_ALIGNMENT(type, regs) \ + do { \ + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, \ + 1, 0, regs, regs->dar); \ + __PPC_WARN_EMULATED(type); \ + } while (0) + #endif /* _ASM_POWERPC_EMULATED_OPS_H */ diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 6251a4b10be..c27caac47ad 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -274,6 +274,8 @@ struct hcall_stats { unsigned long num_calls; /* number of calls (on this CPU) */ unsigned long tb_total; /* total wall time (mftb) of calls. */ unsigned long purr_total; /* total cpu time (PURR) of calls. */ + unsigned long tb_start; + unsigned long purr_start; }; #define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 6315edc205d..bc8dd53f718 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -489,6 +489,8 @@ #define SPRN_MMCR1 798 #define SPRN_MMCRA 0x312 #define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */ +#define MMCRA_SDAR_DCACHE_MISS 0x40000000UL +#define MMCRA_SDAR_ERAT_MISS 0x20000000UL #define MMCRA_SIHV 0x10000000UL /* state of MSR HV when SIAR set */ #define MMCRA_SIPR 0x08000000UL /* state of MSR PR when SIAR set */ #define MMCRA_SLOT 0x07000000UL /* SLOT bits (37-39) */ diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h new file mode 100644 index 00000000000..cbe2297d68b --- /dev/null +++ b/arch/powerpc/include/asm/trace.h @@ -0,0 +1,133 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM powerpc + +#if !defined(_TRACE_POWERPC_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_POWERPC_H + +#include <linux/tracepoint.h> + +struct pt_regs; + +TRACE_EVENT(irq_entry, + + TP_PROTO(struct pt_regs *regs), + + TP_ARGS(regs), + + TP_STRUCT__entry( + __field(struct pt_regs *, regs) + ), + + TP_fast_assign( + __entry->regs = regs; + ), + + TP_printk("pt_regs=%p", __entry->regs) +); + +TRACE_EVENT(irq_exit, + + TP_PROTO(struct pt_regs *regs), + + TP_ARGS(regs), + + TP_STRUCT__entry( + __field(struct pt_regs *, regs) + ), + + TP_fast_assign( + __entry->regs = regs; + ), + + TP_printk("pt_regs=%p", __entry->regs) +); + +TRACE_EVENT(timer_interrupt_entry, + + TP_PROTO(struct pt_regs *regs), + + TP_ARGS(regs), + + TP_STRUCT__entry( + __field(struct pt_regs *, regs) + ), + + TP_fast_assign( + __entry->regs = regs; + ), + + TP_printk("pt_regs=%p", __entry->regs) +); + +TRACE_EVENT(timer_interrupt_exit, + + TP_PROTO(struct pt_regs *regs), + + TP_ARGS(regs), + + TP_STRUCT__entry( + __field(struct pt_regs *, regs) + ), + + TP_fast_assign( + __entry->regs = regs; + ), + + TP_printk("pt_regs=%p", __entry->regs) +); + +#ifdef CONFIG_PPC_PSERIES +extern void hcall_tracepoint_regfunc(void); +extern void hcall_tracepoint_unregfunc(void); + +TRACE_EVENT_FN(hcall_entry, + + TP_PROTO(unsigned long opcode, unsigned long *args), + + TP_ARGS(opcode, args), + + TP_STRUCT__entry( + __field(unsigned long, opcode) + ), + + TP_fast_assign( + __entry->opcode = opcode; + ), + + TP_printk("opcode=%lu", __entry->opcode), + + hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc +); + +TRACE_EVENT_FN(hcall_exit, + + TP_PROTO(unsigned long opcode, unsigned long retval, + unsigned long *retbuf), + + TP_ARGS(opcode, retval, retbuf), + + TP_STRUCT__entry( + __field(unsigned long, opcode) + __field(unsigned long, retval) + ), + + TP_fast_assign( + __entry->opcode = opcode; + __entry->retval = retval; + ), + + TP_printk("opcode=%lu retval=%lu", __entry->opcode, __entry->retval), + + hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc +); +#endif + +#endif /* _TRACE_POWERPC_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH asm +#define TRACE_INCLUDE_FILE trace + +#include <trace/define_trace.h> diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index a5b632e52fa..3839839f83c 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -732,7 +732,7 @@ int fix_alignment(struct pt_regs *regs) #ifdef CONFIG_SPE if ((instr >> 26) == 0x4) { - PPC_WARN_EMULATED(spe); + PPC_WARN_ALIGNMENT(spe, regs); return emulate_spe(regs, reg, instr); } #endif @@ -786,7 +786,7 @@ int fix_alignment(struct pt_regs *regs) flags |= SPLT; nb = 8; } - PPC_WARN_EMULATED(vsx); + PPC_WARN_ALIGNMENT(vsx, regs); return emulate_vsx(addr, reg, areg, regs, flags, nb); } #endif @@ -794,7 +794,7 @@ int fix_alignment(struct pt_regs *regs) * the exception of DCBZ which is handled as a special case here */ if (instr == DCBZ) { - PPC_WARN_EMULATED(dcbz); + PPC_WARN_ALIGNMENT(dcbz, regs); return emulate_dcbz(regs, addr); } if (unlikely(nb == 0)) @@ -804,7 +804,7 @@ int fix_alignment(struct pt_regs *regs) * function */ if (flags & M) { - PPC_WARN_EMULATED(multiple); + PPC_WARN_ALIGNMENT(multiple, regs); return emulate_multiple(regs, addr, reg, nb, flags, instr, swiz); } @@ -825,11 +825,11 @@ int fix_alignment(struct pt_regs *regs) /* Special case for 16-byte FP loads and stores */ if (nb == 16) { - PPC_WARN_EMULATED(fp_pair); + PPC_WARN_ALIGNMENT(fp_pair, regs); return emulate_fp_pair(addr, reg, flags); } - PPC_WARN_EMULATED(unaligned); + PPC_WARN_ALIGNMENT(unaligned, regs); /* If we are loading, get the data from user space, else * get it from register values diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 9763267e38b..bdcb557d470 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -551,7 +551,7 @@ restore: BEGIN_FW_FTR_SECTION ld r5,SOFTE(r1) FW_FTR_SECTION_ELSE - b iseries_check_pending_irqs + b .Liseries_check_pending_irqs ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) 2: TRACE_AND_RESTORE_IRQ(r5); @@ -623,7 +623,7 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) #endif /* CONFIG_PPC_BOOK3E */ -iseries_check_pending_irqs: +.Liseries_check_pending_irqs: #ifdef CONFIG_PPC_ISERIES ld r5,SOFTE(r1) cmpdi 0,r5,0 diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 1808876edcc..c7eb4e0eb86 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -185,12 +185,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) * prolog code of the PerformanceMonitor one. A little * trickery is thus necessary */ +performance_monitor_pSeries_1: . = 0xf00 b performance_monitor_pSeries +altivec_unavailable_pSeries_1: . = 0xf20 b altivec_unavailable_pSeries +vsx_unavailable_pSeries_1: . = 0xf40 b vsx_unavailable_pSeries diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index e5d12117798..02a334662cc 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -70,6 +70,8 @@ #include <asm/firmware.h> #include <asm/lv1call.h> #endif +#define CREATE_TRACE_POINTS +#include <asm/trace.h> int __irq_offset_value; static int ppc_spurious_interrupts; @@ -325,6 +327,8 @@ void do_IRQ(struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); unsigned int irq; + trace_irq_entry(regs); + irq_enter(); check_stack_overflow(); @@ -348,6 +352,8 @@ void do_IRQ(struct pt_regs *regs) timer_interrupt(regs); } #endif + + trace_irq_exit(regs); } void __init init_IRQ(void) diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 87f1663584b..1eb85fbf53a 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -1165,7 +1165,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, */ if (record) { struct perf_sample_data data = { - .addr = 0, + .addr = ~0ULL, .period = event->hw.last_period, }; diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index 0f4c1c73a6a..199de527d41 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -73,10 +73,6 @@ #define MMCR1_PMCSEL_MSK 0x7f /* - * Bits in MMCRA - */ - -/* * Layout of constraint bits: * 6666555555555544444444443333333333222222222211111111110000000000 * 3210987654321098765432109876543210987654321098765432109876543210 diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index c351b3a57fb..98b6a729a9d 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c @@ -73,10 +73,6 @@ #define MMCR1_PMCSEL_MSK 0x7f /* - * Bits in MMCRA - */ - -/* * Layout of constraint bits: * 6666555555555544444444443333333333222222222211111111110000000000 * 3210987654321098765432109876543210987654321098765432109876543210 @@ -390,7 +386,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], unsigned long mmcr[]) { unsigned long mmcr1 = 0; - unsigned long mmcra = 0; + unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; unsigned int pmc, unit, byte, psel; unsigned int ttm, grp; int i, isbus, bit, grsel; diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index ca399ba5034..84a607bda8f 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c @@ -178,7 +178,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], unsigned long mmcr[]) { unsigned long mmcr1 = 0; - unsigned long mmcra = 0; + unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; int i; unsigned int pmc, ev, b, u, s, psel; unsigned int ttmset = 0; diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c index 28a4daacdc0..852f7b7f6b4 100644 --- a/arch/powerpc/kernel/power7-pmu.c +++ b/arch/powerpc/kernel/power7-pmu.c @@ -51,10 +51,6 @@ #define MMCR1_PMCSEL_MSK 0xff /* - * Bits in MMCRA - */ - -/* * Layout of constraint bits: * 6666555555555544444444443333333333222222222211111111110000000000 * 3210987654321098765432109876543210987654321098765432109876543210 @@ -230,7 +226,7 @@ static int power7_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], unsigned long mmcr[]) { unsigned long mmcr1 = 0; - unsigned long mmcra = 0; + unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; unsigned int pmc, unit, combine, l2sel, psel; unsigned int pmc_inuse = 0; int i; diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index 479574413a9..8eff48e20db 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -84,10 +84,6 @@ static short mmcr1_adder_bits[8] = { }; /* - * Bits in MMCRA - */ - -/* * Layout of constraint bits: * 6666555555555544444444443333333333222222222211111111110000000000 * 3210987654321098765432109876543210987654321098765432109876543210 diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 4271f7a655a..845c72ab735 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -660,6 +660,7 @@ late_initcall(check_cache_coherency); #ifdef CONFIG_DEBUG_FS struct dentry *powerpc_debugfs_root; +EXPORT_SYMBOL(powerpc_debugfs_root); static int powerpc_debugfs_init(void) { diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 53bcf3d792d..b152de3e64d 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -345,7 +345,7 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_SWIOTLB if (ppc_swiotlb_enable) - swiotlb_init(); + swiotlb_init(1); #endif paging_init(); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 04f638d82fb..df2c9e932b3 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -550,7 +550,7 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_SWIOTLB if (ppc_swiotlb_enable) - swiotlb_init(); + swiotlb_init(1); #endif paging_init(); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index a136a11c490..36707dec94d 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -54,6 +54,7 @@ #include <linux/irq.h> #include <linux/delay.h> #include <linux/perf_event.h> +#include <asm/trace.h> #include <asm/io.h> #include <asm/processor.h> @@ -571,6 +572,8 @@ void timer_interrupt(struct pt_regs * regs) struct clock_event_device *evt = &decrementer->event; u64 now; + trace_timer_interrupt_entry(regs); + /* Ensure a positive value is written to the decrementer, or else * some CPUs will continuue to take decrementer exceptions */ set_dec(DECREMENTER_MAX); @@ -590,6 +593,7 @@ void timer_interrupt(struct pt_regs * regs) now = decrementer->next_tb - now; if (now <= DECREMENTER_MAX) set_dec((int)now); + trace_timer_interrupt_exit(regs); return; } old_regs = set_irq_regs(regs); @@ -620,6 +624,8 @@ void timer_interrupt(struct pt_regs * regs) irq_exit(); set_irq_regs(old_regs); + + trace_timer_interrupt_exit(regs); } void wakeup_decrementer(void) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 6f0ae1a9bfa..9d1f9354d6c 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -759,7 +759,7 @@ static int emulate_instruction(struct pt_regs *regs) /* Emulate the mfspr rD, PVR. */ if ((instword & PPC_INST_MFSPR_PVR_MASK) == PPC_INST_MFSPR_PVR) { - PPC_WARN_EMULATED(mfpvr); + PPC_WARN_EMULATED(mfpvr, regs); rd = (instword >> 21) & 0x1f; regs->gpr[rd] = mfspr(SPRN_PVR); return 0; @@ -767,7 +767,7 @@ static int emulate_instruction(struct pt_regs *regs) /* Emulating the dcba insn is just a no-op. */ if ((instword & PPC_INST_DCBA_MASK) == PPC_INST_DCBA) { - PPC_WARN_EMULATED(dcba); + PPC_WARN_EMULATED(dcba, regs); return 0; } @@ -776,7 +776,7 @@ static int emulate_instruction(struct pt_regs *regs) int shift = (instword >> 21) & 0x1c; unsigned long msk = 0xf0000000UL >> shift; - PPC_WARN_EMULATED(mcrxr); + PPC_WARN_EMULATED(mcrxr, regs); regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk); regs->xer &= ~0xf0000000UL; return 0; @@ -784,19 +784,19 @@ static int emulate_instruction(struct pt_regs *regs) /* Emulate load/store string insn. */ if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) { - PPC_WARN_EMULATED(string); + PPC_WARN_EMULATED(string, regs); return emulate_string_inst(regs, instword); } /* Emulate the popcntb (Population Count Bytes) instruction. */ if ((instword & PPC_INST_POPCNTB_MASK) == PPC_INST_POPCNTB) { - PPC_WARN_EMULATED(popcntb); + PPC_WARN_EMULATED(popcntb, regs); return emulate_popcntb_inst(regs, instword); } /* Emulate isel (Integer Select) instruction */ if ((instword & PPC_INST_ISEL_MASK) == PPC_INST_ISEL) { - PPC_WARN_EMULATED(isel); + PPC_WARN_EMULATED(isel, regs); return emulate_isel(regs, instword); } @@ -995,7 +995,7 @@ void SoftwareEmulation(struct pt_regs *regs) #ifdef CONFIG_MATH_EMULATION errcode = do_mathemu(regs); if (errcode >= 0) - PPC_WARN_EMULATED(math); + PPC_WARN_EMULATED(math, regs); switch (errcode) { case 0: @@ -1018,7 +1018,7 @@ void SoftwareEmulation(struct pt_regs *regs) #elif defined(CONFIG_8XX_MINIMAL_FPEMU) errcode = Soft_emulate_8xx(regs); if (errcode >= 0) - PPC_WARN_EMULATED(8xx); + PPC_WARN_EMULATED(8xx, regs); switch (errcode) { case 0: @@ -1129,7 +1129,7 @@ void altivec_assist_exception(struct pt_regs *regs) flush_altivec_to_thread(current); - PPC_WARN_EMULATED(altivec); + PPC_WARN_EMULATED(altivec, regs); err = emulate_altivec(regs); if (err == 0) { regs->nip += 4; /* skip emulated instruction */ diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S index 75f3267fdc3..e68beac0a17 100644 --- a/arch/powerpc/lib/copypage_64.S +++ b/arch/powerpc/lib/copypage_64.S @@ -26,11 +26,11 @@ BEGIN_FTR_SECTION srd r8,r5,r11 mtctr r8 -setup: +.Lsetup: dcbt r9,r4 dcbz r9,r3 add r9,r9,r12 - bdnz setup + bdnz .Lsetup END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ) addi r3,r3,-8 srdi r8,r5,7 /* page is copied in 128 byte strides */ diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index c1427b3634e..383a5d0e981 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -14,68 +14,94 @@ #define STK_PARM(i) (48 + ((i)-3)*8) -#ifdef CONFIG_HCALL_STATS +#ifdef CONFIG_TRACEPOINTS + + .section ".toc","aw" + + .globl hcall_tracepoint_refcount +hcall_tracepoint_refcount: + .llong 0 + + .section ".text" + /* * precall must preserve all registers. use unused STK_PARM() - * areas to save snapshots and opcode. + * areas to save snapshots and opcode. We branch around this + * in early init (eg when populating the MMU hashtable) by using an + * unconditional cpu feature. */ -#define HCALL_INST_PRECALL \ - std r3,STK_PARM(r3)(r1); /* save opcode */ \ - mftb r0; /* get timebase and */ \ - std r0,STK_PARM(r5)(r1); /* save for later */ \ +#define HCALL_INST_PRECALL(FIRST_REG) \ BEGIN_FTR_SECTION; \ - mfspr r0,SPRN_PURR; /* get PURR and */ \ - std r0,STK_PARM(r6)(r1); /* save for later */ \ -END_FTR_SECTION_IFSET(CPU_FTR_PURR); - + b 1f; \ +END_FTR_SECTION(0, 1); \ + ld r12,hcall_tracepoint_refcount@toc(r2); \ + cmpdi r12,0; \ + beq+ 1f; \ + mflr r0; \ + std r3,STK_PARM(r3)(r1); \ + std r4,STK_PARM(r4)(r1); \ + std r5,STK_PARM(r5)(r1); \ + std r6,STK_PARM(r6)(r1); \ + std r7,STK_PARM(r7)(r1); \ + std r8,STK_PARM(r8)(r1); \ + std r9,STK_PARM(r9)(r1); \ + std r10,STK_PARM(r10)(r1); \ + std r0,16(r1); \ + addi r4,r1,STK_PARM(FIRST_REG); \ + stdu r1,-STACK_FRAME_OVERHEAD(r1); \ + bl .__trace_hcall_entry; \ + addi r1,r1,STACK_FRAME_OVERHEAD; \ + ld r0,16(r1); \ + ld r3,STK_PARM(r3)(r1); \ + ld r4,STK_PARM(r4)(r1); \ + ld r5,STK_PARM(r5)(r1); \ + ld r6,STK_PARM(r6)(r1); \ + ld r7,STK_PARM(r7)(r1); \ + ld r8,STK_PARM(r8)(r1); \ + ld r9,STK_PARM(r9)(r1); \ + ld r10,STK_PARM(r10)(r1); \ + mtlr r0; \ +1: + /* * postcall is performed immediately before function return which * allows liberal use of volatile registers. We branch around this * in early init (eg when populating the MMU hashtable) by using an * unconditional cpu feature. */ -#define HCALL_INST_POSTCALL \ +#define __HCALL_INST_POSTCALL \ BEGIN_FTR_SECTION; \ b 1f; \ END_FTR_SECTION(0, 1); \ - ld r4,STK_PARM(r3)(r1); /* validate opcode */ \ - cmpldi cr7,r4,MAX_HCALL_OPCODE; \ - bgt- cr7,1f; \ - \ - /* get time and PURR snapshots after hcall */ \ - mftb r7; /* timebase after */ \ -BEGIN_FTR_SECTION; \ - mfspr r8,SPRN_PURR; /* PURR after */ \ - ld r6,STK_PARM(r6)(r1); /* PURR before */ \ - subf r6,r6,r8; /* delta */ \ -END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ - ld r5,STK_PARM(r5)(r1); /* timebase before */ \ - subf r5,r5,r7; /* time delta */ \ - \ - /* calculate address of stat structure r4 = opcode */ \ - srdi r4,r4,2; /* index into array */ \ - mulli r4,r4,HCALL_STAT_SIZE; \ - LOAD_REG_ADDR(r7, per_cpu__hcall_stats); \ - add r4,r4,r7; \ - ld r7,PACA_DATA_OFFSET(r13); /* per cpu offset */ \ - add r4,r4,r7; \ - \ - /* update stats */ \ - ld r7,HCALL_STAT_CALLS(r4); /* count */ \ - addi r7,r7,1; \ - std r7,HCALL_STAT_CALLS(r4); \ - ld r7,HCALL_STAT_TB(r4); /* timebase */ \ - add r7,r7,r5; \ - std r7,HCALL_STAT_TB(r4); \ -BEGIN_FTR_SECTION; \ - ld r7,HCALL_STAT_PURR(r4); /* PURR */ \ - add r7,r7,r6; \ - std r7,HCALL_STAT_PURR(r4); \ -END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ + ld r12,hcall_tracepoint_refcount@toc(r2); \ + cmpdi r12,0; \ + beq+ 1f; \ + mflr r0; \ + ld r6,STK_PARM(r3)(r1); \ + std r3,STK_PARM(r3)(r1); \ + mr r4,r3; \ + mr r3,r6; \ + std r0,16(r1); \ + stdu r1,-STACK_FRAME_OVERHEAD(r1); \ + bl .__trace_hcall_exit; \ + addi r1,r1,STACK_FRAME_OVERHEAD; \ + ld r0,16(r1); \ + ld r3,STK_PARM(r3)(r1); \ + mtlr r0; \ 1: + +#define HCALL_INST_POSTCALL_NORETS \ + li r5,0; \ + __HCALL_INST_POSTCALL + +#define HCALL_INST_POSTCALL(BUFREG) \ + mr r5,BUFREG; \ + __HCALL_INST_POSTCALL + #else -#define HCALL_INST_PRECALL -#define HCALL_INST_POSTCALL +#define HCALL_INST_PRECALL(FIRST_ARG) +#define HCALL_INST_POSTCALL_NORETS +#define HCALL_INST_POSTCALL(BUFREG) #endif .text @@ -86,11 +112,11 @@ _GLOBAL(plpar_hcall_norets) mfcr r0 stw r0,8(r1) - HCALL_INST_PRECALL + HCALL_INST_PRECALL(r4) HVSC /* invoke the hypervisor */ - HCALL_INST_POSTCALL + HCALL_INST_POSTCALL_NORETS lwz r0,8(r1) mtcrf 0xff,r0 @@ -102,7 +128,7 @@ _GLOBAL(plpar_hcall) mfcr r0 stw r0,8(r1) - HCALL_INST_PRECALL + HCALL_INST_PRECALL(r5) std r4,STK_PARM(r4)(r1) /* Save ret buffer */ @@ -121,7 +147,7 @@ _GLOBAL(plpar_hcall) std r6, 16(r12) std r7, 24(r12) - HCALL_INST_POSTCALL + HCALL_INST_POSTCALL(r12) lwz r0,8(r1) mtcrf 0xff,r0 @@ -168,7 +194,7 @@ _GLOBAL(plpar_hcall9) mfcr r0 stw r0,8(r1) - HCALL_INST_PRECALL + HCALL_INST_PRECALL(r5) std r4,STK_PARM(r4)(r1) /* Save ret buffer */ @@ -196,7 +222,7 @@ _GLOBAL(plpar_hcall9) std r11,56(r12) std r0, 64(r12) - HCALL_INST_POSTCALL + HCALL_INST_POSTCALL(r12) lwz r0,8(r1) mtcrf 0xff,r0 diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index 3631a4f277e..2f58c71b725 100644 --- a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -26,6 +26,7 @@ #include <asm/hvcall.h> #include <asm/firmware.h> #include <asm/cputable.h> +#include <asm/trace.h> DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats); @@ -100,6 +101,35 @@ static const struct file_operations hcall_inst_seq_fops = { #define HCALL_ROOT_DIR "hcall_inst" #define CPU_NAME_BUF_SIZE 32 + +static void probe_hcall_entry(unsigned long opcode, unsigned long *args) +{ + struct hcall_stats *h; + + if (opcode > MAX_HCALL_OPCODE) + return; + + h = &get_cpu_var(hcall_stats)[opcode / 4]; + h->tb_start = mftb(); + h->purr_start = mfspr(SPRN_PURR); +} + +static void probe_hcall_exit(unsigned long opcode, unsigned long retval, + unsigned long *retbuf) +{ + struct hcall_stats *h; + + if (opcode > MAX_HCALL_OPCODE) + return; + + h = &__get_cpu_var(hcall_stats)[opcode / 4]; + h->num_calls++; + h->tb_total = mftb() - h->tb_start; + h->purr_total = mfspr(SPRN_PURR) - h->purr_start; + + put_cpu_var(hcall_stats); +} + static int __init hcall_inst_init(void) { struct dentry *hcall_root; @@ -110,6 +140,14 @@ static int __init hcall_inst_init(void) if (!firmware_has_feature(FW_FEATURE_LPAR)) return 0; + if (register_trace_hcall_entry(probe_hcall_entry)) + return -EINVAL; + + if (register_trace_hcall_exit(probe_hcall_exit)) { + unregister_trace_hcall_entry(probe_hcall_entry); + return -EINVAL; + } + hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL); if (!hcall_root) return -ENOMEM; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 903eb9eec68..0707653612b 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -39,6 +39,7 @@ #include <asm/cputable.h> #include <asm/udbg.h> #include <asm/smp.h> +#include <asm/trace.h> #include "plpar_wrappers.h" #include "pseries.h" @@ -661,3 +662,35 @@ void arch_free_page(struct page *page, int order) EXPORT_SYMBOL(arch_free_page); #endif + +#ifdef CONFIG_TRACEPOINTS +/* + * We optimise our hcall path by placing hcall_tracepoint_refcount + * directly in the TOC so we can check if the hcall tracepoints are + * enabled via a single load. + */ + +/* NB: reg/unreg are called while guarded with the tracepoints_mutex */ +extern long hcall_tracepoint_refcount; + +void hcall_tracepoint_regfunc(void) +{ + hcall_tracepoint_refcount++; +} + +void hcall_tracepoint_unregfunc(void) +{ + hcall_tracepoint_refcount--; +} + +void __trace_hcall_entry(unsigned long opcode, unsigned long *args) +{ + trace_hcall_entry(opcode, args); +} + +void __trace_hcall_exit(long opcode, unsigned long retval, + unsigned long *retbuf) +{ + trace_hcall_exit(opcode, retval, retbuf); +} +#endif diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 43c0acad716..16c673096a2 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -95,6 +95,34 @@ config S390 select HAVE_ARCH_TRACEHOOK select INIT_ALL_POSSIBLE select HAVE_PERF_EVENTS + select ARCH_INLINE_SPIN_TRYLOCK + select ARCH_INLINE_SPIN_TRYLOCK_BH + select ARCH_INLINE_SPIN_LOCK + select ARCH_INLINE_SPIN_LOCK_BH + select ARCH_INLINE_SPIN_LOCK_IRQ + select ARCH_INLINE_SPIN_LOCK_IRQSAVE + select ARCH_INLINE_SPIN_UNLOCK + select ARCH_INLINE_SPIN_UNLOCK_BH + select ARCH_INLINE_SPIN_UNLOCK_IRQ + select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE + select ARCH_INLINE_READ_TRYLOCK + select ARCH_INLINE_READ_LOCK + select ARCH_INLINE_READ_LOCK_BH + select ARCH_INLINE_READ_LOCK_IRQ + select ARCH_INLINE_READ_LOCK_IRQSAVE + select ARCH_INLINE_READ_UNLOCK + select ARCH_INLINE_READ_UNLOCK_BH + select ARCH_INLINE_READ_UNLOCK_IRQ + select ARCH_INLINE_READ_UNLOCK_IRQRESTORE + select ARCH_INLINE_WRITE_TRYLOCK + select ARCH_INLINE_WRITE_LOCK + select ARCH_INLINE_WRITE_LOCK_BH + select ARCH_INLINE_WRITE_LOCK_IRQ + select ARCH_INLINE_WRITE_LOCK_IRQSAVE + select ARCH_INLINE_WRITE_UNLOCK + select ARCH_INLINE_WRITE_UNLOCK_BH + select ARCH_INLINE_WRITE_UNLOCK_IRQ + select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE config SCHED_OMIT_FRAME_POINTER bool diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h index 7efd0abe888..efb74fd5156 100644 --- a/arch/s390/include/asm/bug.h +++ b/arch/s390/include/asm/bug.h @@ -49,7 +49,7 @@ #define BUG() do { \ __EMIT_BUG(0); \ - for (;;); \ + unreachable(); \ } while (0) #define WARN_ON(x) ({ \ diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 41ce6861174..c9af0d19c7a 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -191,33 +191,4 @@ static inline int __raw_write_trylock(raw_rwlock_t *rw) #define _raw_read_relax(lock) cpu_relax() #define _raw_write_relax(lock) cpu_relax() -#define __always_inline__spin_lock -#define __always_inline__read_lock -#define __always_inline__write_lock -#define __always_inline__spin_lock_bh -#define __always_inline__read_lock_bh -#define __always_inline__write_lock_bh -#define __always_inline__spin_lock_irq -#define __always_inline__read_lock_irq -#define __always_inline__write_lock_irq -#define __always_inline__spin_lock_irqsave -#define __always_inline__read_lock_irqsave -#define __always_inline__write_lock_irqsave -#define __always_inline__spin_trylock -#define __always_inline__read_trylock -#define __always_inline__write_trylock -#define __always_inline__spin_trylock_bh -#define __always_inline__spin_unlock -#define __always_inline__read_unlock -#define __always_inline__write_unlock -#define __always_inline__spin_unlock_bh -#define __always_inline__read_unlock_bh -#define __always_inline__write_unlock_bh -#define __always_inline__spin_unlock_irq -#define __always_inline__read_unlock_irq -#define __always_inline__write_unlock_irq -#define __always_inline__spin_unlock_irqrestore -#define __always_inline__read_unlock_irqrestore -#define __always_inline__write_unlock_irqrestore - #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index f5fe34dd821..5a82bc68193 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -203,73 +203,10 @@ out: #ifdef CONFIG_FTRACE_SYSCALLS -extern unsigned long __start_syscalls_metadata[]; -extern unsigned long __stop_syscalls_metadata[]; extern unsigned int sys_call_table[]; -static struct syscall_metadata **syscalls_metadata; - -struct syscall_metadata *syscall_nr_to_meta(int nr) -{ - if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) - return NULL; - - return syscalls_metadata[nr]; -} - -int syscall_name_to_nr(char *name) -{ - int i; - - if (!syscalls_metadata) - return -1; - for (i = 0; i < NR_syscalls; i++) - if (syscalls_metadata[i]) - if (!strcmp(syscalls_metadata[i]->name, name)) - return i; - return -1; -} - -void set_syscall_enter_id(int num, int id) -{ - syscalls_metadata[num]->enter_id = id; -} - -void set_syscall_exit_id(int num, int id) +unsigned long __init arch_syscall_addr(int nr) { - syscalls_metadata[num]->exit_id = id; -} - -static struct syscall_metadata *find_syscall_meta(unsigned long syscall) -{ - struct syscall_metadata *start; - struct syscall_metadata *stop; - char str[KSYM_SYMBOL_LEN]; - - start = (struct syscall_metadata *)__start_syscalls_metadata; - stop = (struct syscall_metadata *)__stop_syscalls_metadata; - kallsyms_lookup(syscall, NULL, NULL, NULL, str); - - for ( ; start < stop; start++) { - if (start->name && !strcmp(start->name + 3, str + 3)) - return start; - } - return NULL; -} - -static int __init arch_init_ftrace_syscalls(void) -{ - struct syscall_metadata *meta; - int i; - syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * NR_syscalls, - GFP_KERNEL); - if (!syscalls_metadata) - return -ENOMEM; - for (i = 0; i < NR_syscalls; i++) { - meta = find_syscall_meta((unsigned long)sys_call_table[i]); - syscalls_metadata[i] = meta; - } - return 0; + return (unsigned long)sys_call_table[nr]; } -arch_initcall(arch_init_ftrace_syscalls); #endif diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 72ace9515a0..178084b4377 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -49,6 +49,7 @@ config X86 select HAVE_KERNEL_GZIP select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_LZMA + select HAVE_HW_BREAKPOINT select HAVE_ARCH_KMEMCHECK config OUTPUT_FORMAT diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 2649840d888..5e99762eb5c 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -406,7 +406,7 @@ config X86_CMPXCHG64 # generates cmov. config X86_CMOV def_bool y - depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM) + depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX) config X86_MINIMUM_CPU_FAMILY int diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index d105f29bb6b..731318e5ac1 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -186,6 +186,15 @@ config X86_DS_SELFTEST config HAVE_MMIOTRACE_SUPPORT def_bool y +config X86_DECODER_SELFTEST + bool "x86 instruction decoder selftest" + depends on DEBUG_KERNEL + ---help--- + Perform x86 instruction decoder selftests at build time. + This option is useful for checking the sanity of x86 instruction + decoder code. + If unsure, say "N". + # # IO delay types: # @@ -287,4 +296,18 @@ config OPTIMIZE_INLINING If unsure, say N. +config DEBUG_STRICT_USER_COPY_CHECKS + bool "Strict copy size checks" + depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING + ---help--- + Enabling this option turns a certain set of sanity checks for user + copy operations into compile time failures. + + The copy_from_user() etc checks are there to help test if there + are sufficient security checks on the length argument of + the copy operation, by having gcc prove that the argument is + within bounds. + + If unsure, or if you run an older (pre 4.4) gcc, say N. + endmenu diff --git a/arch/x86/Makefile b/arch/x86/Makefile index d2d24c9ee64..78b32be55e9 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -155,6 +155,9 @@ all: bzImage KBUILD_IMAGE := $(boot)/bzImage bzImage: vmlinux +ifeq ($(CONFIG_X86_DECODER_SELFTEST),y) + $(Q)$(MAKE) $(build)=arch/x86/tools posttest +endif $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) $(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@ diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index 30e9a264f69..cbf0776dbec 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu @@ -41,7 +41,7 @@ cflags-$(CONFIG_X86_ELAN) += -march=i486 # Geode GX1 support cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx - +cflags-$(CONFIG_MGEODE_LX) += $(call cc-option,-march=geode,-march=pentium-mmx) # add at the end to overwrite eventual tuning options from earlier # cpu entries cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686)) diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 4a8e80cdcfa..9f828f87ca3 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -10,6 +10,7 @@ header-y += ptrace-abi.h header-y += sigcontext32.h header-y += ucontext.h header-y += processor-flags.h +header-y += hw_breakpoint.h unifdef-y += e820.h unifdef-y += ist.h diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h index bb70e397aa8..7a15588e45d 100644 --- a/arch/x86/include/asm/a.out-core.h +++ b/arch/x86/include/asm/a.out-core.h @@ -17,6 +17,7 @@ #include <linux/user.h> #include <linux/elfcore.h> +#include <asm/debugreg.h> /* * fill in the user structure for an a.out core dump @@ -32,14 +33,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) >> PAGE_SHIFT; dump->u_dsize -= dump->u_tsize; dump->u_ssize = 0; - dump->u_debugreg[0] = current->thread.debugreg0; - dump->u_debugreg[1] = current->thread.debugreg1; - dump->u_debugreg[2] = current->thread.debugreg2; - dump->u_debugreg[3] = current->thread.debugreg3; - dump->u_debugreg[4] = 0; - dump->u_debugreg[5] = 0; - dump->u_debugreg[6] = current->thread.debugreg6; - dump->u_debugreg[7] = current->thread.debugreg7; + aout_dump_debugregs(dump); if (dump->start_stack < TASK_SIZE) dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack)) diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index e2077d343c3..b97f786a48d 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -1,17 +1,13 @@ #ifdef __ASSEMBLY__ -#ifdef CONFIG_X86_32 -# define X86_ALIGN .long -#else -# define X86_ALIGN .quad -#endif +#include <asm/asm.h> #ifdef CONFIG_SMP .macro LOCK_PREFIX 1: lock .section .smp_locks,"a" - .align 4 - X86_ALIGN 1b + _ASM_ALIGN + _ASM_PTR 1b .previous .endm #else diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index c240efc74e0..69b74a7b877 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -84,6 +84,7 @@ static inline void alternatives_smp_switch(int smp) {} " .byte " __stringify(feature) "\n" /* feature bit */ \ " .byte 662b-661b\n" /* sourcelen */ \ " .byte 664f-663f\n" /* replacementlen */ \ + " .byte 0xff + (664f-663f) - (662b-661b)\n" /* rlen <= slen */ \ ".previous\n" \ ".section .altinstr_replacement, \"ax\"\n" \ "663:\n\t" newinstr "\n664:\n" /* replacement */ \ diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index 4b180897e6b..5af2982133b 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. * Author: Joerg Roedel <joerg.roedel@amd.com> * Leo Duran <leo.duran@amd.com> * @@ -23,19 +23,13 @@ #include <linux/irqreturn.h> #ifdef CONFIG_AMD_IOMMU -extern int amd_iommu_init(void); -extern int amd_iommu_init_dma_ops(void); -extern int amd_iommu_init_passthrough(void); + extern void amd_iommu_detect(void); -extern irqreturn_t amd_iommu_int_handler(int irq, void *data); -extern void amd_iommu_flush_all_domains(void); -extern void amd_iommu_flush_all_devices(void); -extern void amd_iommu_shutdown(void); -extern void amd_iommu_apply_erratum_63(u16 devid); + #else -static inline int amd_iommu_init(void) { return -ENODEV; } + static inline void amd_iommu_detect(void) { } -static inline void amd_iommu_shutdown(void) { } + #endif #endif /* _ASM_X86_AMD_IOMMU_H */ diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h new file mode 100644 index 00000000000..84786fb9a23 --- /dev/null +++ b/arch/x86/include/asm/amd_iommu_proto.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2009 Advanced Micro Devices, Inc. + * Author: Joerg Roedel <joerg.roedel@amd.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_X86_AMD_IOMMU_PROTO_H +#define _ASM_X86_AMD_IOMMU_PROTO_H + +struct amd_iommu; + +extern int amd_iommu_init_dma_ops(void); +extern int amd_iommu_init_passthrough(void); +extern irqreturn_t amd_iommu_int_handler(int irq, void *data); +extern void amd_iommu_flush_all_domains(void); +extern void amd_iommu_flush_all_devices(void); +extern void amd_iommu_apply_erratum_63(u16 devid); +extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); + +#ifndef CONFIG_AMD_IOMMU_STATS + +static inline void amd_iommu_stats_init(void) { } + +#endif /* !CONFIG_AMD_IOMMU_STATS */ + +#endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 2a2cc7a78a8..ba19ad4c47d 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. * Author: Joerg Roedel <joerg.roedel@amd.com> * Leo Duran <leo.duran@amd.com> * @@ -25,6 +25,11 @@ #include <linux/spinlock.h> /* + * Maximum number of IOMMUs supported + */ +#define MAX_IOMMUS 32 + +/* * some size calculation constants */ #define DEV_TABLE_ENTRY_SIZE 32 @@ -206,6 +211,9 @@ extern bool amd_iommu_dump; printk(KERN_INFO "AMD-Vi: " format, ## arg); \ } while(0); +/* global flag if IOMMUs cache non-present entries */ +extern bool amd_iommu_np_cache; + /* * Make iterating over all IOMMUs easier */ @@ -226,6 +234,8 @@ extern bool amd_iommu_dump; * independent of their use. */ struct protection_domain { + struct list_head list; /* for list of all protection domains */ + struct list_head dev_list; /* List of all devices in this domain */ spinlock_t lock; /* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ int mode; /* paging mode (0-6 levels) */ @@ -233,7 +243,20 @@ struct protection_domain { unsigned long flags; /* flags to find out type of domain */ bool updated; /* complete domain flush required */ unsigned dev_cnt; /* devices assigned to this domain */ + unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ void *priv; /* private data */ + +}; + +/* + * This struct contains device specific data for the IOMMU + */ +struct iommu_dev_data { + struct list_head list; /* For domain->dev_list */ + struct device *dev; /* Device this data belong to */ + struct device *alias; /* The Alias Device */ + struct protection_domain *domain; /* Domain the device is bound to */ + atomic_t bind; /* Domain attach reverent count */ }; /* @@ -291,6 +314,9 @@ struct dma_ops_domain { struct amd_iommu { struct list_head list; + /* Index within the IOMMU array */ + int index; + /* locks the accesses to the hardware */ spinlock_t lock; @@ -357,6 +383,21 @@ struct amd_iommu { extern struct list_head amd_iommu_list; /* + * Array with pointers to each IOMMU struct + * The indices are referenced in the protection domains + */ +extern struct amd_iommu *amd_iommus[MAX_IOMMUS]; + +/* Number of IOMMUs present in the system */ +extern int amd_iommus_present; + +/* + * Declarations for the global list of all protection domains + */ +extern spinlock_t amd_iommu_pd_lock; +extern struct list_head amd_iommu_pd_list; + +/* * Structure defining one entry in the device table */ struct dev_table_entry { @@ -416,15 +457,9 @@ extern unsigned amd_iommu_aperture_order; /* largest PCI device id we expect translation requests for */ extern u16 amd_iommu_last_bdf; -/* data structures for protection domain handling */ -extern struct protection_domain **amd_iommu_pd_table; - /* allocation bitmap for domain ids */ extern unsigned long *amd_iommu_pd_alloc_bitmap; -/* will be 1 if device isolation is enabled */ -extern bool amd_iommu_isolate; - /* * If true, the addresses will be flushed on unmap time, not when * they are reused @@ -462,11 +497,6 @@ struct __iommu_counter { #define ADD_STATS_COUNTER(name, x) #define SUB_STATS_COUNTER(name, x) -static inline void amd_iommu_stats_init(void) { } - #endif /* CONFIG_AMD_IOMMU_STATS */ -/* some function prototypes */ -extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); - #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 474d80d3e6c..b4ac2cdcb64 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -297,20 +297,20 @@ struct apic { int disable_esr; int dest_logical; - unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); + unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid); unsigned long (*check_apicid_present)(int apicid); void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); void (*init_apic_ldr)(void); - physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); + void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); void (*setup_apic_routing)(void); int (*multi_timer_check)(int apic, int irq); int (*apicid_to_node)(int logical_apicid); int (*cpu_to_logical_apicid)(int cpu); int (*cpu_present_to_apicid)(int mps_cpu); - physid_mask_t (*apicid_to_cpu_present)(int phys_apicid); + void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap); void (*setup_portio_remap)(void); int (*check_phys_apicid_present)(int phys_apicid); void (*enable_apic_mode)(void); @@ -488,6 +488,8 @@ static inline unsigned int read_apic_id(void) extern void default_setup_apic_routing(void); +extern struct apic apic_noop; + #ifdef CONFIG_X86_32 extern struct apic apic_default; @@ -532,9 +534,9 @@ default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, return (unsigned int)(mask1 & mask2 & mask3); } -static inline unsigned long default_check_apicid_used(physid_mask_t bitmap, int apicid) +static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid) { - return physid_isset(apicid, bitmap); + return physid_isset(apicid, *map); } static inline unsigned long default_check_apicid_present(int bit) @@ -542,9 +544,9 @@ static inline unsigned long default_check_apicid_present(int bit) return physid_isset(bit, phys_cpu_present_map); } -static inline physid_mask_t default_ioapic_phys_id_map(physid_mask_t phys_map) +static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) { - return phys_map; + *retmap = *phys_map; } /* Mapping from cpu number to logical apicid */ @@ -583,11 +585,6 @@ extern int default_cpu_present_to_apicid(int mps_cpu); extern int default_check_phys_apicid_present(int phys_apicid); #endif -static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) -{ - return physid_mask_of_physid(phys_apicid); -} - #endif /* CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 3b62da926de..7fe3b3060f0 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -11,6 +11,12 @@ #define IO_APIC_DEFAULT_PHYS_BASE 0xfec00000 #define APIC_DEFAULT_PHYS_BASE 0xfee00000 +/* + * This is the IO-APIC register space as specified + * by Intel docs: + */ +#define IO_APIC_SLOT_SIZE 1024 + #define APIC_ID 0x20 #define APIC_LVR 0x30 diff --git a/arch/x86/include/asm/apicnum.h b/arch/x86/include/asm/apicnum.h deleted file mode 100644 index 82f613c607c..00000000000 --- a/arch/x86/include/asm/apicnum.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _ASM_X86_APICNUM_H -#define _ASM_X86_APICNUM_H - -/* define MAX_IO_APICS */ -#ifdef CONFIG_X86_32 -# define MAX_IO_APICS 64 -#else -# define MAX_IO_APICS 128 -# define MAX_LOCAL_APIC 32768 -#endif - -#endif /* _ASM_X86_APICNUM_H */ diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index d9cf1cd156d..f654d1bb17f 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -22,14 +22,14 @@ do { \ ".popsection" \ : : "i" (__FILE__), "i" (__LINE__), \ "i" (sizeof(struct bug_entry))); \ - for (;;) ; \ + unreachable(); \ } while (0) #else #define BUG() \ do { \ asm volatile("ud2"); \ - for (;;) ; \ + unreachable(); \ } while (0) #endif diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h index b03bedb62aa..0918654305a 100644 --- a/arch/x86/include/asm/calgary.h +++ b/arch/x86/include/asm/calgary.h @@ -62,10 +62,8 @@ struct cal_chipset_ops { extern int use_calgary; #ifdef CONFIG_CALGARY_IOMMU -extern int calgary_iommu_init(void); extern void detect_calgary(void); #else -static inline int calgary_iommu_init(void) { return 1; } static inline void detect_calgary(void) { return; } #endif diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index ee1931be659..ffb9bb6b6c3 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -8,14 +8,50 @@ * you need to test for the feature in boot_cpu_data. */ -#define xchg(ptr, v) \ - ((__typeof__(*(ptr)))__xchg((unsigned long)(v), (ptr), sizeof(*(ptr)))) +extern void __xchg_wrong_size(void); + +/* + * Note: no "lock" prefix even on SMP: xchg always implies lock anyway + * Note 2: xchg has side effect, so that attribute volatile is necessary, + * but generally the primitive is invalid, *ptr is output argument. --ANK + */ struct __xchg_dummy { unsigned long a[100]; }; #define __xg(x) ((struct __xchg_dummy *)(x)) +#define __xchg(x, ptr, size) \ +({ \ + __typeof(*(ptr)) __x = (x); \ + switch (size) { \ + case 1: \ + asm volatile("xchgb %b0,%1" \ + : "=q" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile("xchgw %w0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile("xchgl %0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + default: \ + __xchg_wrong_size(); \ + } \ + __x; \ +}) + +#define xchg(ptr, v) \ + __xchg((v), (ptr), sizeof(*ptr)) + /* * The semantics of XCHGCMP8B are a bit strange, this is why * there is a loop and the loading of %%eax and %%edx has to @@ -71,57 +107,63 @@ static inline void __set_64bit_var(unsigned long long *ptr, (unsigned int)((value) >> 32)) \ : __set_64bit(ptr, ll_low((value)), ll_high((value)))) -/* - * Note: no "lock" prefix even on SMP: xchg always implies lock anyway - * Note 2: xchg has side effect, so that attribute volatile is necessary, - * but generally the primitive is invalid, *ptr is output argument. --ANK - */ -static inline unsigned long __xchg(unsigned long x, volatile void *ptr, - int size) -{ - switch (size) { - case 1: - asm volatile("xchgb %b0,%1" - : "=q" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 2: - asm volatile("xchgw %w0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 4: - asm volatile("xchgl %0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - } - return x; -} +extern void __cmpxchg_wrong_size(void); /* * Atomic compare and exchange. Compare OLD with MEM, if identical, * store NEW in MEM. Return the initial value in MEM. Success is * indicated by comparing RETURN with OLD. */ +#define __raw_cmpxchg(ptr, old, new, size, lock) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) __old = (old); \ + __typeof__(*(ptr)) __new = (new); \ + switch (size) { \ + case 1: \ + asm volatile(lock "cmpxchgb %b1,%2" \ + : "=a"(__ret) \ + : "q"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile(lock "cmpxchgw %w1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile(lock "cmpxchgl %1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + default: \ + __cmpxchg_wrong_size(); \ + } \ + __ret; \ +}) + +#define __cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX) + +#define __sync_cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "lock; ") + +#define __cmpxchg_local(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "") #ifdef CONFIG_X86_CMPXCHG #define __HAVE_ARCH_CMPXCHG 1 -#define cmpxchg(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) -#define sync_cmpxchg(ptr, o, n) \ - ((__typeof__(*(ptr)))__sync_cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) -#define cmpxchg_local(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) + +#define cmpxchg(ptr, old, new) \ + __cmpxchg((ptr), (old), (new), sizeof(*ptr)) + +#define sync_cmpxchg(ptr, old, new) \ + __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr)) + +#define cmpxchg_local(ptr, old, new) \ + __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) #endif #ifdef CONFIG_X86_CMPXCHG64 @@ -133,94 +175,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, (unsigned long long)(n))) #endif -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile(LOCK_PREFIX "cmpxchgl %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} - -/* - * Always use locked operations when touching memory shared with a - * hypervisor, since the system may be SMP even if the guest kernel - * isn't. - */ -static inline unsigned long __sync_cmpxchg(volatile void *ptr, - unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile("lock; cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile("lock; cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile("lock; cmpxchgl %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} - -static inline unsigned long __cmpxchg_local(volatile void *ptr, - unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile("cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile("cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile("cmpxchgl %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} - static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long long old, unsigned long long new) diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 52de72e0de8..485ae415fae 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -3,9 +3,6 @@ #include <asm/alternative.h> /* Provides LOCK_PREFIX */ -#define xchg(ptr, v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v), \ - (ptr), sizeof(*(ptr)))) - #define __xg(x) ((volatile long *)(x)) static inline void set_64bit(volatile unsigned long *ptr, unsigned long val) @@ -15,167 +12,118 @@ static inline void set_64bit(volatile unsigned long *ptr, unsigned long val) #define _set_64bit set_64bit +extern void __xchg_wrong_size(void); +extern void __cmpxchg_wrong_size(void); + /* * Note: no "lock" prefix even on SMP: xchg always implies lock anyway * Note 2: xchg has side effect, so that attribute volatile is necessary, * but generally the primitive is invalid, *ptr is output argument. --ANK */ -static inline unsigned long __xchg(unsigned long x, volatile void *ptr, - int size) -{ - switch (size) { - case 1: - asm volatile("xchgb %b0,%1" - : "=q" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 2: - asm volatile("xchgw %w0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 4: - asm volatile("xchgl %k0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 8: - asm volatile("xchgq %0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - } - return x; -} +#define __xchg(x, ptr, size) \ +({ \ + __typeof(*(ptr)) __x = (x); \ + switch (size) { \ + case 1: \ + asm volatile("xchgb %b0,%1" \ + : "=q" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile("xchgw %w0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile("xchgl %k0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 8: \ + asm volatile("xchgq %0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + default: \ + __xchg_wrong_size(); \ + } \ + __x; \ +}) + +#define xchg(ptr, v) \ + __xchg((v), (ptr), sizeof(*ptr)) + +#define __HAVE_ARCH_CMPXCHG 1 /* * Atomic compare and exchange. Compare OLD with MEM, if identical, * store NEW in MEM. Return the initial value in MEM. Success is * indicated by comparing RETURN with OLD. */ +#define __raw_cmpxchg(ptr, old, new, size, lock) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) __old = (old); \ + __typeof__(*(ptr)) __new = (new); \ + switch (size) { \ + case 1: \ + asm volatile(lock "cmpxchgb %b1,%2" \ + : "=a"(__ret) \ + : "q"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile(lock "cmpxchgw %w1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile(lock "cmpxchgl %k1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 8: \ + asm volatile(lock "cmpxchgq %1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + default: \ + __cmpxchg_wrong_size(); \ + } \ + __ret; \ +}) -#define __HAVE_ARCH_CMPXCHG 1 +#define __cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX) -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile(LOCK_PREFIX "cmpxchgl %k1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 8: - asm volatile(LOCK_PREFIX "cmpxchgq %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} +#define __sync_cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "lock; ") -/* - * Always use locked operations when touching memory shared with a - * hypervisor, since the system may be SMP even if the guest kernel - * isn't. - */ -static inline unsigned long __sync_cmpxchg(volatile void *ptr, - unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile("lock; cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile("lock; cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile("lock; cmpxchgl %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} +#define __cmpxchg_local(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "") -static inline unsigned long __cmpxchg_local(volatile void *ptr, - unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile("cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile("cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile("cmpxchgl %k1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 8: - asm volatile("cmpxchgq %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} +#define cmpxchg(ptr, old, new) \ + __cmpxchg((ptr), (old), (new), sizeof(*ptr)) + +#define sync_cmpxchg(ptr, old, new) \ + __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr)) + +#define cmpxchg_local(ptr, old, new) \ + __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) -#define cmpxchg(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), sizeof(*(ptr)))) #define cmpxchg64(ptr, o, n) \ ({ \ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ cmpxchg((ptr), (o), (n)); \ }) -#define cmpxchg_local(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) -#define sync_cmpxchg(ptr, o, n) \ - ((__typeof__(*(ptr)))__sync_cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) + #define cmpxchg64_local(ptr, o, n) \ ({ \ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 3ea6f37be9e..8240f76b531 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -18,6 +18,7 @@ #define DR_TRAP1 (0x2) /* db1 */ #define DR_TRAP2 (0x4) /* db2 */ #define DR_TRAP3 (0x8) /* db3 */ +#define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3) #define DR_STEP (0x4000) /* single-step */ #define DR_SWITCH (0x8000) /* task switch */ @@ -49,6 +50,8 @@ #define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */ #define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */ +#define DR_LOCAL_ENABLE (0x1) /* Local enable for reg 0 */ +#define DR_GLOBAL_ENABLE (0x2) /* Global enable for reg 0 */ #define DR_ENABLE_SIZE 2 /* 2 enable bits per register */ #define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */ @@ -67,4 +70,34 @@ #define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ #define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ +/* + * HW breakpoint additions + */ +#ifdef __KERNEL__ + +DECLARE_PER_CPU(unsigned long, cpu_dr7); + +static inline void hw_breakpoint_disable(void) +{ + /* Zero the control register for HW Breakpoint */ + set_debugreg(0UL, 7); + + /* Zero-out the individual HW breakpoint address registers */ + set_debugreg(0UL, 0); + set_debugreg(0UL, 1); + set_debugreg(0UL, 2); + set_debugreg(0UL, 3); +} + +static inline int hw_breakpoint_active(void) +{ + return __get_cpu_var(cpu_dr7) & DR_GLOBAL_ENABLE_MASK; +} + +extern void aout_dump_debugregs(struct user *dump); + +extern void hw_breakpoint_restore(void); + +#endif /* __KERNEL__ */ + #endif /* _ASM_X86_DEBUGREG_H */ diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h index cee34e9ca45..029f230ab63 100644 --- a/arch/x86/include/asm/device.h +++ b/arch/x86/include/asm/device.h @@ -8,7 +8,7 @@ struct dev_archdata { #ifdef CONFIG_X86_64 struct dma_map_ops *dma_ops; #endif -#ifdef CONFIG_DMAR +#if defined(CONFIG_DMAR) || defined(CONFIG_AMD_IOMMU) void *iommu; /* hook for IOMMU specific extension */ #endif }; diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 6a25d5d4283..0f6c02f3b7d 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -20,7 +20,8 @@ # define ISA_DMA_BIT_MASK DMA_BIT_MASK(32) #endif -extern dma_addr_t bad_dma_address; +#define DMA_ERROR_CODE 0 + extern int iommu_merge; extern struct device x86_dma_fallback_dev; extern int panic_on_overflow; @@ -48,7 +49,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) if (ops->mapping_error) return ops->mapping_error(dev, dma_addr); - return (dma_addr == bad_dma_address); + return (dma_addr == DMA_ERROR_CODE); } #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h index 6cfdafa409d..4ac5b0f33fc 100644 --- a/arch/x86/include/asm/gart.h +++ b/arch/x86/include/asm/gart.h @@ -35,8 +35,7 @@ extern int gart_iommu_aperture_allowed; extern int gart_iommu_aperture_disabled; extern void early_gart_iommu_check(void); -extern void gart_iommu_init(void); -extern void gart_iommu_shutdown(void); +extern int gart_iommu_init(void); extern void __init gart_parse_options(char *); extern void gart_iommu_hole_init(void); @@ -48,12 +47,6 @@ extern void gart_iommu_hole_init(void); static inline void early_gart_iommu_check(void) { } -static inline void gart_iommu_init(void) -{ -} -static inline void gart_iommu_shutdown(void) -{ -} static inline void gart_parse_options(char *options) { } diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 82e3e8f0104..108eb6fd1ae 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -20,11 +20,11 @@ typedef struct { unsigned int irq_call_count; unsigned int irq_tlb_count; #endif -#ifdef CONFIG_X86_MCE +#ifdef CONFIG_X86_THERMAL_VECTOR unsigned int irq_thermal_count; -# ifdef CONFIG_X86_MCE_THRESHOLD +#endif +#ifdef CONFIG_X86_MCE_THRESHOLD unsigned int irq_threshold_count; -# endif #endif } ____cacheline_aligned irq_cpustat_t; diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h new file mode 100644 index 00000000000..0675a7c4c20 --- /dev/null +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -0,0 +1,73 @@ +#ifndef _I386_HW_BREAKPOINT_H +#define _I386_HW_BREAKPOINT_H + +#ifdef __KERNEL__ +#define __ARCH_HW_BREAKPOINT_H + +/* + * The name should probably be something dealt in + * a higher level. While dealing with the user + * (display/resolving) + */ +struct arch_hw_breakpoint { + char *name; /* Contains name of the symbol to set bkpt */ + unsigned long address; + u8 len; + u8 type; +}; + +#include <linux/kdebug.h> +#include <linux/percpu.h> +#include <linux/list.h> + +/* Available HW breakpoint length encodings */ +#define X86_BREAKPOINT_LEN_1 0x40 +#define X86_BREAKPOINT_LEN_2 0x44 +#define X86_BREAKPOINT_LEN_4 0x4c +#define X86_BREAKPOINT_LEN_EXECUTE 0x40 + +#ifdef CONFIG_X86_64 +#define X86_BREAKPOINT_LEN_8 0x48 +#endif + +/* Available HW breakpoint type encodings */ + +/* trigger on instruction execute */ +#define X86_BREAKPOINT_EXECUTE 0x80 +/* trigger on memory write */ +#define X86_BREAKPOINT_WRITE 0x81 +/* trigger on memory read or write */ +#define X86_BREAKPOINT_RW 0x83 + +/* Total number of available HW breakpoint registers */ +#define HBP_NUM 4 + +struct perf_event; +struct pmu; + +extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len); +extern int arch_validate_hwbkpt_settings(struct perf_event *bp, + struct task_struct *tsk); +extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, + unsigned long val, void *data); + + +int arch_install_hw_breakpoint(struct perf_event *bp); +void arch_uninstall_hw_breakpoint(struct perf_event *bp); +void hw_breakpoint_pmu_read(struct perf_event *bp); +void hw_breakpoint_pmu_unthrottle(struct perf_event *bp); + +extern void +arch_fill_perf_breakpoint(struct perf_event *bp); + +unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type); +int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type); + +extern int arch_bp_generic_fields(int x86_len, int x86_type, + int *gen_len, int *gen_type); + +extern struct pmu perf_ops_bp; + +#endif /* __KERNEL__ */ +#endif /* _I386_HW_BREAKPOINT_H */ + diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index ba180d93b08..6e124269fd4 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -79,14 +79,32 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr, int ioapic, int ioapic_pin, int trigger, int polarity) { - irq_attr->ioapic = ioapic; - irq_attr->ioapic_pin = ioapic_pin; - irq_attr->trigger = trigger; - irq_attr->polarity = polarity; + irq_attr->ioapic = ioapic; + irq_attr->ioapic_pin = ioapic_pin; + irq_attr->trigger = trigger; + irq_attr->polarity = polarity; } -extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, - struct io_apic_irq_attr *irq_attr); +/* + * This is performance-critical, we want to do it O(1) + * + * Most irqs are mapped 1:1 with pins. + */ +struct irq_cfg { + struct irq_pin_list *irq_2_pin; + cpumask_var_t domain; + cpumask_var_t old_domain; + u8 vector; + u8 move_in_progress : 1; +}; + +extern struct irq_cfg *irq_cfg(unsigned int); +extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *); +extern void send_cleanup_vector(struct irq_cfg *); + +struct irq_desc; +extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *); +extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr); extern void setup_ioapic_dest(void); extern void enable_IO_APIC(void); diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h new file mode 100644 index 00000000000..205b063e3e3 --- /dev/null +++ b/arch/x86/include/asm/inat.h @@ -0,0 +1,220 @@ +#ifndef _ASM_X86_INAT_H +#define _ASM_X86_INAT_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +#include <asm/inat_types.h> + +/* + * Internal bits. Don't use bitmasks directly, because these bits are + * unstable. You should use checking functions. + */ + +#define INAT_OPCODE_TABLE_SIZE 256 +#define INAT_GROUP_TABLE_SIZE 8 + +/* Legacy last prefixes */ +#define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ +#define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */ +#define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */ +/* Other Legacy prefixes */ +#define INAT_PFX_LOCK 4 /* 0xF0 */ +#define INAT_PFX_CS 5 /* 0x2E */ +#define INAT_PFX_DS 6 /* 0x3E */ +#define INAT_PFX_ES 7 /* 0x26 */ +#define INAT_PFX_FS 8 /* 0x64 */ +#define INAT_PFX_GS 9 /* 0x65 */ +#define INAT_PFX_SS 10 /* 0x36 */ +#define INAT_PFX_ADDRSZ 11 /* 0x67 */ +/* x86-64 REX prefix */ +#define INAT_PFX_REX 12 /* 0x4X */ +/* AVX VEX prefixes */ +#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ +#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ + +#define INAT_LSTPFX_MAX 3 +#define INAT_LGCPFX_MAX 11 + +/* Immediate size */ +#define INAT_IMM_BYTE 1 +#define INAT_IMM_WORD 2 +#define INAT_IMM_DWORD 3 +#define INAT_IMM_QWORD 4 +#define INAT_IMM_PTR 5 +#define INAT_IMM_VWORD32 6 +#define INAT_IMM_VWORD 7 + +/* Legacy prefix */ +#define INAT_PFX_OFFS 0 +#define INAT_PFX_BITS 4 +#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1) +#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS) +/* Escape opcodes */ +#define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS) +#define INAT_ESC_BITS 2 +#define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1) +#define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS) +/* Group opcodes (1-16) */ +#define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS) +#define INAT_GRP_BITS 5 +#define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1) +#define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS) +/* Immediates */ +#define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS) +#define INAT_IMM_BITS 3 +#define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS) +/* Flags */ +#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS) +#define INAT_MODRM (1 << (INAT_FLAG_OFFS)) +#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1)) +#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2)) +#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3)) +#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) +#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) +#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) +/* Attribute making macros for attribute tables */ +#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) +#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) +#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM) +#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS) + +/* Attribute search APIs */ +extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); +extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, + insn_byte_t last_pfx, + insn_attr_t esc_attr); +extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, + insn_byte_t last_pfx, + insn_attr_t esc_attr); +extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, + insn_byte_t vex_m, + insn_byte_t vex_pp); + +/* Attribute checking functions */ +static inline int inat_is_legacy_prefix(insn_attr_t attr) +{ + attr &= INAT_PFX_MASK; + return attr && attr <= INAT_LGCPFX_MAX; +} + +static inline int inat_is_address_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ; +} + +static inline int inat_is_operand_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ; +} + +static inline int inat_is_rex_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_REX; +} + +static inline int inat_last_prefix_id(insn_attr_t attr) +{ + if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX) + return 0; + else + return attr & INAT_PFX_MASK; +} + +static inline int inat_is_vex_prefix(insn_attr_t attr) +{ + attr &= INAT_PFX_MASK; + return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3; +} + +static inline int inat_is_vex3_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3; +} + +static inline int inat_is_escape(insn_attr_t attr) +{ + return attr & INAT_ESC_MASK; +} + +static inline int inat_escape_id(insn_attr_t attr) +{ + return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS; +} + +static inline int inat_is_group(insn_attr_t attr) +{ + return attr & INAT_GRP_MASK; +} + +static inline int inat_group_id(insn_attr_t attr) +{ + return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS; +} + +static inline int inat_group_common_attribute(insn_attr_t attr) +{ + return attr & ~INAT_GRP_MASK; +} + +static inline int inat_has_immediate(insn_attr_t attr) +{ + return attr & INAT_IMM_MASK; +} + +static inline int inat_immediate_size(insn_attr_t attr) +{ + return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS; +} + +static inline int inat_has_modrm(insn_attr_t attr) +{ + return attr & INAT_MODRM; +} + +static inline int inat_is_force64(insn_attr_t attr) +{ + return attr & INAT_FORCE64; +} + +static inline int inat_has_second_immediate(insn_attr_t attr) +{ + return attr & INAT_SCNDIMM; +} + +static inline int inat_has_moffset(insn_attr_t attr) +{ + return attr & INAT_MOFFSET; +} + +static inline int inat_has_variant(insn_attr_t attr) +{ + return attr & INAT_VARIANT; +} + +static inline int inat_accept_vex(insn_attr_t attr) +{ + return attr & INAT_VEXOK; +} + +static inline int inat_must_vex(insn_attr_t attr) +{ + return attr & INAT_VEXONLY; +} +#endif diff --git a/arch/x86/include/asm/inat_types.h b/arch/x86/include/asm/inat_types.h new file mode 100644 index 00000000000..cb3c20ce39c --- /dev/null +++ b/arch/x86/include/asm/inat_types.h @@ -0,0 +1,29 @@ +#ifndef _ASM_X86_INAT_TYPES_H +#define _ASM_X86_INAT_TYPES_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ + +/* Instruction attributes */ +typedef unsigned int insn_attr_t; +typedef unsigned char insn_byte_t; +typedef signed int insn_value_t; + +#endif diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h new file mode 100644 index 00000000000..96c2e0ad04c --- /dev/null +++ b/arch/x86/include/asm/insn.h @@ -0,0 +1,184 @@ +#ifndef _ASM_X86_INSN_H +#define _ASM_X86_INSN_H +/* + * x86 instruction analysis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2009 + */ + +/* insn_attr_t is defined in inat.h */ +#include <asm/inat.h> + +struct insn_field { + union { + insn_value_t value; + insn_byte_t bytes[4]; + }; + /* !0 if we've run insn_get_xxx() for this field */ + unsigned char got; + unsigned char nbytes; +}; + +struct insn { + struct insn_field prefixes; /* + * Prefixes + * prefixes.bytes[3]: last prefix + */ + struct insn_field rex_prefix; /* REX prefix */ + struct insn_field vex_prefix; /* VEX prefix */ + struct insn_field opcode; /* + * opcode.bytes[0]: opcode1 + * opcode.bytes[1]: opcode2 + * opcode.bytes[2]: opcode3 + */ + struct insn_field modrm; + struct insn_field sib; + struct insn_field displacement; + union { + struct insn_field immediate; + struct insn_field moffset1; /* for 64bit MOV */ + struct insn_field immediate1; /* for 64bit imm or off16/32 */ + }; + union { + struct insn_field moffset2; /* for 64bit MOV */ + struct insn_field immediate2; /* for 64bit imm or seg16 */ + }; + + insn_attr_t attr; + unsigned char opnd_bytes; + unsigned char addr_bytes; + unsigned char length; + unsigned char x86_64; + + const insn_byte_t *kaddr; /* kernel address of insn to analyze */ + const insn_byte_t *next_byte; +}; + +#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) +#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) +#define X86_MODRM_RM(modrm) ((modrm) & 0x07) + +#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6) +#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3) +#define X86_SIB_BASE(sib) ((sib) & 0x07) + +#define X86_REX_W(rex) ((rex) & 8) +#define X86_REX_R(rex) ((rex) & 4) +#define X86_REX_X(rex) ((rex) & 2) +#define X86_REX_B(rex) ((rex) & 1) + +/* VEX bit flags */ +#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */ +#define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */ +#define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */ +#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ +#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ +/* VEX bit fields */ +#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ +#define X86_VEX2_M 1 /* VEX2.M always 1 */ +#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ +#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ +#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ + +/* The last prefix is needed for two-byte and three-byte opcodes */ +static inline insn_byte_t insn_last_prefix(struct insn *insn) +{ + return insn->prefixes.bytes[3]; +} + +extern void insn_init(struct insn *insn, const void *kaddr, int x86_64); +extern void insn_get_prefixes(struct insn *insn); +extern void insn_get_opcode(struct insn *insn); +extern void insn_get_modrm(struct insn *insn); +extern void insn_get_sib(struct insn *insn); +extern void insn_get_displacement(struct insn *insn); +extern void insn_get_immediate(struct insn *insn); +extern void insn_get_length(struct insn *insn); + +/* Attribute will be determined after getting ModRM (for opcode groups) */ +static inline void insn_get_attribute(struct insn *insn) +{ + insn_get_modrm(insn); +} + +/* Instruction uses RIP-relative addressing */ +extern int insn_rip_relative(struct insn *insn); + +/* Init insn for kernel text */ +static inline void kernel_insn_init(struct insn *insn, const void *kaddr) +{ +#ifdef CONFIG_X86_64 + insn_init(insn, kaddr, 1); +#else /* CONFIG_X86_32 */ + insn_init(insn, kaddr, 0); +#endif +} + +static inline int insn_is_avx(struct insn *insn) +{ + if (!insn->prefixes.got) + insn_get_prefixes(insn); + return (insn->vex_prefix.value != 0); +} + +static inline insn_byte_t insn_vex_m_bits(struct insn *insn) +{ + if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ + return X86_VEX2_M; + else + return X86_VEX3_M(insn->vex_prefix.bytes[1]); +} + +static inline insn_byte_t insn_vex_p_bits(struct insn *insn) +{ + if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ + return X86_VEX_P(insn->vex_prefix.bytes[1]); + else + return X86_VEX_P(insn->vex_prefix.bytes[2]); +} + +/* Offset of each field from kaddr */ +static inline int insn_offset_rex_prefix(struct insn *insn) +{ + return insn->prefixes.nbytes; +} +static inline int insn_offset_vex_prefix(struct insn *insn) +{ + return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes; +} +static inline int insn_offset_opcode(struct insn *insn) +{ + return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes; +} +static inline int insn_offset_modrm(struct insn *insn) +{ + return insn_offset_opcode(insn) + insn->opcode.nbytes; +} +static inline int insn_offset_sib(struct insn *insn) +{ + return insn_offset_modrm(insn) + insn->modrm.nbytes; +} +static inline int insn_offset_displacement(struct insn *insn) +{ + return insn_offset_sib(insn) + insn->sib.nbytes; +} +static inline int insn_offset_immediate(struct insn *insn) +{ + return insn_offset_displacement(insn) + insn->displacement.nbytes; +} + +#endif /* _ASM_X86_INSN_H */ diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index fd6d21bbee6..345c99cef15 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -1,8 +1,6 @@ #ifndef _ASM_X86_IOMMU_H #define _ASM_X86_IOMMU_H -extern void pci_iommu_shutdown(void); -extern void no_iommu_init(void); extern struct dma_map_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index ddda6cbed6f..ffd700ff5dc 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -34,6 +34,7 @@ static inline int irq_canonicalize(int irq) #ifdef CONFIG_HOTPLUG_CPU #include <linux/cpumask.h> extern void fixup_irqs(void); +extern void irq_force_complete_move(int); #endif extern void (*generic_interrupt_extension)(void); diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index f1363b72364..858baa061cf 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -108,6 +108,8 @@ struct mce_log { #define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) #define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) +extern struct atomic_notifier_head x86_mce_decoder_chain; + #ifdef __KERNEL__ #include <linux/percpu.h> @@ -118,9 +120,11 @@ extern int mce_disabled; extern int mce_p5_enabled; #ifdef CONFIG_X86_MCE -void mcheck_init(struct cpuinfo_x86 *c); +int mcheck_init(void); +void mcheck_cpu_init(struct cpuinfo_x86 *c); #else -static inline void mcheck_init(struct cpuinfo_x86 *c) {} +static inline int mcheck_init(void) { return 0; } +static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {} #endif #ifdef CONFIG_X86_ANCIENT_MCE @@ -214,5 +218,11 @@ void intel_init_thermal(struct cpuinfo_x86 *c); void mce_log_therm_throt_event(__u64 status); +#ifdef CONFIG_X86_THERMAL_VECTOR +extern void mcheck_intel_therm_init(void); +#else +static inline void mcheck_intel_therm_init(void) { } +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 79c94500c0b..61d90b1331c 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -163,14 +163,16 @@ typedef struct physid_mask physid_mask_t; #define physids_shift_left(d, s, n) \ bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS) -#define physids_coerce(map) ((map).mask[0]) +static inline unsigned long physids_coerce(physid_mask_t *map) +{ + return map->mask[0]; +} -#define physids_promote(physids) \ - ({ \ - physid_mask_t __physid_mask = PHYSID_MASK_NONE; \ - __physid_mask.mask[0] = physids; \ - __physid_mask; \ - }) +static inline void physids_promote(unsigned long physids, physid_mask_t *map) +{ + physids_clear(*map); + map->mask[0] = physids; +} /* Note: will create very large stack frames if physid_mask_t is big */ #define physid_mask_of_physid(physid) \ diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 7e2b6ba962f..5bef931f8b1 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -247,8 +247,8 @@ do { \ #ifdef CONFIG_SMP int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); -void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); -void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); +void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs); +void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs); int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); @@ -264,12 +264,12 @@ static inline int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) wrmsr(msr_no, l, h); return 0; } -static inline void rdmsr_on_cpus(const cpumask_t *m, u32 msr_no, +static inline void rdmsr_on_cpus(const struct cpumask *m, u32 msr_no, struct msr *msrs) { rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h)); } -static inline void wrmsr_on_cpus(const cpumask_t *m, u32 msr_no, +static inline void wrmsr_on_cpus(const struct cpumask *m, u32 msr_no, struct msr *msrs) { wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h); diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index ad7ce3fd506..8d9f8548a87 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -28,9 +28,20 @@ */ #define ARCH_PERFMON_EVENT_MASK 0xffff +/* + * filter mask to validate fixed counter events. + * the following filters disqualify for fixed counters: + * - inv + * - edge + * - cnt-mask + * The other filters are supported by fixed counters. + * The any-thread option is supported starting with v3. + */ +#define ARCH_PERFMON_EVENT_FILTER_MASK 0xff840000 + #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c9786480f0f..6f8ec1c37e0 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -30,6 +30,7 @@ struct mm_struct; #include <linux/math64.h> #include <linux/init.h> +#define HBP_NUM 4 /* * Default implementation of macro that returns current * instruction pointer ("program counter"). @@ -422,6 +423,8 @@ extern unsigned int xstate_size; extern void free_thread_xstate(struct task_struct *); extern struct kmem_cache *task_xstate_cachep; +struct perf_event; + struct thread_struct { /* Cached TLS descriptors: */ struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; @@ -443,13 +446,10 @@ struct thread_struct { unsigned long fs; #endif unsigned long gs; - /* Hardware debugging registers: */ - unsigned long debugreg0; - unsigned long debugreg1; - unsigned long debugreg2; - unsigned long debugreg3; - unsigned long debugreg6; - unsigned long debugreg7; + /* Save middle states of ptrace breakpoints */ + struct perf_event *ptrace_bps[HBP_NUM]; + /* Debug status used for traps, single steps, etc... */ + unsigned long debugreg6; /* Fault info: */ unsigned long cr2; unsigned long trap_no; diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 0f0d908349a..3d11fd0f44c 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -7,6 +7,7 @@ #ifdef __KERNEL__ #include <asm/segment.h> +#include <asm/page_types.h> #endif #ifndef __ASSEMBLY__ @@ -216,6 +217,67 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs) return regs->sp; } +/* Query offset/name of register from its name/offset */ +extern int regs_query_register_offset(const char *name); +extern const char *regs_query_register_name(unsigned int offset); +#define MAX_REG_OFFSET (offsetof(struct pt_regs, ss)) + +/** + * regs_get_register() - get register value from its offset + * @regs: pt_regs from which register value is gotten. + * @offset: offset number of the register. + * + * regs_get_register returns the value of a register. The @offset is the + * offset of the register in struct pt_regs address which specified by @regs. + * If @offset is bigger than MAX_REG_OFFSET, this returns 0. + */ +static inline unsigned long regs_get_register(struct pt_regs *regs, + unsigned int offset) +{ + if (unlikely(offset > MAX_REG_OFFSET)) + return 0; + return *(unsigned long *)((unsigned long)regs + offset); +} + +/** + * regs_within_kernel_stack() - check the address in the stack + * @regs: pt_regs which contains kernel stack pointer. + * @addr: address which is checked. + * + * regs_within_kernel_stack() checks @addr is within the kernel stack page(s). + * If @addr is within the kernel stack, it returns true. If not, returns false. + */ +static inline int regs_within_kernel_stack(struct pt_regs *regs, + unsigned long addr) +{ + return ((addr & ~(THREAD_SIZE - 1)) == + (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))); +} + +/** + * regs_get_kernel_stack_nth() - get Nth entry of the stack + * @regs: pt_regs which contains kernel stack pointer. + * @n: stack entry number. + * + * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which + * is specified by @regs. If the @n th entry is NOT in the kernel stack, + * this returns 0. + */ +static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, + unsigned int n) +{ + unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); + addr += n; + if (regs_within_kernel_stack(regs, (unsigned long)addr)) + return *addr; + else + return 0; +} + +/* Get Nth argument at function call */ +extern unsigned long regs_get_argument_nth(struct pt_regs *regs, + unsigned int n); + /* * These are defined as per linux/ptrace.h, which see. */ diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h index ae907e61718..3d3e8353ee5 100644 --- a/arch/x86/include/asm/string_32.h +++ b/arch/x86/include/asm/string_32.h @@ -177,10 +177,15 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len) */ #ifndef CONFIG_KMEMCHECK + +#if (__GNUC__ >= 4) +#define memcpy(t, f, n) __builtin_memcpy(t, f, n) +#else #define memcpy(t, f, n) \ (__builtin_constant_p((n)) \ ? __constant_memcpy((t), (f), (n)) \ : __memcpy((t), (f), (n))) +#endif #else /* * kmemcheck becomes very happy if we use the REP instructions unconditionally, @@ -316,11 +321,15 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern, : __memset_generic((s), (c), (count))) #define __HAVE_ARCH_MEMSET +#if (__GNUC__ >= 4) +#define memset(s, c, count) __builtin_memset(s, c, count) +#else #define memset(s, c, count) \ (__builtin_constant_p(c) \ ? __constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \ (count)) \ : __memset((s), (c), (count))) +#endif /* * find the first occurrence of byte 'c', or 1 past the area if none diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h index b9e4e20174f..87ffcb12a1b 100644 --- a/arch/x86/include/asm/swiotlb.h +++ b/arch/x86/include/asm/swiotlb.h @@ -3,17 +3,14 @@ #include <linux/swiotlb.h> -/* SWIOTLB interface */ - -extern int swiotlb_force; - #ifdef CONFIG_SWIOTLB extern int swiotlb; -extern void pci_swiotlb_init(void); +extern int pci_swiotlb_init(void); #else #define swiotlb 0 -static inline void pci_swiotlb_init(void) +static inline int pci_swiotlb_init(void) { + return 0; } #endif diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index f08f9737489..022a84386de 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -128,8 +128,6 @@ do { \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ "call __switch_to\n\t" \ - ".globl thread_return\n" \ - "thread_return:\n\t" \ "movq "__percpu_arg([current_task])",%%rsi\n\t" \ __switch_canary \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ @@ -157,19 +155,22 @@ extern void native_load_gs_index(unsigned); * Load a segment. Fall back on loading the zero * segment if something goes wrong.. */ -#define loadsegment(seg, value) \ - asm volatile("\n" \ - "1:\t" \ - "movl %k0,%%" #seg "\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3:\t" \ - "movl %k1, %%" #seg "\n\t" \ - "jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b,3b) \ - : :"r" (value), "r" (0) : "memory") - +#define loadsegment(seg, value) \ +do { \ + unsigned short __val = (value); \ + \ + asm volatile(" \n" \ + "1: movl %k0,%%" #seg " \n" \ + \ + ".section .fixup,\"ax\" \n" \ + "2: xorl %k0,%k0 \n" \ + " jmp 1b \n" \ + ".previous \n" \ + \ + _ASM_EXTABLE(1b, 2b) \ + \ + : "+r" (__val) : : "memory"); \ +} while (0) /* * Save a segment register away diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index d2c6c930b49..abd3e0ea762 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -570,7 +570,6 @@ extern struct movsl_mask { #ifdef CONFIG_X86_32 # include "uaccess_32.h" #else -# define ARCH_HAS_SEARCH_EXTABLE # include "uaccess_64.h" #endif diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 632fb44b4cb..0c9825e97f3 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -187,9 +187,34 @@ __copy_from_user_inatomic_nocache(void *to, const void __user *from, unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n); -unsigned long __must_check copy_from_user(void *to, +unsigned long __must_check _copy_from_user(void *to, const void __user *from, unsigned long n); + + +extern void copy_from_user_overflow(void) +#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS + __compiletime_error("copy_from_user() buffer size is not provably correct") +#else + __compiletime_warning("copy_from_user() buffer size is not provably correct") +#endif +; + +static inline unsigned long __must_check copy_from_user(void *to, + const void __user *from, + unsigned long n) +{ + int sz = __compiletime_object_size(to); + int ret = -EFAULT; + + if (likely(sz == -1 || sz >= n)) + ret = _copy_from_user(to, from, n); + else + copy_from_user_overflow(); + + return ret; +} + long __must_check strncpy_from_user(char *dst, const char __user *src, long count); long __must_check __strncpy_from_user(char *dst, diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index db24b215fc5..46324c6a4f6 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -19,12 +19,37 @@ __must_check unsigned long copy_user_generic(void *to, const void *from, unsigned len); __must_check unsigned long -copy_to_user(void __user *to, const void *from, unsigned len); +_copy_to_user(void __user *to, const void *from, unsigned len); __must_check unsigned long -copy_from_user(void *to, const void __user *from, unsigned len); +_copy_from_user(void *to, const void __user *from, unsigned len); __must_check unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len); +static inline unsigned long __must_check copy_from_user(void *to, + const void __user *from, + unsigned long n) +{ + int sz = __compiletime_object_size(to); + int ret = -EFAULT; + + might_fault(); + if (likely(sz == -1 || sz >= n)) + ret = _copy_from_user(to, from, n); +#ifdef CONFIG_DEBUG_VM + else + WARN(1, "Buffer overflow detected!\n"); +#endif + return ret; +} + +static __always_inline __must_check +int copy_to_user(void __user *dst, const void *src, unsigned size) +{ + might_fault(); + + return _copy_to_user(dst, src, size); +} + static __always_inline __must_check int __copy_from_user(void *dst, const void __user *src, unsigned size) { @@ -176,8 +201,11 @@ __must_check long strlen_user(const char __user *str); __must_check unsigned long clear_user(void __user *mem, unsigned long len); __must_check unsigned long __clear_user(void __user *mem, unsigned long len); -__must_check long __copy_from_user_inatomic(void *dst, const void __user *src, - unsigned size); +static __must_check __always_inline int +__copy_from_user_inatomic(void *dst, const void __user *src, unsigned size) +{ + return copy_user_generic(dst, (__force const void *)src, size); +} static __must_check __always_inline int __copy_to_user_inatomic(void __user *dst, const void *src, unsigned size) diff --git a/arch/x86/include/asm/uv/uv_irq.h b/arch/x86/include/asm/uv/uv_irq.h index 9613c8c0b64..d6b17c76062 100644 --- a/arch/x86/include/asm/uv/uv_irq.h +++ b/arch/x86/include/asm/uv/uv_irq.h @@ -25,12 +25,14 @@ struct uv_IO_APIC_route_entry { dest : 32; }; -extern struct irq_chip uv_irq_chip; - -extern int arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long); -extern void arch_disable_uv_irq(int, unsigned long); +enum { + UV_AFFINITY_ALL, + UV_AFFINITY_NODE, + UV_AFFINITY_CPU +}; -extern int uv_setup_irq(char *, int, int, unsigned long); -extern void uv_teardown_irq(unsigned int, int, unsigned long); +extern int uv_irq_2_mmr_info(int, unsigned long *, int *); +extern int uv_setup_irq(char *, int, int, unsigned long, int); +extern void uv_teardown_irq(unsigned int); #endif /* _ASM_X86_UV_UV_IRQ_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 2c756fd4ab0..d8e71459f02 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -91,6 +91,14 @@ struct x86_init_timers { }; /** + * struct x86_init_iommu - platform specific iommu setup + * @iommu_init: platform specific iommu setup + */ +struct x86_init_iommu { + int (*iommu_init)(void); +}; + +/** * struct x86_init_ops - functions for platform specific setup * */ @@ -101,6 +109,7 @@ struct x86_init_ops { struct x86_init_oem oem; struct x86_init_paging paging; struct x86_init_timers timers; + struct x86_init_iommu iommu; }; /** @@ -121,6 +130,7 @@ struct x86_platform_ops { unsigned long (*calibrate_tsc)(void); unsigned long (*get_wallclock)(void); int (*set_wallclock)(unsigned long nowtime); + void (*iommu_shutdown)(void); }; extern struct x86_init_ops x86_init; diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index d8e5d0cdd67..4f2e66e29ec 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -40,7 +40,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o obj-y += bootflag.o e820.o obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o -obj-y += alternative.o i8253.o pci-nommu.o +obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o obj-y += tsc.o io_delay.o rtc.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0285521e0a9..32fb09102a1 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. * Author: Joerg Roedel <joerg.roedel@amd.com> * Leo Duran <leo.duran@amd.com> * @@ -28,6 +28,7 @@ #include <asm/proto.h> #include <asm/iommu.h> #include <asm/gart.h> +#include <asm/amd_iommu_proto.h> #include <asm/amd_iommu_types.h> #include <asm/amd_iommu.h> @@ -56,20 +57,115 @@ struct iommu_cmd { u32 data[4]; }; -static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, - struct unity_map_entry *e); -static struct dma_ops_domain *find_protection_domain(u16 devid); -static u64 *alloc_pte(struct protection_domain *domain, - unsigned long address, int end_lvl, - u64 **pte_page, gfp_t gfp); -static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, - unsigned long start_page, - unsigned int pages); static void reset_iommu_command_buffer(struct amd_iommu *iommu); -static u64 *fetch_pte(struct protection_domain *domain, - unsigned long address, int map_size); static void update_domain(struct protection_domain *domain); +/**************************************************************************** + * + * Helper functions + * + ****************************************************************************/ + +static inline u16 get_device_id(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + return calc_devid(pdev->bus->number, pdev->devfn); +} + +static struct iommu_dev_data *get_dev_data(struct device *dev) +{ + return dev->archdata.iommu; +} + +/* + * In this function the list of preallocated protection domains is traversed to + * find the domain for a specific device + */ +static struct dma_ops_domain *find_protection_domain(u16 devid) +{ + struct dma_ops_domain *entry, *ret = NULL; + unsigned long flags; + u16 alias = amd_iommu_alias_table[devid]; + + if (list_empty(&iommu_pd_list)) + return NULL; + + spin_lock_irqsave(&iommu_pd_list_lock, flags); + + list_for_each_entry(entry, &iommu_pd_list, list) { + if (entry->target_dev == devid || + entry->target_dev == alias) { + ret = entry; + break; + } + } + + spin_unlock_irqrestore(&iommu_pd_list_lock, flags); + + return ret; +} + +/* + * This function checks if the driver got a valid device from the caller to + * avoid dereferencing invalid pointers. + */ +static bool check_device(struct device *dev) +{ + u16 devid; + + if (!dev || !dev->dma_mask) + return false; + + /* No device or no PCI device */ + if (!dev || dev->bus != &pci_bus_type) + return false; + + devid = get_device_id(dev); + + /* Out of our scope? */ + if (devid > amd_iommu_last_bdf) + return false; + + if (amd_iommu_rlookup_table[devid] == NULL) + return false; + + return true; +} + +static int iommu_init_device(struct device *dev) +{ + struct iommu_dev_data *dev_data; + struct pci_dev *pdev; + u16 devid, alias; + + if (dev->archdata.iommu) + return 0; + + dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL); + if (!dev_data) + return -ENOMEM; + + dev_data->dev = dev; + + devid = get_device_id(dev); + alias = amd_iommu_alias_table[devid]; + pdev = pci_get_bus_and_slot(PCI_BUS(alias), alias & 0xff); + if (pdev) + dev_data->alias = &pdev->dev; + + atomic_set(&dev_data->bind, 0); + + dev->archdata.iommu = dev_data; + + + return 0; +} + +static void iommu_uninit_device(struct device *dev) +{ + kfree(dev->archdata.iommu); +} #ifdef CONFIG_AMD_IOMMU_STATS /* @@ -90,7 +186,6 @@ DECLARE_STATS_COUNTER(alloced_io_mem); DECLARE_STATS_COUNTER(total_map_requests); static struct dentry *stats_dir; -static struct dentry *de_isolate; static struct dentry *de_fflush; static void amd_iommu_stats_add(struct __iommu_counter *cnt) @@ -108,9 +203,6 @@ static void amd_iommu_stats_init(void) if (stats_dir == NULL) return; - de_isolate = debugfs_create_bool("isolation", 0444, stats_dir, - (u32 *)&amd_iommu_isolate); - de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir, (u32 *)&amd_iommu_unmap_flush); @@ -130,12 +222,6 @@ static void amd_iommu_stats_init(void) #endif -/* returns !0 if the IOMMU is caching non-present entries in its TLB */ -static int iommu_has_npcache(struct amd_iommu *iommu) -{ - return iommu->cap & (1UL << IOMMU_CAP_NPCACHE); -} - /**************************************************************************** * * Interrupt handling functions @@ -199,6 +285,7 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt) break; case EVENT_TYPE_ILL_CMD: printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); + iommu->reset_in_progress = true; reset_iommu_command_buffer(iommu); dump_command(address); break; @@ -321,11 +408,8 @@ static void __iommu_wait_for_completion(struct amd_iommu *iommu) status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); - if (unlikely(i == EXIT_LOOP_COUNT)) { - spin_unlock(&iommu->lock); - reset_iommu_command_buffer(iommu); - spin_lock(&iommu->lock); - } + if (unlikely(i == EXIT_LOOP_COUNT)) + iommu->reset_in_progress = true; } /* @@ -372,26 +456,46 @@ static int iommu_completion_wait(struct amd_iommu *iommu) out: spin_unlock_irqrestore(&iommu->lock, flags); + if (iommu->reset_in_progress) + reset_iommu_command_buffer(iommu); + return 0; } +static void iommu_flush_complete(struct protection_domain *domain) +{ + int i; + + for (i = 0; i < amd_iommus_present; ++i) { + if (!domain->dev_iommu[i]) + continue; + + /* + * Devices of this domain are behind this IOMMU + * We need to wait for completion of all commands. + */ + iommu_completion_wait(amd_iommus[i]); + } +} + /* * Command send function for invalidating a device table entry */ -static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) +static int iommu_flush_device(struct device *dev) { + struct amd_iommu *iommu; struct iommu_cmd cmd; - int ret; + u16 devid; - BUG_ON(iommu == NULL); + devid = get_device_id(dev); + iommu = amd_iommu_rlookup_table[devid]; + /* Build command */ memset(&cmd, 0, sizeof(cmd)); CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); cmd.data[0] = devid; - ret = iommu_queue_command(iommu, &cmd); - - return ret; + return iommu_queue_command(iommu, &cmd); } static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, @@ -430,11 +534,11 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, * It invalidates a single PTE if the range to flush is within a single * page. Otherwise it flushes the whole TLB of the IOMMU. */ -static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, - u64 address, size_t size) +static void __iommu_flush_pages(struct protection_domain *domain, + u64 address, size_t size, int pde) { - int s = 0; - unsigned pages = iommu_num_pages(address, size, PAGE_SIZE); + int s = 0, i; + unsigned long pages = iommu_num_pages(address, size, PAGE_SIZE); address &= PAGE_MASK; @@ -447,142 +551,212 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, s = 1; } - iommu_queue_inv_iommu_pages(iommu, address, domid, 0, s); - return 0; + for (i = 0; i < amd_iommus_present; ++i) { + if (!domain->dev_iommu[i]) + continue; + + /* + * Devices of this domain are behind this IOMMU + * We need a TLB flush + */ + iommu_queue_inv_iommu_pages(amd_iommus[i], address, + domain->id, pde, s); + } + + return; } -/* Flush the whole IO/TLB for a given protection domain */ -static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) +static void iommu_flush_pages(struct protection_domain *domain, + u64 address, size_t size) { - u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; - - INC_STATS_COUNTER(domain_flush_single); + __iommu_flush_pages(domain, address, size, 0); +} - iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); +/* Flush the whole IO/TLB for a given protection domain */ +static void iommu_flush_tlb(struct protection_domain *domain) +{ + __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0); } /* Flush the whole IO/TLB for a given protection domain - including PDE */ -static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid) +static void iommu_flush_tlb_pde(struct protection_domain *domain) { - u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; - - INC_STATS_COUNTER(domain_flush_single); - - iommu_queue_inv_iommu_pages(iommu, address, domid, 1, 1); + __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); } + /* - * This function flushes one domain on one IOMMU + * This function flushes the DTEs for all devices in domain */ -static void flush_domain_on_iommu(struct amd_iommu *iommu, u16 domid) +static void iommu_flush_domain_devices(struct protection_domain *domain) { - struct iommu_cmd cmd; + struct iommu_dev_data *dev_data; unsigned long flags; - __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, - domid, 1, 1); + spin_lock_irqsave(&domain->lock, flags); - spin_lock_irqsave(&iommu->lock, flags); - __iommu_queue_command(iommu, &cmd); - __iommu_completion_wait(iommu); - __iommu_wait_for_completion(iommu); - spin_unlock_irqrestore(&iommu->lock, flags); + list_for_each_entry(dev_data, &domain->dev_list, list) + iommu_flush_device(dev_data->dev); + + spin_unlock_irqrestore(&domain->lock, flags); } -static void flush_all_domains_on_iommu(struct amd_iommu *iommu) +static void iommu_flush_all_domain_devices(void) { - int i; + struct protection_domain *domain; + unsigned long flags; - for (i = 1; i < MAX_DOMAIN_ID; ++i) { - if (!test_bit(i, amd_iommu_pd_alloc_bitmap)) - continue; - flush_domain_on_iommu(iommu, i); + spin_lock_irqsave(&amd_iommu_pd_lock, flags); + + list_for_each_entry(domain, &amd_iommu_pd_list, list) { + iommu_flush_domain_devices(domain); + iommu_flush_complete(domain); } + spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); +} + +void amd_iommu_flush_all_devices(void) +{ + iommu_flush_all_domain_devices(); } /* - * This function is used to flush the IO/TLB for a given protection domain - * on every IOMMU in the system + * This function uses heavy locking and may disable irqs for some time. But + * this is no issue because it is only called during resume. */ -static void iommu_flush_domain(u16 domid) +void amd_iommu_flush_all_domains(void) { - struct amd_iommu *iommu; + struct protection_domain *domain; + unsigned long flags; - INC_STATS_COUNTER(domain_flush_all); + spin_lock_irqsave(&amd_iommu_pd_lock, flags); - for_each_iommu(iommu) - flush_domain_on_iommu(iommu, domid); + list_for_each_entry(domain, &amd_iommu_pd_list, list) { + spin_lock(&domain->lock); + iommu_flush_tlb_pde(domain); + iommu_flush_complete(domain); + spin_unlock(&domain->lock); + } + + spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); } -void amd_iommu_flush_all_domains(void) +static void reset_iommu_command_buffer(struct amd_iommu *iommu) { - struct amd_iommu *iommu; + pr_err("AMD-Vi: Resetting IOMMU command buffer\n"); - for_each_iommu(iommu) - flush_all_domains_on_iommu(iommu); + if (iommu->reset_in_progress) + panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n"); + + amd_iommu_reset_cmd_buffer(iommu); + amd_iommu_flush_all_devices(); + amd_iommu_flush_all_domains(); + + iommu->reset_in_progress = false; } -static void flush_all_devices_for_iommu(struct amd_iommu *iommu) +/**************************************************************************** + * + * The functions below are used the create the page table mappings for + * unity mapped regions. + * + ****************************************************************************/ + +/* + * This function is used to add another level to an IO page table. Adding + * another level increases the size of the address space by 9 bits to a size up + * to 64 bits. + */ +static bool increase_address_space(struct protection_domain *domain, + gfp_t gfp) { - int i; + u64 *pte; - for (i = 0; i <= amd_iommu_last_bdf; ++i) { - if (iommu != amd_iommu_rlookup_table[i]) - continue; + if (domain->mode == PAGE_MODE_6_LEVEL) + /* address space already 64 bit large */ + return false; - iommu_queue_inv_dev_entry(iommu, i); - iommu_completion_wait(iommu); - } + pte = (void *)get_zeroed_page(gfp); + if (!pte) + return false; + + *pte = PM_LEVEL_PDE(domain->mode, + virt_to_phys(domain->pt_root)); + domain->pt_root = pte; + domain->mode += 1; + domain->updated = true; + + return true; } -static void flush_devices_by_domain(struct protection_domain *domain) +static u64 *alloc_pte(struct protection_domain *domain, + unsigned long address, + int end_lvl, + u64 **pte_page, + gfp_t gfp) { - struct amd_iommu *iommu; - int i; + u64 *pte, *page; + int level; - for (i = 0; i <= amd_iommu_last_bdf; ++i) { - if ((domain == NULL && amd_iommu_pd_table[i] == NULL) || - (amd_iommu_pd_table[i] != domain)) - continue; + while (address > PM_LEVEL_SIZE(domain->mode)) + increase_address_space(domain, gfp); - iommu = amd_iommu_rlookup_table[i]; - if (!iommu) - continue; + level = domain->mode - 1; + pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; - iommu_queue_inv_dev_entry(iommu, i); - iommu_completion_wait(iommu); + while (level > end_lvl) { + if (!IOMMU_PTE_PRESENT(*pte)) { + page = (u64 *)get_zeroed_page(gfp); + if (!page) + return NULL; + *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); + } + + level -= 1; + + pte = IOMMU_PTE_PAGE(*pte); + + if (pte_page && level == end_lvl) + *pte_page = pte; + + pte = &pte[PM_LEVEL_INDEX(level, address)]; } + + return pte; } -static void reset_iommu_command_buffer(struct amd_iommu *iommu) +/* + * This function checks if there is a PTE for a given dma address. If + * there is one, it returns the pointer to it. + */ +static u64 *fetch_pte(struct protection_domain *domain, + unsigned long address, int map_size) { - pr_err("AMD-Vi: Resetting IOMMU command buffer\n"); + int level; + u64 *pte; - if (iommu->reset_in_progress) - panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n"); + level = domain->mode - 1; + pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; - iommu->reset_in_progress = true; + while (level > map_size) { + if (!IOMMU_PTE_PRESENT(*pte)) + return NULL; - amd_iommu_reset_cmd_buffer(iommu); - flush_all_devices_for_iommu(iommu); - flush_all_domains_on_iommu(iommu); + level -= 1; - iommu->reset_in_progress = false; -} + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[PM_LEVEL_INDEX(level, address)]; -void amd_iommu_flush_all_devices(void) -{ - flush_devices_by_domain(NULL); -} + if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) { + pte = NULL; + break; + } + } -/**************************************************************************** - * - * The functions below are used the create the page table mappings for - * unity mapped regions. - * - ****************************************************************************/ + return pte; +} /* * Generic mapping functions. It maps a physical address into a DMA @@ -654,28 +828,6 @@ static int iommu_for_unity_map(struct amd_iommu *iommu, } /* - * Init the unity mappings for a specific IOMMU in the system - * - * Basically iterates over all unity mapping entries and applies them to - * the default domain DMA of that IOMMU if necessary. - */ -static int iommu_init_unity_mappings(struct amd_iommu *iommu) -{ - struct unity_map_entry *entry; - int ret; - - list_for_each_entry(entry, &amd_iommu_unity_map, list) { - if (!iommu_for_unity_map(iommu, entry)) - continue; - ret = dma_ops_unity_map(iommu->default_dom, entry); - if (ret) - return ret; - } - - return 0; -} - -/* * This function actually applies the mapping to the page table of the * dma_ops domain. */ @@ -704,6 +856,28 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, } /* + * Init the unity mappings for a specific IOMMU in the system + * + * Basically iterates over all unity mapping entries and applies them to + * the default domain DMA of that IOMMU if necessary. + */ +static int iommu_init_unity_mappings(struct amd_iommu *iommu) +{ + struct unity_map_entry *entry; + int ret; + + list_for_each_entry(entry, &amd_iommu_unity_map, list) { + if (!iommu_for_unity_map(iommu, entry)) + continue; + ret = dma_ops_unity_map(iommu->default_dom, entry); + if (ret) + return ret; + } + + return 0; +} + +/* * Inits the unity mappings required for a specific device */ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, @@ -740,34 +914,23 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, */ /* - * This function checks if there is a PTE for a given dma address. If - * there is one, it returns the pointer to it. + * Used to reserve address ranges in the aperture (e.g. for exclusion + * ranges. */ -static u64 *fetch_pte(struct protection_domain *domain, - unsigned long address, int map_size) +static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, + unsigned long start_page, + unsigned int pages) { - int level; - u64 *pte; - - level = domain->mode - 1; - pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; - - while (level > map_size) { - if (!IOMMU_PTE_PRESENT(*pte)) - return NULL; - - level -= 1; + unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT; - pte = IOMMU_PTE_PAGE(*pte); - pte = &pte[PM_LEVEL_INDEX(level, address)]; + if (start_page + pages > last_page) + pages = last_page - start_page; - if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) { - pte = NULL; - break; - } + for (i = start_page; i < start_page + pages; ++i) { + int index = i / APERTURE_RANGE_PAGES; + int page = i % APERTURE_RANGE_PAGES; + __set_bit(page, dom->aperture[index]->bitmap); } - - return pte; } /* @@ -775,11 +938,11 @@ static u64 *fetch_pte(struct protection_domain *domain, * aperture in case of dma_ops domain allocation or address allocation * failure. */ -static int alloc_new_range(struct amd_iommu *iommu, - struct dma_ops_domain *dma_dom, +static int alloc_new_range(struct dma_ops_domain *dma_dom, bool populate, gfp_t gfp) { int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; + struct amd_iommu *iommu; int i; #ifdef CONFIG_IOMMU_STRESS @@ -819,14 +982,17 @@ static int alloc_new_range(struct amd_iommu *iommu, dma_dom->aperture_size += APERTURE_RANGE_SIZE; /* Intialize the exclusion range if necessary */ - if (iommu->exclusion_start && - iommu->exclusion_start >= dma_dom->aperture[index]->offset && - iommu->exclusion_start < dma_dom->aperture_size) { - unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; - int pages = iommu_num_pages(iommu->exclusion_start, - iommu->exclusion_length, - PAGE_SIZE); - dma_ops_reserve_addresses(dma_dom, startpage, pages); + for_each_iommu(iommu) { + if (iommu->exclusion_start && + iommu->exclusion_start >= dma_dom->aperture[index]->offset + && iommu->exclusion_start < dma_dom->aperture_size) { + unsigned long startpage; + int pages = iommu_num_pages(iommu->exclusion_start, + iommu->exclusion_length, + PAGE_SIZE); + startpage = iommu->exclusion_start >> PAGE_SHIFT; + dma_ops_reserve_addresses(dma_dom, startpage, pages); + } } /* @@ -928,7 +1094,7 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, } if (unlikely(address == -1)) - address = bad_dma_address; + address = DMA_ERROR_CODE; WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); @@ -973,6 +1139,31 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, * ****************************************************************************/ +/* + * This function adds a protection domain to the global protection domain list + */ +static void add_domain_to_list(struct protection_domain *domain) +{ + unsigned long flags; + + spin_lock_irqsave(&amd_iommu_pd_lock, flags); + list_add(&domain->list, &amd_iommu_pd_list); + spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); +} + +/* + * This function removes a protection domain to the global + * protection domain list + */ +static void del_domain_from_list(struct protection_domain *domain) +{ + unsigned long flags; + + spin_lock_irqsave(&amd_iommu_pd_lock, flags); + list_del(&domain->list); + spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); +} + static u16 domain_id_alloc(void) { unsigned long flags; @@ -1000,26 +1191,6 @@ static void domain_id_free(int id) write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); } -/* - * Used to reserve address ranges in the aperture (e.g. for exclusion - * ranges. - */ -static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, - unsigned long start_page, - unsigned int pages) -{ - unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT; - - if (start_page + pages > last_page) - pages = last_page - start_page; - - for (i = start_page; i < start_page + pages; ++i) { - int index = i / APERTURE_RANGE_PAGES; - int page = i % APERTURE_RANGE_PAGES; - __set_bit(page, dom->aperture[index]->bitmap); - } -} - static void free_pagetable(struct protection_domain *domain) { int i, j; @@ -1061,6 +1232,8 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) if (!dom) return; + del_domain_from_list(&dom->domain); + free_pagetable(&dom->domain); for (i = 0; i < APERTURE_MAX_RANGES; ++i) { @@ -1078,7 +1251,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) * It also intializes the page table and the address allocator data * structures required for the dma_ops interface */ -static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) +static struct dma_ops_domain *dma_ops_domain_alloc(void) { struct dma_ops_domain *dma_dom; @@ -1091,6 +1264,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) dma_dom->domain.id = domain_id_alloc(); if (dma_dom->domain.id == 0) goto free_dma_dom; + INIT_LIST_HEAD(&dma_dom->domain.dev_list); dma_dom->domain.mode = PAGE_MODE_2_LEVEL; dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); dma_dom->domain.flags = PD_DMA_OPS_MASK; @@ -1101,7 +1275,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) dma_dom->need_flush = false; dma_dom->target_dev = 0xffff; - if (alloc_new_range(iommu, dma_dom, true, GFP_KERNEL)) + add_domain_to_list(&dma_dom->domain); + + if (alloc_new_range(dma_dom, true, GFP_KERNEL)) goto free_dma_dom; /* @@ -1129,22 +1305,6 @@ static bool dma_ops_domain(struct protection_domain *domain) return domain->flags & PD_DMA_OPS_MASK; } -/* - * Find out the protection domain structure for a given PCI device. This - * will give us the pointer to the page table root for example. - */ -static struct protection_domain *domain_for_device(u16 devid) -{ - struct protection_domain *dom; - unsigned long flags; - - read_lock_irqsave(&amd_iommu_devtable_lock, flags); - dom = amd_iommu_pd_table[devid]; - read_unlock_irqrestore(&amd_iommu_devtable_lock, flags); - - return dom; -} - static void set_dte_entry(u16 devid, struct protection_domain *domain) { u64 pte_root = virt_to_phys(domain->pt_root); @@ -1156,42 +1316,123 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain) amd_iommu_dev_table[devid].data[2] = domain->id; amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); +} + +static void clear_dte_entry(u16 devid) +{ + /* remove entry from the device table seen by the hardware */ + amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; + amd_iommu_dev_table[devid].data[1] = 0; + amd_iommu_dev_table[devid].data[2] = 0; - amd_iommu_pd_table[devid] = domain; + amd_iommu_apply_erratum_63(devid); +} + +static void do_attach(struct device *dev, struct protection_domain *domain) +{ + struct iommu_dev_data *dev_data; + struct amd_iommu *iommu; + u16 devid; + + devid = get_device_id(dev); + iommu = amd_iommu_rlookup_table[devid]; + dev_data = get_dev_data(dev); + + /* Update data structures */ + dev_data->domain = domain; + list_add(&dev_data->list, &domain->dev_list); + set_dte_entry(devid, domain); + + /* Do reference counting */ + domain->dev_iommu[iommu->index] += 1; + domain->dev_cnt += 1; + + /* Flush the DTE entry */ + iommu_flush_device(dev); +} + +static void do_detach(struct device *dev) +{ + struct iommu_dev_data *dev_data; + struct amd_iommu *iommu; + u16 devid; + + devid = get_device_id(dev); + iommu = amd_iommu_rlookup_table[devid]; + dev_data = get_dev_data(dev); + + /* decrease reference counters */ + dev_data->domain->dev_iommu[iommu->index] -= 1; + dev_data->domain->dev_cnt -= 1; + + /* Update data structures */ + dev_data->domain = NULL; + list_del(&dev_data->list); + clear_dte_entry(devid); + + /* Flush the DTE entry */ + iommu_flush_device(dev); } /* * If a device is not yet associated with a domain, this function does * assigns it visible for the hardware */ -static void __attach_device(struct amd_iommu *iommu, - struct protection_domain *domain, - u16 devid) +static int __attach_device(struct device *dev, + struct protection_domain *domain) { + struct iommu_dev_data *dev_data, *alias_data; + + dev_data = get_dev_data(dev); + alias_data = get_dev_data(dev_data->alias); + + if (!alias_data) + return -EINVAL; + /* lock domain */ spin_lock(&domain->lock); - /* update DTE entry */ - set_dte_entry(devid, domain); + /* Some sanity checks */ + if (alias_data->domain != NULL && + alias_data->domain != domain) + return -EBUSY; - domain->dev_cnt += 1; + if (dev_data->domain != NULL && + dev_data->domain != domain) + return -EBUSY; + + /* Do real assignment */ + if (dev_data->alias != dev) { + alias_data = get_dev_data(dev_data->alias); + if (alias_data->domain == NULL) + do_attach(dev_data->alias, domain); + + atomic_inc(&alias_data->bind); + } + + if (dev_data->domain == NULL) + do_attach(dev, domain); + + atomic_inc(&dev_data->bind); /* ready */ spin_unlock(&domain->lock); + + return 0; } /* * If a device is not yet associated with a domain, this function does * assigns it visible for the hardware */ -static void attach_device(struct amd_iommu *iommu, - struct protection_domain *domain, - u16 devid) +static int attach_device(struct device *dev, + struct protection_domain *domain) { unsigned long flags; + int ret; write_lock_irqsave(&amd_iommu_devtable_lock, flags); - __attach_device(iommu, domain, devid); + ret = __attach_device(dev, domain); write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); /* @@ -1199,98 +1440,125 @@ static void attach_device(struct amd_iommu *iommu, * left the caches in the IOMMU dirty. So we have to flush * here to evict all dirty stuff. */ - iommu_queue_inv_dev_entry(iommu, devid); - iommu_flush_tlb_pde(iommu, domain->id); + iommu_flush_tlb_pde(domain); + + return ret; } /* * Removes a device from a protection domain (unlocked) */ -static void __detach_device(struct protection_domain *domain, u16 devid) +static void __detach_device(struct device *dev) { + struct iommu_dev_data *dev_data = get_dev_data(dev); + struct iommu_dev_data *alias_data; + unsigned long flags; - /* lock domain */ - spin_lock(&domain->lock); - - /* remove domain from the lookup table */ - amd_iommu_pd_table[devid] = NULL; + BUG_ON(!dev_data->domain); - /* remove entry from the device table seen by the hardware */ - amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; - amd_iommu_dev_table[devid].data[1] = 0; - amd_iommu_dev_table[devid].data[2] = 0; + spin_lock_irqsave(&dev_data->domain->lock, flags); - amd_iommu_apply_erratum_63(devid); + if (dev_data->alias != dev) { + alias_data = get_dev_data(dev_data->alias); + if (atomic_dec_and_test(&alias_data->bind)) + do_detach(dev_data->alias); + } - /* decrease reference counter */ - domain->dev_cnt -= 1; + if (atomic_dec_and_test(&dev_data->bind)) + do_detach(dev); - /* ready */ - spin_unlock(&domain->lock); + spin_unlock_irqrestore(&dev_data->domain->lock, flags); /* * If we run in passthrough mode the device must be assigned to the * passthrough domain if it is detached from any other domain */ - if (iommu_pass_through) { - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; - __attach_device(iommu, pt_domain, devid); - } + if (iommu_pass_through && dev_data->domain == NULL) + __attach_device(dev, pt_domain); } /* * Removes a device from a protection domain (with devtable_lock held) */ -static void detach_device(struct protection_domain *domain, u16 devid) +static void detach_device(struct device *dev) { unsigned long flags; /* lock device table */ write_lock_irqsave(&amd_iommu_devtable_lock, flags); - __detach_device(domain, devid); + __detach_device(dev); write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); } +/* + * Find out the protection domain structure for a given PCI device. This + * will give us the pointer to the page table root for example. + */ +static struct protection_domain *domain_for_device(struct device *dev) +{ + struct protection_domain *dom; + struct iommu_dev_data *dev_data, *alias_data; + unsigned long flags; + u16 devid, alias; + + devid = get_device_id(dev); + alias = amd_iommu_alias_table[devid]; + dev_data = get_dev_data(dev); + alias_data = get_dev_data(dev_data->alias); + if (!alias_data) + return NULL; + + read_lock_irqsave(&amd_iommu_devtable_lock, flags); + dom = dev_data->domain; + if (dom == NULL && + alias_data->domain != NULL) { + __attach_device(dev, alias_data->domain); + dom = alias_data->domain; + } + + read_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + return dom; +} + static int device_change_notifier(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; - struct pci_dev *pdev = to_pci_dev(dev); - u16 devid = calc_devid(pdev->bus->number, pdev->devfn); + u16 devid; struct protection_domain *domain; struct dma_ops_domain *dma_domain; struct amd_iommu *iommu; unsigned long flags; - if (devid > amd_iommu_last_bdf) - goto out; - - devid = amd_iommu_alias_table[devid]; - - iommu = amd_iommu_rlookup_table[devid]; - if (iommu == NULL) - goto out; - - domain = domain_for_device(devid); + if (!check_device(dev)) + return 0; - if (domain && !dma_ops_domain(domain)) - WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound " - "to a non-dma-ops domain\n", dev_name(dev)); + devid = get_device_id(dev); + iommu = amd_iommu_rlookup_table[devid]; switch (action) { case BUS_NOTIFY_UNBOUND_DRIVER: + + domain = domain_for_device(dev); + if (!domain) goto out; if (iommu_pass_through) break; - detach_device(domain, devid); + detach_device(dev); break; case BUS_NOTIFY_ADD_DEVICE: + + iommu_init_device(dev); + + domain = domain_for_device(dev); + /* allocate a protection domain if a device is added */ dma_domain = find_protection_domain(devid); if (dma_domain) goto out; - dma_domain = dma_ops_domain_alloc(iommu); + dma_domain = dma_ops_domain_alloc(); if (!dma_domain) goto out; dma_domain->target_dev = devid; @@ -1300,11 +1568,15 @@ static int device_change_notifier(struct notifier_block *nb, spin_unlock_irqrestore(&iommu_pd_list_lock, flags); break; + case BUS_NOTIFY_DEL_DEVICE: + + iommu_uninit_device(dev); + default: goto out; } - iommu_queue_inv_dev_entry(iommu, devid); + iommu_flush_device(dev); iommu_completion_wait(iommu); out: @@ -1322,106 +1594,46 @@ static struct notifier_block device_nb = { *****************************************************************************/ /* - * This function checks if the driver got a valid device from the caller to - * avoid dereferencing invalid pointers. - */ -static bool check_device(struct device *dev) -{ - if (!dev || !dev->dma_mask) - return false; - - return true; -} - -/* - * In this function the list of preallocated protection domains is traversed to - * find the domain for a specific device - */ -static struct dma_ops_domain *find_protection_domain(u16 devid) -{ - struct dma_ops_domain *entry, *ret = NULL; - unsigned long flags; - - if (list_empty(&iommu_pd_list)) - return NULL; - - spin_lock_irqsave(&iommu_pd_list_lock, flags); - - list_for_each_entry(entry, &iommu_pd_list, list) { - if (entry->target_dev == devid) { - ret = entry; - break; - } - } - - spin_unlock_irqrestore(&iommu_pd_list_lock, flags); - - return ret; -} - -/* * In the dma_ops path we only have the struct device. This function * finds the corresponding IOMMU, the protection domain and the * requestor id for a given device. * If the device is not yet associated with a domain this is also done * in this function. */ -static int get_device_resources(struct device *dev, - struct amd_iommu **iommu, - struct protection_domain **domain, - u16 *bdf) +static struct protection_domain *get_domain(struct device *dev) { + struct protection_domain *domain; struct dma_ops_domain *dma_dom; - struct pci_dev *pcidev; - u16 _bdf; - - *iommu = NULL; - *domain = NULL; - *bdf = 0xffff; - - if (dev->bus != &pci_bus_type) - return 0; + u16 devid = get_device_id(dev); - pcidev = to_pci_dev(dev); - _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); + if (!check_device(dev)) + return ERR_PTR(-EINVAL); - /* device not translated by any IOMMU in the system? */ - if (_bdf > amd_iommu_last_bdf) - return 0; + domain = domain_for_device(dev); + if (domain != NULL && !dma_ops_domain(domain)) + return ERR_PTR(-EBUSY); - *bdf = amd_iommu_alias_table[_bdf]; + if (domain != NULL) + return domain; - *iommu = amd_iommu_rlookup_table[*bdf]; - if (*iommu == NULL) - return 0; - *domain = domain_for_device(*bdf); - if (*domain == NULL) { - dma_dom = find_protection_domain(*bdf); - if (!dma_dom) - dma_dom = (*iommu)->default_dom; - *domain = &dma_dom->domain; - attach_device(*iommu, *domain, *bdf); - DUMP_printk("Using protection domain %d for device %s\n", - (*domain)->id, dev_name(dev)); - } - - if (domain_for_device(_bdf) == NULL) - attach_device(*iommu, *domain, _bdf); + /* Device not bount yet - bind it */ + dma_dom = find_protection_domain(devid); + if (!dma_dom) + dma_dom = amd_iommu_rlookup_table[devid]->default_dom; + attach_device(dev, &dma_dom->domain); + DUMP_printk("Using protection domain %d for device %s\n", + dma_dom->domain.id, dev_name(dev)); - return 1; + return &dma_dom->domain; } static void update_device_table(struct protection_domain *domain) { - unsigned long flags; - int i; + struct iommu_dev_data *dev_data; - for (i = 0; i <= amd_iommu_last_bdf; ++i) { - if (amd_iommu_pd_table[i] != domain) - continue; - write_lock_irqsave(&amd_iommu_devtable_lock, flags); - set_dte_entry(i, domain); - write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + list_for_each_entry(dev_data, &domain->dev_list, list) { + u16 devid = get_device_id(dev_data->dev); + set_dte_entry(devid, domain); } } @@ -1431,76 +1643,13 @@ static void update_domain(struct protection_domain *domain) return; update_device_table(domain); - flush_devices_by_domain(domain); - iommu_flush_domain(domain->id); + iommu_flush_domain_devices(domain); + iommu_flush_tlb_pde(domain); domain->updated = false; } /* - * This function is used to add another level to an IO page table. Adding - * another level increases the size of the address space by 9 bits to a size up - * to 64 bits. - */ -static bool increase_address_space(struct protection_domain *domain, - gfp_t gfp) -{ - u64 *pte; - - if (domain->mode == PAGE_MODE_6_LEVEL) - /* address space already 64 bit large */ - return false; - - pte = (void *)get_zeroed_page(gfp); - if (!pte) - return false; - - *pte = PM_LEVEL_PDE(domain->mode, - virt_to_phys(domain->pt_root)); - domain->pt_root = pte; - domain->mode += 1; - domain->updated = true; - - return true; -} - -static u64 *alloc_pte(struct protection_domain *domain, - unsigned long address, - int end_lvl, - u64 **pte_page, - gfp_t gfp) -{ - u64 *pte, *page; - int level; - - while (address > PM_LEVEL_SIZE(domain->mode)) - increase_address_space(domain, gfp); - - level = domain->mode - 1; - pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; - - while (level > end_lvl) { - if (!IOMMU_PTE_PRESENT(*pte)) { - page = (u64 *)get_zeroed_page(gfp); - if (!page) - return NULL; - *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); - } - - level -= 1; - - pte = IOMMU_PTE_PAGE(*pte); - - if (pte_page && level == end_lvl) - *pte_page = pte; - - pte = &pte[PM_LEVEL_INDEX(level, address)]; - } - - return pte; -} - -/* * This function fetches the PTE for a given address in the aperture */ static u64* dma_ops_get_pte(struct dma_ops_domain *dom, @@ -1530,8 +1679,7 @@ static u64* dma_ops_get_pte(struct dma_ops_domain *dom, * This is the generic map function. It maps one 4kb page at paddr to * the given address in the DMA address space for the domain. */ -static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, - struct dma_ops_domain *dom, +static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom, unsigned long address, phys_addr_t paddr, int direction) @@ -1544,7 +1692,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, pte = dma_ops_get_pte(dom, address); if (!pte) - return bad_dma_address; + return DMA_ERROR_CODE; __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; @@ -1565,8 +1713,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, /* * The generic unmapping function for on page in the DMA address space. */ -static void dma_ops_domain_unmap(struct amd_iommu *iommu, - struct dma_ops_domain *dom, +static void dma_ops_domain_unmap(struct dma_ops_domain *dom, unsigned long address) { struct aperture_range *aperture; @@ -1597,7 +1744,6 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, * Must be called with the domain lock held. */ static dma_addr_t __map_single(struct device *dev, - struct amd_iommu *iommu, struct dma_ops_domain *dma_dom, phys_addr_t paddr, size_t size, @@ -1625,7 +1771,7 @@ static dma_addr_t __map_single(struct device *dev, retry: address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, dma_mask); - if (unlikely(address == bad_dma_address)) { + if (unlikely(address == DMA_ERROR_CODE)) { /* * setting next_address here will let the address * allocator only scan the new allocated range in the @@ -1633,7 +1779,7 @@ retry: */ dma_dom->next_address = dma_dom->aperture_size; - if (alloc_new_range(iommu, dma_dom, false, GFP_ATOMIC)) + if (alloc_new_range(dma_dom, false, GFP_ATOMIC)) goto out; /* @@ -1645,8 +1791,8 @@ retry: start = address; for (i = 0; i < pages; ++i) { - ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir); - if (ret == bad_dma_address) + ret = dma_ops_domain_map(dma_dom, start, paddr, dir); + if (ret == DMA_ERROR_CODE) goto out_unmap; paddr += PAGE_SIZE; @@ -1657,10 +1803,10 @@ retry: ADD_STATS_COUNTER(alloced_io_mem, size); if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { - iommu_flush_tlb(iommu, dma_dom->domain.id); + iommu_flush_tlb(&dma_dom->domain); dma_dom->need_flush = false; - } else if (unlikely(iommu_has_npcache(iommu))) - iommu_flush_pages(iommu, dma_dom->domain.id, address, size); + } else if (unlikely(amd_iommu_np_cache)) + iommu_flush_pages(&dma_dom->domain, address, size); out: return address; @@ -1669,20 +1815,19 @@ out_unmap: for (--i; i >= 0; --i) { start -= PAGE_SIZE; - dma_ops_domain_unmap(iommu, dma_dom, start); + dma_ops_domain_unmap(dma_dom, start); } dma_ops_free_addresses(dma_dom, address, pages); - return bad_dma_address; + return DMA_ERROR_CODE; } /* * Does the reverse of the __map_single function. Must be called with * the domain lock held too */ -static void __unmap_single(struct amd_iommu *iommu, - struct dma_ops_domain *dma_dom, +static void __unmap_single(struct dma_ops_domain *dma_dom, dma_addr_t dma_addr, size_t size, int dir) @@ -1690,7 +1835,7 @@ static void __unmap_single(struct amd_iommu *iommu, dma_addr_t i, start; unsigned int pages; - if ((dma_addr == bad_dma_address) || + if ((dma_addr == DMA_ERROR_CODE) || (dma_addr + size > dma_dom->aperture_size)) return; @@ -1699,7 +1844,7 @@ static void __unmap_single(struct amd_iommu *iommu, start = dma_addr; for (i = 0; i < pages; ++i) { - dma_ops_domain_unmap(iommu, dma_dom, start); + dma_ops_domain_unmap(dma_dom, start); start += PAGE_SIZE; } @@ -1708,7 +1853,7 @@ static void __unmap_single(struct amd_iommu *iommu, dma_ops_free_addresses(dma_dom, dma_addr, pages); if (amd_iommu_unmap_flush || dma_dom->need_flush) { - iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); + iommu_flush_pages(&dma_dom->domain, dma_addr, size); dma_dom->need_flush = false; } } @@ -1722,36 +1867,29 @@ static dma_addr_t map_page(struct device *dev, struct page *page, struct dma_attrs *attrs) { unsigned long flags; - struct amd_iommu *iommu; struct protection_domain *domain; - u16 devid; dma_addr_t addr; u64 dma_mask; phys_addr_t paddr = page_to_phys(page) + offset; INC_STATS_COUNTER(cnt_map_single); - if (!check_device(dev)) - return bad_dma_address; - - dma_mask = *dev->dma_mask; - - get_device_resources(dev, &iommu, &domain, &devid); - - if (iommu == NULL || domain == NULL) - /* device not handled by any AMD IOMMU */ + domain = get_domain(dev); + if (PTR_ERR(domain) == -EINVAL) return (dma_addr_t)paddr; + else if (IS_ERR(domain)) + return DMA_ERROR_CODE; - if (!dma_ops_domain(domain)) - return bad_dma_address; + dma_mask = *dev->dma_mask; spin_lock_irqsave(&domain->lock, flags); - addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, + + addr = __map_single(dev, domain->priv, paddr, size, dir, false, dma_mask); - if (addr == bad_dma_address) + if (addr == DMA_ERROR_CODE) goto out; - iommu_completion_wait(iommu); + iommu_flush_complete(domain); out: spin_unlock_irqrestore(&domain->lock, flags); @@ -1766,25 +1904,19 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { unsigned long flags; - struct amd_iommu *iommu; struct protection_domain *domain; - u16 devid; INC_STATS_COUNTER(cnt_unmap_single); - if (!check_device(dev) || - !get_device_resources(dev, &iommu, &domain, &devid)) - /* device not handled by any AMD IOMMU */ - return; - - if (!dma_ops_domain(domain)) + domain = get_domain(dev); + if (IS_ERR(domain)) return; spin_lock_irqsave(&domain->lock, flags); - __unmap_single(iommu, domain->priv, dma_addr, size, dir); + __unmap_single(domain->priv, dma_addr, size, dir); - iommu_completion_wait(iommu); + iommu_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); } @@ -1816,9 +1948,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, struct dma_attrs *attrs) { unsigned long flags; - struct amd_iommu *iommu; struct protection_domain *domain; - u16 devid; int i; struct scatterlist *s; phys_addr_t paddr; @@ -1827,25 +1957,20 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, INC_STATS_COUNTER(cnt_map_sg); - if (!check_device(dev)) + domain = get_domain(dev); + if (PTR_ERR(domain) == -EINVAL) + return map_sg_no_iommu(dev, sglist, nelems, dir); + else if (IS_ERR(domain)) return 0; dma_mask = *dev->dma_mask; - get_device_resources(dev, &iommu, &domain, &devid); - - if (!iommu || !domain) - return map_sg_no_iommu(dev, sglist, nelems, dir); - - if (!dma_ops_domain(domain)) - return 0; - spin_lock_irqsave(&domain->lock, flags); for_each_sg(sglist, s, nelems, i) { paddr = sg_phys(s); - s->dma_address = __map_single(dev, iommu, domain->priv, + s->dma_address = __map_single(dev, domain->priv, paddr, s->length, dir, false, dma_mask); @@ -1856,7 +1981,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, goto unmap; } - iommu_completion_wait(iommu); + iommu_flush_complete(domain); out: spin_unlock_irqrestore(&domain->lock, flags); @@ -1865,7 +1990,7 @@ out: unmap: for_each_sg(sglist, s, mapped_elems, i) { if (s->dma_address) - __unmap_single(iommu, domain->priv, s->dma_address, + __unmap_single(domain->priv, s->dma_address, s->dma_length, dir); s->dma_address = s->dma_length = 0; } @@ -1884,30 +2009,25 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, struct dma_attrs *attrs) { unsigned long flags; - struct amd_iommu *iommu; struct protection_domain *domain; struct scatterlist *s; - u16 devid; int i; INC_STATS_COUNTER(cnt_unmap_sg); - if (!check_device(dev) || - !get_device_resources(dev, &iommu, &domain, &devid)) - return; - - if (!dma_ops_domain(domain)) + domain = get_domain(dev); + if (IS_ERR(domain)) return; spin_lock_irqsave(&domain->lock, flags); for_each_sg(sglist, s, nelems, i) { - __unmap_single(iommu, domain->priv, s->dma_address, + __unmap_single(domain->priv, s->dma_address, s->dma_length, dir); s->dma_address = s->dma_length = 0; } - iommu_completion_wait(iommu); + iommu_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); } @@ -1920,49 +2040,44 @@ static void *alloc_coherent(struct device *dev, size_t size, { unsigned long flags; void *virt_addr; - struct amd_iommu *iommu; struct protection_domain *domain; - u16 devid; phys_addr_t paddr; u64 dma_mask = dev->coherent_dma_mask; INC_STATS_COUNTER(cnt_alloc_coherent); - if (!check_device(dev)) + domain = get_domain(dev); + if (PTR_ERR(domain) == -EINVAL) { + virt_addr = (void *)__get_free_pages(flag, get_order(size)); + *dma_addr = __pa(virt_addr); + return virt_addr; + } else if (IS_ERR(domain)) return NULL; - if (!get_device_resources(dev, &iommu, &domain, &devid)) - flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + dma_mask = dev->coherent_dma_mask; + flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + flag |= __GFP_ZERO; - flag |= __GFP_ZERO; virt_addr = (void *)__get_free_pages(flag, get_order(size)); if (!virt_addr) return NULL; paddr = virt_to_phys(virt_addr); - if (!iommu || !domain) { - *dma_addr = (dma_addr_t)paddr; - return virt_addr; - } - - if (!dma_ops_domain(domain)) - goto out_free; - if (!dma_mask) dma_mask = *dev->dma_mask; spin_lock_irqsave(&domain->lock, flags); - *dma_addr = __map_single(dev, iommu, domain->priv, paddr, + *dma_addr = __map_single(dev, domain->priv, paddr, size, DMA_BIDIRECTIONAL, true, dma_mask); - if (*dma_addr == bad_dma_address) { + if (*dma_addr == DMA_ERROR_CODE) { spin_unlock_irqrestore(&domain->lock, flags); goto out_free; } - iommu_completion_wait(iommu); + iommu_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); @@ -1982,28 +2097,19 @@ static void free_coherent(struct device *dev, size_t size, void *virt_addr, dma_addr_t dma_addr) { unsigned long flags; - struct amd_iommu *iommu; struct protection_domain *domain; - u16 devid; INC_STATS_COUNTER(cnt_free_coherent); - if (!check_device(dev)) - return; - - get_device_resources(dev, &iommu, &domain, &devid); - - if (!iommu || !domain) - goto free_mem; - - if (!dma_ops_domain(domain)) + domain = get_domain(dev); + if (IS_ERR(domain)) goto free_mem; spin_lock_irqsave(&domain->lock, flags); - __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); + __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); - iommu_completion_wait(iommu); + iommu_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); @@ -2017,22 +2123,7 @@ free_mem: */ static int amd_iommu_dma_supported(struct device *dev, u64 mask) { - u16 bdf; - struct pci_dev *pcidev; - - /* No device or no PCI device */ - if (!dev || dev->bus != &pci_bus_type) - return 0; - - pcidev = to_pci_dev(dev); - - bdf = calc_devid(pcidev->bus->number, pcidev->devfn); - - /* Out of our scope? */ - if (bdf > amd_iommu_last_bdf) - return 0; - - return 1; + return check_device(dev); } /* @@ -2046,25 +2137,30 @@ static void prealloc_protection_domains(void) { struct pci_dev *dev = NULL; struct dma_ops_domain *dma_dom; - struct amd_iommu *iommu; u16 devid; while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - devid = calc_devid(dev->bus->number, dev->devfn); - if (devid > amd_iommu_last_bdf) - continue; - devid = amd_iommu_alias_table[devid]; - if (domain_for_device(devid)) + + /* Do we handle this device? */ + if (!check_device(&dev->dev)) continue; - iommu = amd_iommu_rlookup_table[devid]; - if (!iommu) + + iommu_init_device(&dev->dev); + + /* Is there already any domain for it? */ + if (domain_for_device(&dev->dev)) continue; - dma_dom = dma_ops_domain_alloc(iommu); + + devid = get_device_id(&dev->dev); + + dma_dom = dma_ops_domain_alloc(); if (!dma_dom) continue; init_unity_mappings_for_device(dma_dom, devid); dma_dom->target_dev = devid; + attach_device(&dev->dev, &dma_dom->domain); + list_add_tail(&dma_dom->list, &iommu_pd_list); } } @@ -2093,7 +2189,7 @@ int __init amd_iommu_init_dma_ops(void) * protection domain will be assigned to the default one. */ for_each_iommu(iommu) { - iommu->default_dom = dma_ops_domain_alloc(iommu); + iommu->default_dom = dma_ops_domain_alloc(); if (iommu->default_dom == NULL) return -ENOMEM; iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; @@ -2103,15 +2199,12 @@ int __init amd_iommu_init_dma_ops(void) } /* - * If device isolation is enabled, pre-allocate the protection - * domains for each device. + * Pre-allocate the protection domains for each device. */ - if (amd_iommu_isolate) - prealloc_protection_domains(); + prealloc_protection_domains(); iommu_detected = 1; - force_iommu = 1; - bad_dma_address = 0; + swiotlb = 0; #ifdef CONFIG_GART_IOMMU gart_iommu_aperture_disabled = 1; gart_iommu_aperture = 0; @@ -2150,14 +2243,17 @@ free_domains: static void cleanup_domain(struct protection_domain *domain) { + struct iommu_dev_data *dev_data, *next; unsigned long flags; - u16 devid; write_lock_irqsave(&amd_iommu_devtable_lock, flags); - for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) - if (amd_iommu_pd_table[devid] == domain) - __detach_device(domain, devid); + list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) { + struct device *dev = dev_data->dev; + + do_detach(dev); + atomic_set(&dev_data->bind, 0); + } write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); } @@ -2167,6 +2263,8 @@ static void protection_domain_free(struct protection_domain *domain) if (!domain) return; + del_domain_from_list(domain); + if (domain->id) domain_id_free(domain->id); @@ -2185,6 +2283,9 @@ static struct protection_domain *protection_domain_alloc(void) domain->id = domain_id_alloc(); if (!domain->id) goto out_err; + INIT_LIST_HEAD(&domain->dev_list); + + add_domain_to_list(domain); return domain; @@ -2241,26 +2342,23 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom) static void amd_iommu_detach_device(struct iommu_domain *dom, struct device *dev) { - struct protection_domain *domain = dom->priv; + struct iommu_dev_data *dev_data = dev->archdata.iommu; struct amd_iommu *iommu; - struct pci_dev *pdev; u16 devid; - if (dev->bus != &pci_bus_type) + if (!check_device(dev)) return; - pdev = to_pci_dev(dev); - - devid = calc_devid(pdev->bus->number, pdev->devfn); + devid = get_device_id(dev); - if (devid > 0) - detach_device(domain, devid); + if (dev_data->domain != NULL) + detach_device(dev); iommu = amd_iommu_rlookup_table[devid]; if (!iommu) return; - iommu_queue_inv_dev_entry(iommu, devid); + iommu_flush_device(dev); iommu_completion_wait(iommu); } @@ -2268,35 +2366,30 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, struct device *dev) { struct protection_domain *domain = dom->priv; - struct protection_domain *old_domain; + struct iommu_dev_data *dev_data; struct amd_iommu *iommu; - struct pci_dev *pdev; + int ret; u16 devid; - if (dev->bus != &pci_bus_type) + if (!check_device(dev)) return -EINVAL; - pdev = to_pci_dev(dev); + dev_data = dev->archdata.iommu; - devid = calc_devid(pdev->bus->number, pdev->devfn); - - if (devid >= amd_iommu_last_bdf || - devid != amd_iommu_alias_table[devid]) - return -EINVAL; + devid = get_device_id(dev); iommu = amd_iommu_rlookup_table[devid]; if (!iommu) return -EINVAL; - old_domain = domain_for_device(devid); - if (old_domain) - detach_device(old_domain, devid); + if (dev_data->domain) + detach_device(dev); - attach_device(iommu, domain, devid); + ret = attach_device(dev, domain); iommu_completion_wait(iommu); - return 0; + return ret; } static int amd_iommu_map_range(struct iommu_domain *dom, @@ -2342,7 +2435,7 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom, iova += PAGE_SIZE; } - iommu_flush_domain(domain->id); + iommu_flush_tlb_pde(domain); } static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, @@ -2393,8 +2486,9 @@ static struct iommu_ops amd_iommu_ops = { int __init amd_iommu_init_passthrough(void) { + struct amd_iommu *iommu; struct pci_dev *dev = NULL; - u16 devid, devid2; + u16 devid; /* allocate passthroug domain */ pt_domain = protection_domain_alloc(); @@ -2404,20 +2498,17 @@ int __init amd_iommu_init_passthrough(void) pt_domain->mode |= PAGE_MODE_NONE; while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - struct amd_iommu *iommu; - devid = calc_devid(dev->bus->number, dev->devfn); - if (devid > amd_iommu_last_bdf) + if (!check_device(&dev->dev)) continue; - devid2 = amd_iommu_alias_table[devid]; + devid = get_device_id(&dev->dev); - iommu = amd_iommu_rlookup_table[devid2]; + iommu = amd_iommu_rlookup_table[devid]; if (!iommu) continue; - __attach_device(iommu, pt_domain, devid); - __attach_device(iommu, pt_domain, devid2); + attach_device(&dev->dev, pt_domain); } pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c20001e4f55..7ffc3996523 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. * Author: Joerg Roedel <joerg.roedel@amd.com> * Leo Duran <leo.duran@amd.com> * @@ -25,10 +25,12 @@ #include <linux/interrupt.h> #include <linux/msi.h> #include <asm/pci-direct.h> +#include <asm/amd_iommu_proto.h> #include <asm/amd_iommu_types.h> #include <asm/amd_iommu.h> #include <asm/iommu.h> #include <asm/gart.h> +#include <asm/x86_init.h> /* * definitions for the ACPI scanning code @@ -123,18 +125,24 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have to handle */ LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings we find in ACPI */ -#ifdef CONFIG_IOMMU_STRESS -bool amd_iommu_isolate = false; -#else -bool amd_iommu_isolate = true; /* if true, device isolation is - enabled */ -#endif - bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the system */ +/* Array to assign indices to IOMMUs*/ +struct amd_iommu *amd_iommus[MAX_IOMMUS]; +int amd_iommus_present; + +/* IOMMUs have a non-present cache? */ +bool amd_iommu_np_cache __read_mostly; + +/* + * List of protection domains - used during resume + */ +LIST_HEAD(amd_iommu_pd_list); +spinlock_t amd_iommu_pd_lock; + /* * Pointer to the device table which is shared by all AMD IOMMUs * it is indexed by the PCI device id or the HT unit id and contains @@ -157,12 +165,6 @@ u16 *amd_iommu_alias_table; struct amd_iommu **amd_iommu_rlookup_table; /* - * The pd table (protection domain table) is used to find the protection domain - * data structure a device belongs to. Indexed with the PCI device id too. - */ -struct protection_domain **amd_iommu_pd_table; - -/* * AMD IOMMU allows up to 2^16 differend protection domains. This is a bitmap * to know which ones are already in use. */ @@ -838,7 +840,18 @@ static void __init free_iommu_all(void) static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) { spin_lock_init(&iommu->lock); + + /* Add IOMMU to internal data structures */ list_add_tail(&iommu->list, &amd_iommu_list); + iommu->index = amd_iommus_present++; + + if (unlikely(iommu->index >= MAX_IOMMUS)) { + WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n"); + return -ENOSYS; + } + + /* Index is fine - add IOMMU to the array */ + amd_iommus[iommu->index] = iommu; /* * Copy data from ACPI table entry to the iommu struct @@ -868,6 +881,9 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) init_iommu_from_acpi(iommu, h); init_iommu_devices(iommu); + if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) + amd_iommu_np_cache = true; + return pci_enable_device(iommu->dev); } @@ -925,7 +941,7 @@ static int __init init_iommu_all(struct acpi_table_header *table) * ****************************************************************************/ -static int __init iommu_setup_msi(struct amd_iommu *iommu) +static int iommu_setup_msi(struct amd_iommu *iommu) { int r; @@ -1176,19 +1192,10 @@ static struct sys_device device_amd_iommu = { * functions. Finally it prints some information about AMD IOMMUs and * the driver state and enables the hardware. */ -int __init amd_iommu_init(void) +static int __init amd_iommu_init(void) { int i, ret = 0; - - if (no_iommu) { - printk(KERN_INFO "AMD-Vi disabled by kernel command line\n"); - return 0; - } - - if (!amd_iommu_detected) - return -ENODEV; - /* * First parse ACPI tables to find the largest Bus/Dev/Func * we need to handle. Upon this information the shared data @@ -1225,15 +1232,6 @@ int __init amd_iommu_init(void) if (amd_iommu_rlookup_table == NULL) goto free; - /* - * Protection Domain table - maps devices to protection domains - * This table has the same size as the rlookup_table - */ - amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(rlookup_table_size)); - if (amd_iommu_pd_table == NULL) - goto free; - amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( GFP_KERNEL | __GFP_ZERO, get_order(MAX_DOMAIN_ID/8)); @@ -1255,6 +1253,8 @@ int __init amd_iommu_init(void) */ amd_iommu_pd_alloc_bitmap[0] = 1; + spin_lock_init(&amd_iommu_pd_lock); + /* * now the data structures are allocated and basically initialized * start the real acpi table scan @@ -1286,17 +1286,12 @@ int __init amd_iommu_init(void) if (iommu_pass_through) goto out; - printk(KERN_INFO "AMD-Vi: device isolation "); - if (amd_iommu_isolate) - printk("enabled\n"); - else - printk("disabled\n"); - if (amd_iommu_unmap_flush) printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n"); else printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n"); + x86_platform.iommu_shutdown = disable_iommus; out: return ret; @@ -1304,9 +1299,6 @@ free: free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, get_order(MAX_DOMAIN_ID/8)); - free_pages((unsigned long)amd_iommu_pd_table, - get_order(rlookup_table_size)); - free_pages((unsigned long)amd_iommu_rlookup_table, get_order(rlookup_table_size)); @@ -1323,11 +1315,6 @@ free: goto out; } -void amd_iommu_shutdown(void) -{ - disable_iommus(); -} - /**************************************************************************** * * Early detect code. This code runs at IOMMU detection time in the DMA @@ -1342,16 +1329,13 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table) void __init amd_iommu_detect(void) { - if (swiotlb || no_iommu || (iommu_detected && !gart_iommu_aperture)) + if (no_iommu || (iommu_detected && !gart_iommu_aperture)) return; if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { iommu_detected = 1; amd_iommu_detected = 1; -#ifdef CONFIG_GART_IOMMU - gart_iommu_aperture_disabled = 1; - gart_iommu_aperture = 0; -#endif + x86_init.iommu.iommu_init = amd_iommu_init; } } @@ -1372,10 +1356,6 @@ static int __init parse_amd_iommu_dump(char *str) static int __init parse_amd_iommu_options(char *str) { for (; *str; ++str) { - if (strncmp(str, "isolate", 7) == 0) - amd_iommu_isolate = true; - if (strncmp(str, "share", 5) == 0) - amd_iommu_isolate = false; if (strncmp(str, "fullflush", 9) == 0) amd_iommu_unmap_flush = true; } diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 128111d8ffe..e0dfb6856aa 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -28,6 +28,7 @@ #include <asm/pci-direct.h> #include <asm/dma.h> #include <asm/k8.h> +#include <asm/x86_init.h> int gart_iommu_aperture; int gart_iommu_aperture_disabled __initdata; @@ -400,6 +401,7 @@ void __init gart_iommu_hole_init(void) iommu_detected = 1; gart_iommu_aperture = 1; + x86_init.iommu.iommu_init = gart_iommu_init; aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7; aper_size = (32 * 1024 * 1024) << aper_order; @@ -456,7 +458,7 @@ out: if (aper_alloc) { /* Got the aperture from the AGP bridge */ - } else if (swiotlb && !valid_agp) { + } else if (!valid_agp) { /* Do nothing */ } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) || force_iommu || diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index da7b7b9f8bd..565c1bfc507 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -2,7 +2,7 @@ # Makefile for local APIC drivers and for the IO-APIC code # -obj-$(CONFIG_X86_LOCAL_APIC) += apic.o probe_$(BITS).o ipi.o nmi.o +obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_SMP) += ipi.o diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 894aa97f071..ad8c75b9e45 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -241,28 +241,13 @@ static int modern_apic(void) } /* - * bare function to substitute write operation - * and it's _that_ fast :) - */ -static void native_apic_write_dummy(u32 reg, u32 v) -{ - WARN_ON_ONCE((cpu_has_apic || !disable_apic)); -} - -static u32 native_apic_read_dummy(u32 reg) -{ - WARN_ON_ONCE((cpu_has_apic && !disable_apic)); - return 0; -} - -/* - * right after this call apic->write/read doesn't do anything - * note that there is no restore operation it works one way + * right after this call apic become NOOP driven + * so apic->write/read doesn't do anything */ void apic_disable(void) { - apic->read = native_apic_read_dummy; - apic->write = native_apic_write_dummy; + pr_info("APIC: switched to apic NOOP\n"); + apic = &apic_noop; } void native_apic_wait_icr_idle(void) @@ -459,7 +444,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, v = apic_read(APIC_LVTT); v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); apic_write(APIC_LVTT, v); - apic_write(APIC_TMICT, 0xffffffff); + apic_write(APIC_TMICT, 0); break; case CLOCK_EVT_MODE_RESUME: /* Nothing to do here */ @@ -1392,14 +1377,11 @@ void __init enable_IR_x2apic(void) unsigned long flags; struct IO_APIC_route_entry **ioapic_entries = NULL; int ret, x2apic_enabled = 0; - int dmar_table_init_ret = 0; + int dmar_table_init_ret; -#ifdef CONFIG_INTR_REMAP dmar_table_init_ret = dmar_table_init(); - if (dmar_table_init_ret) - pr_debug("dmar_table_init() failed with %d:\n", - dmar_table_init_ret); -#endif + if (dmar_table_init_ret && !x2apic_supported()) + return; ioapic_entries = alloc_ioapic_entries(); if (!ioapic_entries) { diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c new file mode 100644 index 00000000000..d9acc3bee0f --- /dev/null +++ b/arch/x86/kernel/apic/apic_noop.c @@ -0,0 +1,200 @@ +/* + * NOOP APIC driver. + * + * Does almost nothing and should be substituted by a real apic driver via + * probe routine. + * + * Though in case if apic is disabled (for some reason) we try + * to not uglify the caller's code and allow to call (some) apic routines + * like self-ipi, etc... + */ + +#include <linux/threads.h> +#include <linux/cpumask.h> +#include <linux/module.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/ctype.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <asm/fixmap.h> +#include <asm/mpspec.h> +#include <asm/apicdef.h> +#include <asm/apic.h> +#include <asm/setup.h> + +#include <linux/smp.h> +#include <asm/ipi.h> + +#include <linux/interrupt.h> +#include <asm/acpi.h> +#include <asm/e820.h> + +static void noop_init_apic_ldr(void) { } +static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector) { } +static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { } +static void noop_send_IPI_allbutself(int vector) { } +static void noop_send_IPI_all(int vector) { } +static void noop_send_IPI_self(int vector) { } +static void noop_apic_wait_icr_idle(void) { } +static void noop_apic_icr_write(u32 low, u32 id) { } + +static int noop_wakeup_secondary_cpu(int apicid, unsigned long start_eip) +{ + return -1; +} + +static u32 noop_safe_apic_wait_icr_idle(void) +{ + return 0; +} + +static u64 noop_apic_icr_read(void) +{ + return 0; +} + +static int noop_cpu_to_logical_apicid(int cpu) +{ + return 0; +} + +static int noop_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return 0; +} + +static unsigned int noop_get_apic_id(unsigned long x) +{ + return 0; +} + +static int noop_probe(void) +{ + /* + * NOOP apic should not ever be + * enabled via probe routine + */ + return 0; +} + +static int noop_apic_id_registered(void) +{ + /* + * if we would be really "pedantic" + * we should pass read_apic_id() here + * but since NOOP suppose APIC ID = 0 + * lets save a few cycles + */ + return physid_isset(0, phys_cpu_present_map); +} + +static const struct cpumask *noop_target_cpus(void) +{ + /* only BSP here */ + return cpumask_of(0); +} + +static unsigned long noop_check_apicid_used(physid_mask_t *map, int apicid) +{ + return physid_isset(apicid, *map); +} + +static unsigned long noop_check_apicid_present(int bit) +{ + return physid_isset(bit, phys_cpu_present_map); +} + +static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask) +{ + if (cpu != 0) + pr_warning("APIC: Vector allocated for non-BSP cpu\n"); + cpumask_clear(retmask); + cpumask_set_cpu(cpu, retmask); +} + +int noop_apicid_to_node(int logical_apicid) +{ + /* we're always on node 0 */ + return 0; +} + +static u32 noop_apic_read(u32 reg) +{ + WARN_ON_ONCE((cpu_has_apic && !disable_apic)); + return 0; +} + +static void noop_apic_write(u32 reg, u32 v) +{ + WARN_ON_ONCE((cpu_has_apic || !disable_apic)); +} + +struct apic apic_noop = { + .name = "noop", + .probe = noop_probe, + .acpi_madt_oem_check = NULL, + + .apic_id_registered = noop_apic_id_registered, + + .irq_delivery_mode = dest_LowestPrio, + /* logical delivery broadcast to all CPUs: */ + .irq_dest_mode = 1, + + .target_cpus = noop_target_cpus, + .disable_esr = 0, + .dest_logical = APIC_DEST_LOGICAL, + .check_apicid_used = noop_check_apicid_used, + .check_apicid_present = noop_check_apicid_present, + + .vector_allocation_domain = noop_vector_allocation_domain, + .init_apic_ldr = noop_init_apic_ldr, + + .ioapic_phys_id_map = default_ioapic_phys_id_map, + .setup_apic_routing = NULL, + .multi_timer_check = NULL, + .apicid_to_node = noop_apicid_to_node, + + .cpu_to_logical_apicid = noop_cpu_to_logical_apicid, + .cpu_present_to_apicid = default_cpu_present_to_apicid, + .apicid_to_cpu_present = physid_set_mask_of_physid, + + .setup_portio_remap = NULL, + .check_phys_apicid_present = default_check_phys_apicid_present, + .enable_apic_mode = NULL, + + .phys_pkg_id = noop_phys_pkg_id, + + .mps_oem_check = NULL, + + .get_apic_id = noop_get_apic_id, + .set_apic_id = NULL, + .apic_id_mask = 0x0F << 24, + + .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + + .send_IPI_mask = noop_send_IPI_mask, + .send_IPI_mask_allbutself = noop_send_IPI_mask_allbutself, + .send_IPI_allbutself = noop_send_IPI_allbutself, + .send_IPI_all = noop_send_IPI_all, + .send_IPI_self = noop_send_IPI_self, + + .wakeup_secondary_cpu = noop_wakeup_secondary_cpu, + + /* should be safe */ + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, + + .wait_for_init_deassert = NULL, + + .smp_callin_clear_local_apic = NULL, + .inquire_remote_apic = NULL, + + .read = noop_apic_read, + .write = noop_apic_write, + .icr_read = noop_apic_icr_read, + .icr_write = noop_apic_icr_write, + .wait_icr_idle = noop_apic_wait_icr_idle, + .safe_wait_icr_idle = noop_safe_apic_wait_icr_idle, +}; diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 77a06413b6b..38dcecfa581 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -35,7 +35,7 @@ static const struct cpumask *bigsmp_target_cpus(void) #endif } -static unsigned long bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid) +static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid) { return 0; } @@ -93,11 +93,6 @@ static int bigsmp_cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } -static physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid) -{ - return physid_mask_of_physid(phys_apicid); -} - /* Mapping from cpu number to logical apicid */ static inline int bigsmp_cpu_to_logical_apicid(int cpu) { @@ -106,10 +101,10 @@ static inline int bigsmp_cpu_to_logical_apicid(int cpu) return cpu_physical_id(cpu); } -static physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map) +static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) { /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0xFFL); + physids_promote(0xFFL, retmap); } static int bigsmp_check_phys_apicid_present(int phys_apicid) @@ -230,7 +225,7 @@ struct apic apic_bigsmp = { .apicid_to_node = bigsmp_apicid_to_node, .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid, .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, - .apicid_to_cpu_present = bigsmp_apicid_to_cpu_present, + .apicid_to_cpu_present = physid_set_mask_of_physid, .setup_portio_remap = NULL, .check_phys_apicid_present = bigsmp_check_phys_apicid_present, .enable_apic_mode = NULL, diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 89174f847b4..e85f8fb7f8e 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -466,11 +466,11 @@ static const struct cpumask *es7000_target_cpus(void) return cpumask_of(smp_processor_id()); } -static unsigned long -es7000_check_apicid_used(physid_mask_t bitmap, int apicid) +static unsigned long es7000_check_apicid_used(physid_mask_t *map, int apicid) { return 0; } + static unsigned long es7000_check_apicid_present(int bit) { return physid_isset(bit, phys_cpu_present_map); @@ -539,14 +539,10 @@ static int es7000_cpu_present_to_apicid(int mps_cpu) static int cpu_id; -static physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid) +static void es7000_apicid_to_cpu_present(int phys_apicid, physid_mask_t *retmap) { - physid_mask_t mask; - - mask = physid_mask_of_physid(cpu_id); + physid_set_mask_of_physid(cpu_id, retmap); ++cpu_id; - - return mask; } /* Mapping from cpu number to logical apicid */ @@ -561,10 +557,10 @@ static int es7000_cpu_to_logical_apicid(int cpu) #endif } -static physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map) +static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) { /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0xff); + physids_promote(0xFFL, retmap); } static int es7000_check_phys_apicid_present(int cpu_physical_apicid) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index dc69f28489f..c0b4468683f 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -60,8 +60,6 @@ #include <asm/irq_remapping.h> #include <asm/hpet.h> #include <asm/hw_irq.h> -#include <asm/uv/uv_hub.h> -#include <asm/uv/uv_irq.h> #include <asm/apic.h> @@ -140,20 +138,6 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node) return pin; } -/* - * This is performance-critical, we want to do it O(1) - * - * Most irqs are mapped 1:1 with pins. - */ -struct irq_cfg { - struct irq_pin_list *irq_2_pin; - cpumask_var_t domain; - cpumask_var_t old_domain; - unsigned move_cleanup_count; - u8 vector; - u8 move_in_progress : 1; -}; - /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ #ifdef CONFIG_SPARSE_IRQ static struct irq_cfg irq_cfgx[] = { @@ -209,7 +193,7 @@ int __init arch_early_irq_init(void) } #ifdef CONFIG_SPARSE_IRQ -static struct irq_cfg *irq_cfg(unsigned int irq) +struct irq_cfg *irq_cfg(unsigned int irq) { struct irq_cfg *cfg = NULL; struct irq_desc *desc; @@ -361,7 +345,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) /* end for move_irq_desc */ #else -static struct irq_cfg *irq_cfg(unsigned int irq) +struct irq_cfg *irq_cfg(unsigned int irq) { return irq < nr_irqs ? irq_cfgx + irq : NULL; } @@ -555,23 +539,41 @@ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, add_pin_to_irq_node(cfg, node, newapic, newpin); } +static void __io_apic_modify_irq(struct irq_pin_list *entry, + int mask_and, int mask_or, + void (*final)(struct irq_pin_list *entry)) +{ + unsigned int reg, pin; + + pin = entry->pin; + reg = io_apic_read(entry->apic, 0x10 + pin * 2); + reg &= mask_and; + reg |= mask_or; + io_apic_modify(entry->apic, 0x10 + pin * 2, reg); + if (final) + final(entry); +} + static void io_apic_modify_irq(struct irq_cfg *cfg, int mask_and, int mask_or, void (*final)(struct irq_pin_list *entry)) { - int pin; struct irq_pin_list *entry; - for_each_irq_pin(entry, cfg->irq_2_pin) { - unsigned int reg; - pin = entry->pin; - reg = io_apic_read(entry->apic, 0x10 + pin * 2); - reg &= mask_and; - reg |= mask_or; - io_apic_modify(entry->apic, 0x10 + pin * 2, reg); - if (final) - final(entry); - } + for_each_irq_pin(entry, cfg->irq_2_pin) + __io_apic_modify_irq(entry, mask_and, mask_or, final); +} + +static void __mask_and_edge_IO_APIC_irq(struct irq_pin_list *entry) +{ + __io_apic_modify_irq(entry, ~IO_APIC_REDIR_LEVEL_TRIGGER, + IO_APIC_REDIR_MASKED, NULL); +} + +static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry) +{ + __io_apic_modify_irq(entry, ~IO_APIC_REDIR_MASKED, + IO_APIC_REDIR_LEVEL_TRIGGER, NULL); } static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) @@ -595,18 +597,6 @@ static void __mask_IO_APIC_irq(struct irq_cfg *cfg) io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); } -static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) -{ - io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER, - IO_APIC_REDIR_MASKED, NULL); -} - -static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg) -{ - io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, - IO_APIC_REDIR_LEVEL_TRIGGER, NULL); -} - static void mask_IO_APIC_irq_desc(struct irq_desc *desc) { struct irq_cfg *cfg = desc->chip_data; @@ -1177,7 +1167,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) int cpu, err; cpumask_var_t tmp_mask; - if ((cfg->move_in_progress) || cfg->move_cleanup_count) + if (cfg->move_in_progress) return -EBUSY; if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) @@ -1237,8 +1227,7 @@ next: return err; } -static int -assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) +int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) { int err; unsigned long flags; @@ -1599,9 +1588,6 @@ __apicdebuginit(void) print_IO_APIC(void) struct irq_desc *desc; unsigned int irq; - if (apic_verbosity == APIC_QUIET) - return; - printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); for (i = 0; i < nr_ioapics; i++) printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", @@ -1708,9 +1694,6 @@ __apicdebuginit(void) print_APIC_field(int base) { int i; - if (apic_verbosity == APIC_QUIET) - return; - printk(KERN_DEBUG); for (i = 0; i < 8; i++) @@ -1724,9 +1707,6 @@ __apicdebuginit(void) print_local_APIC(void *dummy) unsigned int i, v, ver, maxlvt; u64 icr; - if (apic_verbosity == APIC_QUIET) - return; - printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", smp_processor_id(), hard_smp_processor_id()); v = apic_read(APIC_ID); @@ -1824,13 +1804,19 @@ __apicdebuginit(void) print_local_APIC(void *dummy) printk("\n"); } -__apicdebuginit(void) print_all_local_APICs(void) +__apicdebuginit(void) print_local_APICs(int maxcpu) { int cpu; + if (!maxcpu) + return; + preempt_disable(); - for_each_online_cpu(cpu) + for_each_online_cpu(cpu) { + if (cpu >= maxcpu) + break; smp_call_function_single(cpu, print_local_APIC, NULL, 1); + } preempt_enable(); } @@ -1839,7 +1825,7 @@ __apicdebuginit(void) print_PIC(void) unsigned int v; unsigned long flags; - if (apic_verbosity == APIC_QUIET || !nr_legacy_irqs) + if (!nr_legacy_irqs) return; printk(KERN_DEBUG "\nprinting PIC contents\n"); @@ -1866,21 +1852,41 @@ __apicdebuginit(void) print_PIC(void) printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); } -__apicdebuginit(int) print_all_ICs(void) +static int __initdata show_lapic = 1; +static __init int setup_show_lapic(char *arg) { + int num = -1; + + if (strcmp(arg, "all") == 0) { + show_lapic = CONFIG_NR_CPUS; + } else { + get_option(&arg, &num); + if (num >= 0) + show_lapic = num; + } + + return 1; +} +__setup("show_lapic=", setup_show_lapic); + +__apicdebuginit(int) print_ICs(void) +{ + if (apic_verbosity == APIC_QUIET) + return 0; + print_PIC(); /* don't print out if apic is not there */ if (!cpu_has_apic && !apic_from_smp_config()) return 0; - print_all_local_APICs(); + print_local_APICs(show_lapic); print_IO_APIC(); return 0; } -fs_initcall(print_all_ICs); +fs_initcall(print_ICs); /* Where if anywhere is the i8259 connect in external int mode */ @@ -2031,7 +2037,7 @@ void __init setup_ioapic_ids_from_mpc(void) * This is broken; anything with a real cpu count has to * circumvent this idiocy regardless. */ - phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map); + apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map); /* * Set the IOAPIC ID to the value stored in the MPC table. @@ -2058,7 +2064,7 @@ void __init setup_ioapic_ids_from_mpc(void) * system must have a unique ID or we get lots of nice * 'stuck on smp_invalidate_needed IPI wait' messages. */ - if (apic->check_apicid_used(phys_id_present_map, + if (apic->check_apicid_used(&phys_id_present_map, mp_ioapics[apic_id].apicid)) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", apic_id, mp_ioapics[apic_id].apicid); @@ -2073,7 +2079,7 @@ void __init setup_ioapic_ids_from_mpc(void) mp_ioapics[apic_id].apicid = i; } else { physid_mask_t tmp; - tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid); + apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid, &tmp); apic_printk(APIC_VERBOSE, "Setting %d in the " "phys_id_present_map\n", mp_ioapics[apic_id].apicid); @@ -2228,20 +2234,16 @@ static int ioapic_retrigger_irq(unsigned int irq) */ #ifdef CONFIG_SMP -static void send_cleanup_vector(struct irq_cfg *cfg) +void send_cleanup_vector(struct irq_cfg *cfg) { cpumask_var_t cleanup_mask; if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { unsigned int i; - cfg->move_cleanup_count = 0; - for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) - cfg->move_cleanup_count++; for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); } else { cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); - cfg->move_cleanup_count = cpumask_weight(cleanup_mask); apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); free_cpumask_var(cleanup_mask); } @@ -2272,15 +2274,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq } } -static int -assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); - /* * Either sets desc->affinity to a valid value, and returns * ->cpu_mask_to_apicid of that, or returns BAD_APICID and * leaves desc->affinity untouched. */ -static unsigned int +unsigned int set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) { struct irq_cfg *cfg; @@ -2433,8 +2432,6 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) cfg = irq_cfg(irq); spin_lock(&desc->lock); - if (!cfg->move_cleanup_count) - goto unlock; if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) goto unlock; @@ -2452,7 +2449,6 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) goto unlock; } __get_cpu_var(vector_irq)[vector] = -1; - cfg->move_cleanup_count--; unlock: spin_unlock(&desc->lock); } @@ -2460,21 +2456,33 @@ unlock: irq_exit(); } -static void irq_complete_move(struct irq_desc **descp) +static void __irq_complete_move(struct irq_desc **descp, unsigned vector) { struct irq_desc *desc = *descp; struct irq_cfg *cfg = desc->chip_data; - unsigned vector, me; + unsigned me; if (likely(!cfg->move_in_progress)) return; - vector = ~get_irq_regs()->orig_ax; me = smp_processor_id(); if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) send_cleanup_vector(cfg); } + +static void irq_complete_move(struct irq_desc **descp) +{ + __irq_complete_move(descp, ~get_irq_regs()->orig_ax); +} + +void irq_force_complete_move(int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + struct irq_cfg *cfg = desc->chip_data; + + __irq_complete_move(&desc, cfg->vector); +} #else static inline void irq_complete_move(struct irq_desc **descp) {} #endif @@ -2490,6 +2498,59 @@ static void ack_apic_edge(unsigned int irq) atomic_t irq_mis_count; +/* + * IO-APIC versions below 0x20 don't support EOI register. + * For the record, here is the information about various versions: + * 0Xh 82489DX + * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant + * 2Xh I/O(x)APIC which is PCI 2.2 Compliant + * 30h-FFh Reserved + * + * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic + * version as 0x2. This is an error with documentation and these ICH chips + * use io-apic's of version 0x20. + * + * For IO-APIC's with EOI register, we use that to do an explicit EOI. + * Otherwise, we simulate the EOI message manually by changing the trigger + * mode to edge and then back to level, with RTE being masked during this. +*/ +static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) +{ + struct irq_pin_list *entry; + + for_each_irq_pin(entry, cfg->irq_2_pin) { + if (mp_ioapics[entry->apic].apicver >= 0x20) { + /* + * Intr-remapping uses pin number as the virtual vector + * in the RTE. Actual vector is programmed in + * intr-remapping table entry. Hence for the io-apic + * EOI we use the pin number. + */ + if (irq_remapped(irq)) + io_apic_eoi(entry->apic, entry->pin); + else + io_apic_eoi(entry->apic, cfg->vector); + } else { + __mask_and_edge_IO_APIC_irq(entry); + __unmask_and_level_IO_APIC_irq(entry); + } + } +} + +static void eoi_ioapic_irq(struct irq_desc *desc) +{ + struct irq_cfg *cfg; + unsigned long flags; + unsigned int irq; + + irq = desc->irq; + cfg = desc->chip_data; + + spin_lock_irqsave(&ioapic_lock, flags); + __eoi_ioapic_irq(irq, cfg); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + static void ack_apic_level(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); @@ -2525,6 +2586,19 @@ static void ack_apic_level(unsigned int irq) * level-triggered interrupt. We mask the source for the time of the * operation to prevent an edge-triggered interrupt escaping meanwhile. * The idea is from Manfred Spraul. --macro + * + * Also in the case when cpu goes offline, fixup_irqs() will forward + * any unhandled interrupt on the offlined cpu to the new cpu + * destination that is handling the corresponding interrupt. This + * interrupt forwarding is done via IPI's. Hence, in this case also + * level-triggered io-apic interrupt will be seen as an edge + * interrupt in the IRR. And we can't rely on the cpu's EOI + * to be broadcasted to the IO-APIC's which will clear the remoteIRR + * corresponding to the level-triggered interrupt. Hence on IO-APIC's + * supporting EOI register, we do an explicit EOI to clear the + * remote IRR and on IO-APIC's which don't have an EOI register, + * we use the above logic (mask+edge followed by unmask+level) from + * Manfred Spraul to clear the remote IRR. */ cfg = desc->chip_data; i = cfg->vector; @@ -2536,6 +2610,19 @@ static void ack_apic_level(unsigned int irq) */ ack_APIC_irq(); + /* + * Tail end of clearing remote IRR bit (either by delivering the EOI + * message via io-apic EOI register write or simulating it using + * mask+edge followed by unnask+level logic) manually when the + * level triggered interrupt is seen as the edge triggered interrupt + * at the cpu. + */ + if (!(v & (1 << (i & 0x1f)))) { + atomic_inc(&irq_mis_count); + + eoi_ioapic_irq(desc); + } + /* Now we can move and renable the irq */ if (unlikely(do_unmask_irq)) { /* Only migrate the irq if the ack has been received. @@ -2569,41 +2656,9 @@ static void ack_apic_level(unsigned int irq) move_masked_irq(irq); unmask_IO_APIC_irq_desc(desc); } - - /* Tail end of version 0x11 I/O APIC bug workaround */ - if (!(v & (1 << (i & 0x1f)))) { - atomic_inc(&irq_mis_count); - spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(cfg); - __unmask_and_level_IO_APIC_irq(cfg); - spin_unlock(&ioapic_lock); - } } #ifdef CONFIG_INTR_REMAP -static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) -{ - struct irq_pin_list *entry; - - for_each_irq_pin(entry, cfg->irq_2_pin) - io_apic_eoi(entry->apic, entry->pin); -} - -static void -eoi_ioapic_irq(struct irq_desc *desc) -{ - struct irq_cfg *cfg; - unsigned long flags; - unsigned int irq; - - irq = desc->irq; - cfg = desc->chip_data; - - spin_lock_irqsave(&ioapic_lock, flags); - __eoi_ioapic_irq(irq, cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - static void ir_ack_apic_edge(unsigned int irq) { ack_APIC_irq(); @@ -3157,6 +3212,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) continue; desc_new = move_irq_desc(desc_new, node); + cfg_new = desc_new->chip_data; if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) irq = new; @@ -3708,75 +3764,6 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) } #endif /* CONFIG_HT_IRQ */ -#ifdef CONFIG_X86_UV -/* - * Re-target the irq to the specified CPU and enable the specified MMR located - * on the specified blade to allow the sending of MSIs to the specified CPU. - */ -int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, - unsigned long mmr_offset) -{ - const struct cpumask *eligible_cpu = cpumask_of(cpu); - struct irq_cfg *cfg; - int mmr_pnode; - unsigned long mmr_value; - struct uv_IO_APIC_route_entry *entry; - unsigned long flags; - int err; - - BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); - - cfg = irq_cfg(irq); - - err = assign_irq_vector(irq, cfg, eligible_cpu); - if (err != 0) - return err; - - spin_lock_irqsave(&vector_lock, flags); - set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, - irq_name); - spin_unlock_irqrestore(&vector_lock, flags); - - mmr_value = 0; - entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - entry->vector = cfg->vector; - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; - entry->polarity = 0; - entry->trigger = 0; - entry->mask = 0; - entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); - - mmr_pnode = uv_blade_to_pnode(mmr_blade); - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); - - if (cfg->move_in_progress) - send_cleanup_vector(cfg); - - return irq; -} - -/* - * Disable the specified MMR located on the specified blade so that MSIs are - * longer allowed to be sent. - */ -void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset) -{ - unsigned long mmr_value; - struct uv_IO_APIC_route_entry *entry; - int mmr_pnode; - - BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); - - mmr_value = 0; - entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - entry->mask = 1; - - mmr_pnode = uv_blade_to_pnode(mmr_blade); - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); -} -#endif /* CONFIG_X86_64 */ - int __init io_apic_get_redir_entries (int ioapic) { union IO_APIC_reg_01 reg_01; @@ -3944,7 +3931,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) */ if (physids_empty(apic_id_map)) - apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map); + apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map); spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(ioapic, 0); @@ -3960,10 +3947,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) * Every APIC in a system must have a unique ID or we get lots of nice * 'stuck on smp_invalidate_needed IPI wait' messages. */ - if (apic->check_apicid_used(apic_id_map, apic_id)) { + if (apic->check_apicid_used(&apic_id_map, apic_id)) { for (i = 0; i < get_physical_broadcast(); i++) { - if (!apic->check_apicid_used(apic_id_map, i)) + if (!apic->check_apicid_used(&apic_id_map, i)) break; } @@ -3976,7 +3963,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) apic_id = i; } - tmp = apic->apicid_to_cpu_present(apic_id); + apic->apicid_to_cpu_present(apic_id, &tmp); physids_or(apic_id_map, apic_id_map, tmp); if (reg_00.bits.ID != apic_id) { @@ -4106,7 +4093,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics) for (i = 0; i < nr_ioapics; i++) { res[i].name = mem; res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; - sprintf(mem, "IOAPIC %u", i); + snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i); mem += IOAPIC_RESOURCE_NAME_SIZE; } @@ -4140,18 +4127,17 @@ void __init ioapic_init_mappings(void) #ifdef CONFIG_X86_32 fake_ioapic_page: #endif - ioapic_phys = (unsigned long) - alloc_bootmem_pages(PAGE_SIZE); + ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE); ioapic_phys = __pa(ioapic_phys); } set_fixmap_nocache(idx, ioapic_phys); - apic_printk(APIC_VERBOSE, - "mapped IOAPIC to %08lx (%08lx)\n", - __fix_to_virt(idx), ioapic_phys); + apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n", + __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK), + ioapic_phys); idx++; ioapic_res->start = ioapic_phys; - ioapic_res->end = ioapic_phys + (4 * 1024) - 1; + ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1; ioapic_res++; } } diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index 7ff61d6a188..6389432a9db 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -39,7 +39,8 @@ int unknown_nmi_panic; int nmi_watchdog_enabled; -static cpumask_t backtrace_mask __read_mostly; +/* For reliability, we're prepared to waste bits here. */ +static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; /* nmi_active: * >0: the lapic NMI watchdog is active, but can be disabled @@ -414,7 +415,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) } /* We can be called before check_nmi_watchdog, hence NULL check. */ - if (cpumask_test_cpu(cpu, &backtrace_mask)) { + if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { static DEFINE_SPINLOCK(lock); /* Serialise the printks */ spin_lock(&lock); @@ -422,7 +423,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) show_regs(regs); dump_stack(); spin_unlock(&lock); - cpumask_clear_cpu(cpu, &backtrace_mask); + cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); rc = 1; } @@ -558,14 +559,14 @@ void arch_trigger_all_cpu_backtrace(void) { int i; - cpumask_copy(&backtrace_mask, cpu_online_mask); + cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); printk(KERN_INFO "sending NMI to all CPUs:\n"); apic->send_IPI_all(NMI_VECTOR); /* Wait for up to 10 seconds for all CPUs to do the backtrace */ for (i = 0; i < 10 * 1000; i++) { - if (cpumask_empty(&backtrace_mask)) + if (cpumask_empty(to_cpumask(backtrace_mask))) break; mdelay(1); } diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index efa00e2b850..07cdbdcd7a9 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -334,10 +334,9 @@ static inline const struct cpumask *numaq_target_cpus(void) return cpu_all_mask; } -static inline unsigned long -numaq_check_apicid_used(physid_mask_t bitmap, int apicid) +static unsigned long numaq_check_apicid_used(physid_mask_t *map, int apicid) { - return physid_isset(apicid, bitmap); + return physid_isset(apicid, *map); } static inline unsigned long numaq_check_apicid_present(int bit) @@ -371,10 +370,10 @@ static inline int numaq_multi_timer_check(int apic, int irq) return apic != 0 && irq == 0; } -static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map) +static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) { /* We don't have a good way to do this yet - hack */ - return physids_promote(0xFUL); + return physids_promote(0xFUL, retmap); } static inline int numaq_cpu_to_logical_apicid(int cpu) @@ -402,12 +401,12 @@ static inline int numaq_apicid_to_node(int logical_apicid) return logical_apicid >> 4; } -static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) +static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap) { int node = numaq_apicid_to_node(logical_apicid); int cpu = __ffs(logical_apicid & 0xf); - return physid_mask_of_physid(cpu + 4*node); + physid_set_mask_of_physid(cpu + 4*node, retmap); } /* Where the IO area was mapped on multiquad, always 0 otherwise */ diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 0c0182cc947..1a6559f6768 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -108,7 +108,7 @@ struct apic apic_default = { .apicid_to_node = default_apicid_to_node, .cpu_to_logical_apicid = default_cpu_to_logical_apicid, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .apicid_to_cpu_present = default_apicid_to_cpu_present, + .apicid_to_cpu_present = physid_set_mask_of_physid, .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 645ecc4ff0b..9b419263d90 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -183,7 +183,7 @@ static const struct cpumask *summit_target_cpus(void) return cpumask_of(0); } -static unsigned long summit_check_apicid_used(physid_mask_t bitmap, int apicid) +static unsigned long summit_check_apicid_used(physid_mask_t *map, int apicid) { return 0; } @@ -261,15 +261,15 @@ static int summit_cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } -static physid_mask_t summit_ioapic_phys_id_map(physid_mask_t phys_id_map) +static void summit_ioapic_phys_id_map(physid_mask_t *phys_id_map, physid_mask_t *retmap) { /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0x0F); + physids_promote(0x0FL, retmap); } -static physid_mask_t summit_apicid_to_cpu_present(int apicid) +static void summit_apicid_to_cpu_present(int apicid, physid_mask_t *retmap) { - return physid_mask_of_physid(0); + physid_set_mask_of_physid(0, retmap); } static int summit_check_phys_apicid_present(int physical_apicid) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 326c25477d3..130c4b93487 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -409,6 +409,12 @@ static __init void map_mmioh_high(int max_pnode) map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc); } +static __init void map_low_mmrs(void) +{ + init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE); + init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE); +} + static __init void uv_rtc_init(void) { long status; @@ -550,6 +556,8 @@ void __init uv_system_init(void) unsigned long mmr_base, present, paddr; unsigned short pnode_mask; + map_low_mmrs(); + m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); m_val = m_n_config.s.m_skt; n_val = m_n_config.s.n_skt; diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 151ace69a5a..b5b6b23bce5 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -204,7 +204,6 @@ #include <linux/module.h> #include <linux/poll.h> -#include <linux/smp_lock.h> #include <linux/types.h> #include <linux/stddef.h> #include <linux/timer.h> @@ -403,6 +402,7 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); static struct apm_user *user_list; static DEFINE_SPINLOCK(user_list_lock); +static DEFINE_MUTEX(apm_mutex); /* * Set up a segment that references the real mode segment 0x40 @@ -1531,7 +1531,7 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg) return -EPERM; switch (cmd) { case APM_IOC_STANDBY: - lock_kernel(); + mutex_lock(&apm_mutex); if (as->standbys_read > 0) { as->standbys_read--; as->standbys_pending--; @@ -1540,10 +1540,10 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg) queue_event(APM_USER_STANDBY, as); if (standbys_pending <= 0) standby(); - unlock_kernel(); + mutex_unlock(&apm_mutex); break; case APM_IOC_SUSPEND: - lock_kernel(); + mutex_lock(&apm_mutex); if (as->suspends_read > 0) { as->suspends_read--; as->suspends_pending--; @@ -1552,13 +1552,14 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg) queue_event(APM_USER_SUSPEND, as); if (suspends_pending <= 0) { ret = suspend(1); + mutex_unlock(&apm_mutex); } else { as->suspend_wait = 1; + mutex_unlock(&apm_mutex); wait_event_interruptible(apm_suspend_waitqueue, as->suspend_wait == 0); ret = as->suspend_result; } - unlock_kernel(); return ret; default: return -ENOTTY; @@ -1608,12 +1609,10 @@ static int do_open(struct inode *inode, struct file *filp) { struct apm_user *as; - lock_kernel(); as = kmalloc(sizeof(*as), GFP_KERNEL); if (as == NULL) { printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n", sizeof(*as)); - unlock_kernel(); return -ENOMEM; } as->magic = APM_BIOS_MAGIC; @@ -1635,7 +1634,6 @@ static int do_open(struct inode *inode, struct file *filp) user_list = as; spin_unlock(&user_list_lock); filp->private_data = as; - unlock_kernel(); return 0; } diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 68537e957a9..1d2cb383410 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -5,6 +5,7 @@ # Don't trace early stages of a secondary CPU boot ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_common.o = -pg +CFLAGS_REMOVE_perf_event.o = -pg endif # Make sure load_percpu_segment has no stackprotector diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index c910a716a71..7128b3799ce 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -535,7 +535,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) } } - display_cacheinfo(c); + cpu_detect_cache_sizes(c); /* Multi core CPU? */ if (c->extended_cpuid_level >= 0x80000008) { diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index c95e831bb09..e58d978e075 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -294,7 +294,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_REP_GOOD); } - display_cacheinfo(c); + cpu_detect_cache_sizes(c); } enum { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cc25c2b4a56..a4ec8b64754 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -61,7 +61,7 @@ void __init setup_cpu_local_masks(void) static void __cpuinit default_init(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_64 - display_cacheinfo(c); + cpu_detect_cache_sizes(c); #else /* Not much we can do here... */ /* Check if at least it has cpuid */ @@ -383,7 +383,7 @@ static void __cpuinit get_model_name(struct cpuinfo_x86 *c) } } -void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) +void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c) { unsigned int n, dummy, ebx, ecx, edx, l2size; @@ -391,8 +391,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) if (n >= 0x80000005) { cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); - printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); c->x86_cache_size = (ecx>>24) + (edx>>24); #ifdef CONFIG_X86_64 /* On K8 L1 TLB is inclusive, so don't count it */ @@ -422,9 +420,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) #endif c->x86_cache_size = l2size; - - printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", - l2size, ecx & 0xFF); } void __cpuinit detect_ht(struct cpuinfo_x86 *c) @@ -659,24 +654,31 @@ void __init early_cpu_init(void) const struct cpu_dev *const *cdev; int count = 0; +#ifdef PROCESSOR_SELECT printk(KERN_INFO "KERNEL supported cpus:\n"); +#endif + for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { const struct cpu_dev *cpudev = *cdev; - unsigned int j; if (count >= X86_VENDOR_NUM) break; cpu_devs[count] = cpudev; count++; - for (j = 0; j < 2; j++) { - if (!cpudev->c_ident[j]) - continue; - printk(KERN_INFO " %s %s\n", cpudev->c_vendor, - cpudev->c_ident[j]); +#ifdef PROCESSOR_SELECT + { + unsigned int j; + + for (j = 0; j < 2; j++) { + if (!cpudev->c_ident[j]) + continue; + printk(KERN_INFO " %s %s\n", cpudev->c_vendor, + cpudev->c_ident[j]); + } } +#endif } - early_identify_cpu(&boot_cpu_data); } @@ -837,10 +839,8 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; } -#ifdef CONFIG_X86_MCE /* Init Machine Check Exception if available. */ - mcheck_init(c); -#endif + mcheck_cpu_init(c); select_idle_routine(c); diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 6de9a908e40..3624e8a0f71 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -32,6 +32,6 @@ struct cpu_dev { extern const struct cpu_dev *const __x86_cpu_dev_start[], *const __x86_cpu_dev_end[]; -extern void display_cacheinfo(struct cpuinfo_x86 *c); +extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); #endif diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 19807b89f05..4fbd384fb64 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -373,7 +373,7 @@ static void __cpuinit init_nsc(struct cpuinfo_x86 *c) /* Handle the GX (Formally known as the GX2) */ if (c->x86 == 5 && c->x86_model == 5) - display_cacheinfo(c); + cpu_detect_cache_sizes(c); else init_cyrix(c); } diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 804c40e2bc3..0df4c2b7107 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -488,22 +488,6 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) #endif } - if (trace) - printk(KERN_INFO "CPU: Trace cache: %dK uops", trace); - else if (l1i) - printk(KERN_INFO "CPU: L1 I cache: %dK", l1i); - - if (l1d) - printk(KERN_CONT ", L1 D cache: %dK\n", l1d); - else - printk(KERN_CONT "\n"); - - if (l2) - printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); - - if (l3) - printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); - c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); return l2; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 721a77ca811..0bcaa387586 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -46,6 +46,9 @@ #include "mce-internal.h" +#define CREATE_TRACE_POINTS +#include <trace/events/mce.h> + int mce_disabled __read_mostly; #define MISC_MCELOG_MINOR 227 @@ -85,18 +88,26 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait); static DEFINE_PER_CPU(struct mce, mces_seen); static int cpu_missing; -static void default_decode_mce(struct mce *m) +/* + * CPU/chipset specific EDAC code can register a notifier call here to print + * MCE errors in a human-readable form. + */ +ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); +EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); + +static int default_decode_mce(struct notifier_block *nb, unsigned long val, + void *data) { pr_emerg("No human readable MCE decoding support on this CPU type.\n"); pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); + + return NOTIFY_STOP; } -/* - * CPU/chipset specific EDAC code can register a callback here to print - * MCE errors in a human-readable form: - */ -void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce; -EXPORT_SYMBOL(x86_mce_decode_callback); +static struct notifier_block mce_dec_nb = { + .notifier_call = default_decode_mce, + .priority = -1, +}; /* MCA banks polled by the period polling timer for corrected events */ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { @@ -141,6 +152,9 @@ void mce_log(struct mce *mce) { unsigned next, entry; + /* Emit the trace record: */ + trace_mce_record(mce); + mce->finished = 0; wmb(); for (;;) { @@ -204,9 +218,9 @@ static void print_mce(struct mce *m) /* * Print out human-readable details about the MCE error, - * (if the CPU has an implementation for that): + * (if the CPU has an implementation for that) */ - x86_mce_decode_callback(m); + atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); } static void print_mce_head(void) @@ -1122,7 +1136,7 @@ static int check_interval = 5 * 60; /* 5 minutes */ static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */ static DEFINE_PER_CPU(struct timer_list, mce_timer); -static void mcheck_timer(unsigned long data) +static void mce_start_timer(unsigned long data) { struct timer_list *t = &per_cpu(mce_timer, data); int *n; @@ -1187,7 +1201,7 @@ int mce_notify_irq(void) } EXPORT_SYMBOL_GPL(mce_notify_irq); -static int mce_banks_init(void) +static int __cpuinit __mcheck_cpu_mce_banks_init(void) { int i; @@ -1206,7 +1220,7 @@ static int mce_banks_init(void) /* * Initialize Machine Checks for a CPU. */ -static int __cpuinit mce_cap_init(void) +static int __cpuinit __mcheck_cpu_cap_init(void) { unsigned b; u64 cap; @@ -1228,7 +1242,7 @@ static int __cpuinit mce_cap_init(void) WARN_ON(banks != 0 && b != banks); banks = b; if (!mce_banks) { - int err = mce_banks_init(); + int err = __mcheck_cpu_mce_banks_init(); if (err) return err; @@ -1244,7 +1258,7 @@ static int __cpuinit mce_cap_init(void) return 0; } -static void mce_init(void) +static void __mcheck_cpu_init_generic(void) { mce_banks_t all_banks; u64 cap; @@ -1273,7 +1287,7 @@ static void mce_init(void) } /* Add per CPU specific workarounds here */ -static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) +static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) { if (c->x86_vendor == X86_VENDOR_UNKNOWN) { pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); @@ -1341,7 +1355,7 @@ static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) return 0; } -static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) +static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) { if (c->x86 != 5) return; @@ -1355,7 +1369,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) } } -static void mce_cpu_features(struct cpuinfo_x86 *c) +static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) { switch (c->x86_vendor) { case X86_VENDOR_INTEL: @@ -1369,7 +1383,7 @@ static void mce_cpu_features(struct cpuinfo_x86 *c) } } -static void mce_init_timer(void) +static void __mcheck_cpu_init_timer(void) { struct timer_list *t = &__get_cpu_var(mce_timer); int *n = &__get_cpu_var(mce_next_interval); @@ -1380,7 +1394,7 @@ static void mce_init_timer(void) *n = check_interval * HZ; if (!*n) return; - setup_timer(t, mcheck_timer, smp_processor_id()); + setup_timer(t, mce_start_timer, smp_processor_id()); t->expires = round_jiffies(jiffies + *n); add_timer_on(t, smp_processor_id()); } @@ -1400,27 +1414,28 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) = * Called for each booted CPU to set up machine checks. * Must be called with preempt off: */ -void __cpuinit mcheck_init(struct cpuinfo_x86 *c) +void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) { if (mce_disabled) return; - mce_ancient_init(c); + __mcheck_cpu_ancient_init(c); if (!mce_available(c)) return; - if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) { + if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) { mce_disabled = 1; return; } machine_check_vector = do_machine_check; - mce_init(); - mce_cpu_features(c); - mce_init_timer(); + __mcheck_cpu_init_generic(); + __mcheck_cpu_init_vendor(c); + __mcheck_cpu_init_timer(); INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); + } /* @@ -1640,6 +1655,15 @@ static int __init mcheck_enable(char *str) } __setup("mce", mcheck_enable); +int __init mcheck_init(void) +{ + atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb); + + mcheck_intel_therm_init(); + + return 0; +} + /* * Sysfs support */ @@ -1648,7 +1672,7 @@ __setup("mce", mcheck_enable); * Disable machine checks on suspend and shutdown. We can't really handle * them later. */ -static int mce_disable(void) +static int mce_disable_error_reporting(void) { int i; @@ -1663,12 +1687,12 @@ static int mce_disable(void) static int mce_suspend(struct sys_device *dev, pm_message_t state) { - return mce_disable(); + return mce_disable_error_reporting(); } static int mce_shutdown(struct sys_device *dev) { - return mce_disable(); + return mce_disable_error_reporting(); } /* @@ -1678,8 +1702,8 @@ static int mce_shutdown(struct sys_device *dev) */ static int mce_resume(struct sys_device *dev) { - mce_init(); - mce_cpu_features(¤t_cpu_data); + __mcheck_cpu_init_generic(); + __mcheck_cpu_init_vendor(¤t_cpu_data); return 0; } @@ -1689,8 +1713,8 @@ static void mce_cpu_restart(void *data) del_timer_sync(&__get_cpu_var(mce_timer)); if (!mce_available(¤t_cpu_data)) return; - mce_init(); - mce_init_timer(); + __mcheck_cpu_init_generic(); + __mcheck_cpu_init_timer(); } /* Reinit MCEs after user configuration changes */ @@ -1716,7 +1740,7 @@ static void mce_enable_ce(void *all) cmci_reenable(); cmci_recheck(); if (all) - mce_init_timer(); + __mcheck_cpu_init_timer(); } static struct sysdev_class mce_sysclass = { @@ -1929,13 +1953,14 @@ static __cpuinit void mce_remove_device(unsigned int cpu) } /* Make sure there are no machine checks on offlined CPUs. */ -static void mce_disable_cpu(void *h) +static void __cpuinit mce_disable_cpu(void *h) { unsigned long action = *(unsigned long *)h; int i; if (!mce_available(¤t_cpu_data)) return; + if (!(action & CPU_TASKS_FROZEN)) cmci_clear(); for (i = 0; i < banks; i++) { @@ -1946,7 +1971,7 @@ static void mce_disable_cpu(void *h) } } -static void mce_reenable_cpu(void *h) +static void __cpuinit mce_reenable_cpu(void *h) { unsigned long action = *(unsigned long *)h; int i; @@ -2025,7 +2050,7 @@ static __init void mce_init_banks(void) } } -static __init int mce_init_device(void) +static __init int mcheck_init_device(void) { int err; int i = 0; @@ -2053,7 +2078,7 @@ static __init int mce_init_device(void) return err; } -device_initcall(mce_init_device); +device_initcall(mcheck_init_device); /* * Old style boot options parsing. Only for compatibility. @@ -2101,7 +2126,7 @@ static int fake_panic_set(void *data, u64 val) DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get, fake_panic_set, "%llu\n"); -static int __init mce_debugfs_init(void) +static int __init mcheck_debugfs_init(void) { struct dentry *dmce, *ffake_panic; @@ -2115,5 +2140,5 @@ static int __init mce_debugfs_init(void) return 0; } -late_initcall(mce_debugfs_init); +late_initcall(mcheck_debugfs_init); #endif diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index b3a1dba7533..4fef985fc22 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -49,6 +49,8 @@ static DEFINE_PER_CPU(struct thermal_state, thermal_state); static atomic_t therm_throt_en = ATOMIC_INIT(0); +static u32 lvtthmr_init __read_mostly; + #ifdef CONFIG_SYSFS #define define_therm_throt_sysdev_one_ro(_name) \ static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) @@ -254,6 +256,18 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) ack_APIC_irq(); } +void __init mcheck_intel_therm_init(void) +{ + /* + * This function is only called on boot CPU. Save the init thermal + * LVT value on BSP and use that value to restore APs' thermal LVT + * entry BIOS programmed later + */ + if (cpu_has(&boot_cpu_data, X86_FEATURE_ACPI) && + cpu_has(&boot_cpu_data, X86_FEATURE_ACC)) + lvtthmr_init = apic_read(APIC_LVTTHMR); +} + void intel_init_thermal(struct cpuinfo_x86 *c) { unsigned int cpu = smp_processor_id(); @@ -270,7 +284,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c) * since it might be delivered via SMI already: */ rdmsr(MSR_IA32_MISC_ENABLE, l, h); - h = apic_read(APIC_LVTTHMR); + + /* + * The initial value of thermal LVT entries on all APs always reads + * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI + * sequence to them and LVT registers are reset to 0s except for + * the mask bits which are set to 1s when APs receive INIT IPI. + * Always restore the value that BIOS has programmed on AP based on + * BSP's info we saved since BIOS is always setting the same value + * for all threads/cores + */ + apic_write(APIC_LVTTHMR, lvtthmr_init); + + h = lvtthmr_init; + if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index b5801c31184..c1bbed1021d 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -77,6 +77,18 @@ struct cpu_hw_events { struct debug_store *ds; }; +struct event_constraint { + unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + int code; +}; + +#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) } +#define EVENT_CONSTRAINT_END { .code = 0, .idxmsk[0] = 0 } + +#define for_each_event_constraint(e, c) \ + for ((e) = (c); (e)->idxmsk[0]; (e)++) + + /* * struct x86_pmu - generic x86 pmu */ @@ -102,6 +114,8 @@ struct x86_pmu { u64 intel_ctrl; void (*enable_bts)(u64 config); void (*disable_bts)(void); + int (*get_event_idx)(struct cpu_hw_events *cpuc, + struct hw_perf_event *hwc); }; static struct x86_pmu x86_pmu __read_mostly; @@ -110,6 +124,8 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; +static const struct event_constraint *event_constraints; + /* * Not sure about some of these */ @@ -155,6 +171,16 @@ static u64 p6_pmu_raw_event(u64 hw_event) return hw_event & P6_EVNTSEL_MASK; } +static const struct event_constraint intel_p6_event_constraints[] = +{ + EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ + EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ + EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ + EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ + EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ + EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ + EVENT_CONSTRAINT_END +}; /* * Intel PerfMon v3. Used on Core2 and later. @@ -170,6 +196,35 @@ static const u64 intel_perfmon_event_map[] = [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, }; +static const struct event_constraint intel_core_event_constraints[] = +{ + EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ + EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ + EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ + EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ + EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ + EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ + EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ + EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ + EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ + EVENT_CONSTRAINT_END +}; + +static const struct event_constraint intel_nehalem_event_constraints[] = +{ + EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ + EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ + EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ + EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ + EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ + EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */ + EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ + EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */ + EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */ + EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */ + EVENT_CONSTRAINT_END +}; + static u64 intel_pmu_event_map(int hw_event) { return intel_perfmon_event_map[hw_event]; @@ -190,7 +245,7 @@ static u64 __read_mostly hw_cache_event_ids [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX]; -static const u64 nehalem_hw_cache_event_ids +static __initconst u64 nehalem_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -281,7 +336,7 @@ static const u64 nehalem_hw_cache_event_ids }, }; -static const u64 core2_hw_cache_event_ids +static __initconst u64 core2_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -372,7 +427,7 @@ static const u64 core2_hw_cache_event_ids }, }; -static const u64 atom_hw_cache_event_ids +static __initconst u64 atom_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -469,7 +524,7 @@ static u64 intel_pmu_raw_event(u64 hw_event) #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL #define CORE_EVNTSEL_INV_MASK 0x00800000ULL -#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL +#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL #define CORE_EVNTSEL_MASK \ (CORE_EVNTSEL_EVENT_MASK | \ @@ -481,7 +536,7 @@ static u64 intel_pmu_raw_event(u64 hw_event) return hw_event & CORE_EVNTSEL_MASK; } -static const u64 amd_hw_cache_event_ids +static __initconst u64 amd_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -932,6 +987,8 @@ static int __hw_perf_event_init(struct perf_event *event) */ hwc->config = ARCH_PERFMON_EVENTSEL_INT; + hwc->idx = -1; + /* * Count user and OS events unless requested not to. */ @@ -1334,8 +1391,7 @@ static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx) x86_pmu_enable_event(hwc, idx); } -static int -fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) +static int fixed_mode_idx(struct hw_perf_event *hwc) { unsigned int hw_event; @@ -1349,6 +1405,12 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) if (!x86_pmu.num_events_fixed) return -1; + /* + * fixed counters do not take all possible filters + */ + if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK) + return -1; + if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) return X86_PMC_IDX_FIXED_INSTRUCTIONS; if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) @@ -1360,22 +1422,57 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) } /* - * Find a PMC slot for the freshly enabled / scheduled in event: + * generic counter allocator: get next free counter */ -static int x86_pmu_enable(struct perf_event *event) +static int +gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) +{ + int idx; + + idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events); + return idx == x86_pmu.num_events ? -1 : idx; +} + +/* + * intel-specific counter allocator: check event constraints + */ +static int +intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) +{ + const struct event_constraint *event_constraint; + int i, code; + + if (!event_constraints) + goto skip; + + code = hwc->config & CORE_EVNTSEL_EVENT_MASK; + + for_each_event_constraint(event_constraint, event_constraints) { + if (code == event_constraint->code) { + for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) { + if (!test_and_set_bit(i, cpuc->used_mask)) + return i; + } + return -1; + } + } +skip: + return gen_get_event_idx(cpuc, hwc); +} + +static int +x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; int idx; - idx = fixed_mode_idx(event, hwc); + idx = fixed_mode_idx(hwc); if (idx == X86_PMC_IDX_FIXED_BTS) { /* BTS is already occupied. */ if (test_and_set_bit(idx, cpuc->used_mask)) return -EAGAIN; hwc->config_base = 0; - hwc->event_base = 0; + hwc->event_base = 0; hwc->idx = idx; } else if (idx >= 0) { /* @@ -1396,20 +1493,35 @@ static int x86_pmu_enable(struct perf_event *event) } else { idx = hwc->idx; /* Try to get the previous generic event again */ - if (test_and_set_bit(idx, cpuc->used_mask)) { + if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) { try_generic: - idx = find_first_zero_bit(cpuc->used_mask, - x86_pmu.num_events); - if (idx == x86_pmu.num_events) + idx = x86_pmu.get_event_idx(cpuc, hwc); + if (idx == -1) return -EAGAIN; set_bit(idx, cpuc->used_mask); hwc->idx = idx; } - hwc->config_base = x86_pmu.eventsel; - hwc->event_base = x86_pmu.perfctr; + hwc->config_base = x86_pmu.eventsel; + hwc->event_base = x86_pmu.perfctr; } + return idx; +} + +/* + * Find a PMC slot for the freshly enabled / scheduled in event: + */ +static int x86_pmu_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx; + + idx = x86_schedule_event(cpuc, hwc); + if (idx < 0) + return idx; + perf_events_lapic_init(); x86_pmu.disable(hwc, idx); @@ -1852,7 +1964,7 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = { .priority = 1 }; -static struct x86_pmu p6_pmu = { +static __initconst struct x86_pmu p6_pmu = { .name = "p6", .handle_irq = p6_pmu_handle_irq, .disable_all = p6_pmu_disable_all, @@ -1877,9 +1989,10 @@ static struct x86_pmu p6_pmu = { */ .event_bits = 32, .event_mask = (1ULL << 32) - 1, + .get_event_idx = intel_get_event_idx, }; -static struct x86_pmu intel_pmu = { +static __initconst struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, .disable_all = intel_pmu_disable_all, @@ -1900,9 +2013,10 @@ static struct x86_pmu intel_pmu = { .max_period = (1ULL << 31) - 1, .enable_bts = intel_pmu_enable_bts, .disable_bts = intel_pmu_disable_bts, + .get_event_idx = intel_get_event_idx, }; -static struct x86_pmu amd_pmu = { +static __initconst struct x86_pmu amd_pmu = { .name = "AMD", .handle_irq = amd_pmu_handle_irq, .disable_all = amd_pmu_disable_all, @@ -1920,9 +2034,10 @@ static struct x86_pmu amd_pmu = { .apic = 1, /* use highest bit to detect overflow */ .max_period = (1ULL << 47) - 1, + .get_event_idx = gen_get_event_idx, }; -static int p6_pmu_init(void) +static __init int p6_pmu_init(void) { switch (boot_cpu_data.x86_model) { case 1: @@ -1932,10 +2047,12 @@ static int p6_pmu_init(void) case 7: case 8: case 11: /* Pentium III */ + event_constraints = intel_p6_event_constraints; break; case 9: case 13: /* Pentium M */ + event_constraints = intel_p6_event_constraints; break; default: pr_cont("unsupported p6 CPU model %d ", @@ -1954,7 +2071,7 @@ static int p6_pmu_init(void) return 0; } -static int intel_pmu_init(void) +static __init int intel_pmu_init(void) { union cpuid10_edx edx; union cpuid10_eax eax; @@ -2007,12 +2124,14 @@ static int intel_pmu_init(void) sizeof(hw_cache_event_ids)); pr_cont("Core2 events, "); + event_constraints = intel_core_event_constraints; break; default: case 26: memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + event_constraints = intel_nehalem_event_constraints; pr_cont("Nehalem/Corei7 events, "); break; case 28: @@ -2025,7 +2144,7 @@ static int intel_pmu_init(void) return 0; } -static int amd_pmu_init(void) +static __init int amd_pmu_init(void) { /* Performance-monitoring supported from K7 and later: */ if (boot_cpu_data.x86 < 6) @@ -2105,11 +2224,47 @@ static const struct pmu pmu = { .unthrottle = x86_pmu_unthrottle, }; +static int +validate_event(struct cpu_hw_events *cpuc, struct perf_event *event) +{ + struct hw_perf_event fake_event = event->hw; + + if (event->pmu && event->pmu != &pmu) + return 0; + + return x86_schedule_event(cpuc, &fake_event) >= 0; +} + +static int validate_group(struct perf_event *event) +{ + struct perf_event *sibling, *leader = event->group_leader; + struct cpu_hw_events fake_pmu; + + memset(&fake_pmu, 0, sizeof(fake_pmu)); + + if (!validate_event(&fake_pmu, leader)) + return -ENOSPC; + + list_for_each_entry(sibling, &leader->sibling_list, group_entry) { + if (!validate_event(&fake_pmu, sibling)) + return -ENOSPC; + } + + if (!validate_event(&fake_pmu, event)) + return -ENOSPC; + + return 0; +} + const struct pmu *hw_perf_event_init(struct perf_event *event) { int err; err = __hw_perf_event_init(event); + if (!err) { + if (event->group_leader != event) + err = validate_group(event); + } if (err) { if (event->destroy) event->destroy(event); diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index fab786f60ed..898df9719af 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -712,7 +712,7 @@ static void probe_nmi_watchdog(void) switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && - boot_cpu_data.x86 != 16) + boot_cpu_data.x86 != 16 && boot_cpu_data.x86 != 17) return; wd_ops = &k7_wd_ops; break; diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index bb62b3e5caa..28000743bbb 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -26,7 +26,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) early_init_transmeta(c); - display_cacheinfo(c); + cpu_detect_cache_sizes(c); /* Print CMS and CPU revision */ max = cpuid_eax(0x80860000); diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 6a52d4b36a3..7ef24a79699 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -116,21 +116,16 @@ static int cpuid_open(struct inode *inode, struct file *file) { unsigned int cpu; struct cpuinfo_x86 *c; - int ret = 0; - - lock_kernel(); cpu = iminor(file->f_path.dentry->d_inode); - if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { - ret = -ENXIO; /* No such CPU */ - goto out; - } + if (cpu >= nr_cpu_ids || !cpu_online(cpu)) + return -ENXIO; /* No such CPU */ + c = &cpu_data(cpu); if (c->cpuid_level < 0) - ret = -EIO; /* CPUID not supported */ -out: - unlock_kernel(); - return ret; + return -EIO; /* CPUID not supported */ + + return 0; } /* diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 5e409dc298a..a4849c10a77 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -27,8 +27,7 @@ #include <asm/cpu.h> #include <asm/reboot.h> #include <asm/virtext.h> -#include <asm/iommu.h> - +#include <asm/x86_init.h> #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) @@ -106,7 +105,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs) #endif #ifdef CONFIG_X86_64 - pci_iommu_shutdown(); + x86_platform.iommu_shutdown(); #endif crash_save_cpu(regs, safe_smp_processor_id()); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 2d8a371d433..b8ce165dde5 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -268,11 +268,12 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) show_registers(regs); #ifdef CONFIG_X86_32 - sp = (unsigned long) (®s->sp); - savesegment(ss, ss); - if (user_mode(regs)) { + if (user_mode_vm(regs)) { sp = regs->sp; ss = regs->ss & 0xffff; + } else { + sp = kernel_stack_pointer(regs); + savesegment(ss, ss); } printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); print_symbol("%s", regs->ip); diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index f7dd2a7c3bf..e0ed4c7abb6 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -10,9 +10,9 @@ #include <linux/module.h> #include <linux/ptrace.h> #include <linux/kexec.h> +#include <linux/sysfs.h> #include <linux/bug.h> #include <linux/nmi.h> -#include <linux/sysfs.h> #include <asm/stacktrace.h> @@ -35,6 +35,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (!stack) { unsigned long dummy; + stack = &dummy; if (task && task != current) stack = (unsigned long *)task->thread.sp; @@ -57,8 +58,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, context = (struct thread_info *) ((unsigned long)stack & (~(THREAD_SIZE - 1))); - bp = print_context_stack(context, stack, bp, ops, - data, NULL, &graph); + bp = print_context_stack(context, stack, bp, ops, data, NULL, &graph); stack = (unsigned long *)context->previous_esp; if (!stack) @@ -72,7 +72,7 @@ EXPORT_SYMBOL(dump_trace); void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl) + unsigned long *sp, unsigned long bp, char *log_lvl) { unsigned long *stack; int i; @@ -156,4 +156,3 @@ int is_valid_bugaddr(unsigned long ip) return ud2 == 0x0b0f; } - diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index a071e6be177..8e740934bd1 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -10,26 +10,28 @@ #include <linux/module.h> #include <linux/ptrace.h> #include <linux/kexec.h> +#include <linux/sysfs.h> #include <linux/bug.h> #include <linux/nmi.h> -#include <linux/sysfs.h> #include <asm/stacktrace.h> #include "dumpstack.h" +#define N_EXCEPTION_STACKS_END \ + (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2) static char x86_stack_ids[][8] = { - [DEBUG_STACK - 1] = "#DB", - [NMI_STACK - 1] = "NMI", - [DOUBLEFAULT_STACK - 1] = "#DF", - [STACKFAULT_STACK - 1] = "#SS", - [MCE_STACK - 1] = "#MC", + [ DEBUG_STACK-1 ] = "#DB", + [ NMI_STACK-1 ] = "NMI", + [ DOUBLEFAULT_STACK-1 ] = "#DF", + [ STACKFAULT_STACK-1 ] = "#SS", + [ MCE_STACK-1 ] = "#MC", #if DEBUG_STKSZ > EXCEPTION_STKSZ - [N_EXCEPTION_STACKS ... - N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" + [ N_EXCEPTION_STACKS ... + N_EXCEPTION_STACKS_END ] = "#DB[?]" #endif - }; +}; int x86_is_stack_id(int id, char *name) { @@ -37,7 +39,7 @@ int x86_is_stack_id(int id, char *name) } static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, - unsigned *usedp, char **idp) + unsigned *usedp, char **idp) { unsigned k; @@ -202,21 +204,24 @@ EXPORT_SYMBOL(dump_trace); void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl) + unsigned long *sp, unsigned long bp, char *log_lvl) { + unsigned long *irq_stack_end; + unsigned long *irq_stack; unsigned long *stack; + int cpu; int i; - const int cpu = smp_processor_id(); - unsigned long *irq_stack_end = - (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); - unsigned long *irq_stack = - (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE); + + preempt_disable(); + cpu = smp_processor_id(); + + irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); + irq_stack = (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE); /* - * debugging aid: "show_stack(NULL, NULL);" prints the - * back trace for this cpu. + * Debugging aid: "show_stack(NULL, NULL);" prints the + * back trace for this cpu: */ - if (sp == NULL) { if (task) sp = (unsigned long *)task->thread.sp; @@ -240,6 +245,8 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, printk(" %016lx", *stack++); touch_nmi_watchdog(); } + preempt_enable(); + printk("\n"); show_trace_log_lvl(task, regs, sp, bp, log_lvl); } @@ -303,4 +310,3 @@ int is_valid_bugaddr(unsigned long ip) return ud2 == 0x0b0f; } - diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c097e7d607c..50b9c220e12 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -334,6 +334,10 @@ ENTRY(ret_from_fork) END(ret_from_fork) /* + * Interrupt exit functions should be protected against kprobes + */ + .pushsection .kprobes.text, "ax" +/* * Return to user mode is not as complex as all this looks, * but we want the default path for a system call return to * go as quickly as possible which is why some of this is @@ -383,6 +387,10 @@ need_resched: END(resume_kernel) #endif CFI_ENDPROC +/* + * End of kprobes section + */ + .popsection /* SYSENTER_RETURN points to after the "sysenter" instruction in the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ @@ -513,6 +521,10 @@ sysexit_audit: PTGS_TO_GS_EX ENDPROC(ia32_sysenter_target) +/* + * syscall stub including irq exit should be protected against kprobes + */ + .pushsection .kprobes.text, "ax" # system call handler stub ENTRY(system_call) RING0_INT_FRAME # can't unwind into user space anyway @@ -705,6 +717,10 @@ syscall_badsys: jmp resume_userspace END(syscall_badsys) CFI_ENDPROC +/* + * End of kprobes section + */ + .popsection /* * System calls that need a pt_regs pointer. @@ -814,6 +830,10 @@ common_interrupt: ENDPROC(common_interrupt) CFI_ENDPROC +/* + * Irq entries should be protected against kprobes + */ + .pushsection .kprobes.text, "ax" #define BUILD_INTERRUPT3(name, nr, fn) \ ENTRY(name) \ RING0_INT_FRAME; \ @@ -980,6 +1000,10 @@ ENTRY(spurious_interrupt_bug) jmp error_code CFI_ENDPROC END(spurious_interrupt_bug) +/* + * End of kprobes section + */ + .popsection ENTRY(kernel_thread_helper) pushl $0 # fake return address for unwinder @@ -1185,17 +1209,14 @@ END(ftrace_graph_caller) .globl return_to_handler return_to_handler: - pushl $0 pushl %eax - pushl %ecx pushl %edx movl %ebp, %eax call ftrace_return_to_handler - movl %eax, 0xc(%esp) + movl %eax, %ecx popl %edx - popl %ecx popl %eax - ret + jmp *%ecx #endif .section .rodata,"a" diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index b5c061f8f35..4deb8fc849d 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -155,11 +155,11 @@ GLOBAL(return_to_handler) call ftrace_return_to_handler - movq %rax, 16(%rsp) + movq %rax, %rdi movq 8(%rsp), %rdx movq (%rsp), %rax - addq $16, %rsp - retq + addq $24, %rsp + jmp *%rdi #endif @@ -803,6 +803,10 @@ END(interrupt) call \func .endm +/* + * Interrupt entry/exit should be protected against kprobes + */ + .pushsection .kprobes.text, "ax" /* * The interrupt stubs push (~vector+0x80) onto the stack and * then jump to common_interrupt. @@ -941,6 +945,10 @@ ENTRY(retint_kernel) CFI_ENDPROC END(common_interrupt) +/* + * End of kprobes section + */ + .popsection /* * APIC interrupts. @@ -1491,12 +1499,17 @@ error_kernelspace: leaq irq_return(%rip),%rcx cmpq %rcx,RIP+8(%rsp) je error_swapgs - movl %ecx,%ecx /* zero extend */ - cmpq %rcx,RIP+8(%rsp) - je error_swapgs + movl %ecx,%eax /* zero extend */ + cmpq %rax,RIP+8(%rsp) + je bstep_iret cmpq $gs_change,RIP+8(%rsp) je error_swapgs jmp error_sti + +bstep_iret: + /* Fix truncated RIP */ + movq %rcx,RIP+8(%rsp) + jmp error_swapgs END(error_entry) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 9dbb527e165..5a1b9758fd6 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -9,6 +9,8 @@ * the dangers of modifying code on the run. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/spinlock.h> #include <linux/hardirq.h> #include <linux/uaccess.h> @@ -336,15 +338,15 @@ int __init ftrace_dyn_arch_init(void *data) switch (faulted) { case 0: - pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n"); + pr_info("converting mcount calls to 0f 1f 44 00 00\n"); memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE); break; case 1: - pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n"); + pr_info("converting mcount calls to 66 66 66 66 90\n"); memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE); break; case 2: - pr_info("ftrace: converting mcount calls to jmp . + 5\n"); + pr_info("converting mcount calls to jmp . + 5\n"); memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE); break; } @@ -468,82 +470,10 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, #ifdef CONFIG_FTRACE_SYSCALLS -extern unsigned long __start_syscalls_metadata[]; -extern unsigned long __stop_syscalls_metadata[]; extern unsigned long *sys_call_table; -static struct syscall_metadata **syscalls_metadata; - -static struct syscall_metadata *find_syscall_meta(unsigned long *syscall) -{ - struct syscall_metadata *start; - struct syscall_metadata *stop; - char str[KSYM_SYMBOL_LEN]; - - - start = (struct syscall_metadata *)__start_syscalls_metadata; - stop = (struct syscall_metadata *)__stop_syscalls_metadata; - kallsyms_lookup((unsigned long) syscall, NULL, NULL, NULL, str); - - for ( ; start < stop; start++) { - if (start->name && !strcmp(start->name, str)) - return start; - } - return NULL; -} - -struct syscall_metadata *syscall_nr_to_meta(int nr) -{ - if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) - return NULL; - - return syscalls_metadata[nr]; -} - -int syscall_name_to_nr(char *name) +unsigned long __init arch_syscall_addr(int nr) { - int i; - - if (!syscalls_metadata) - return -1; - - for (i = 0; i < NR_syscalls; i++) { - if (syscalls_metadata[i]) { - if (!strcmp(syscalls_metadata[i]->name, name)) - return i; - } - } - return -1; -} - -void set_syscall_enter_id(int num, int id) -{ - syscalls_metadata[num]->enter_id = id; -} - -void set_syscall_exit_id(int num, int id) -{ - syscalls_metadata[num]->exit_id = id; -} - -static int __init arch_init_ftrace_syscalls(void) -{ - int i; - struct syscall_metadata *meta; - unsigned long **psys_syscall_table = &sys_call_table; - - syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * - NR_syscalls, GFP_KERNEL); - if (!syscalls_metadata) { - WARN_ON(1); - return -ENOMEM; - } - - for (i = 0; i < NR_syscalls; i++) { - meta = find_syscall_meta(psys_syscall_table[i]); - syscalls_metadata[i] = meta; - } - return 0; + return (unsigned long)(&sys_call_table)[nr]; } -arch_initcall(arch_init_ftrace_syscalls); #endif diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 780cd928fcd..22db86a3764 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -212,8 +212,8 @@ ENTRY(secondary_startup_64) */ lgdt early_gdt_descr(%rip) - /* set up data segments. actually 0 would do too */ - movl $__KERNEL_DS,%eax + /* set up data segments */ + xorl %eax,%eax movl %eax,%ds movl %eax,%ss movl %eax,%es diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c new file mode 100644 index 00000000000..d42f65ac492 --- /dev/null +++ b/arch/x86/kernel/hw_breakpoint.c @@ -0,0 +1,555 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) 2007 Alan Stern + * Copyright (C) 2009 IBM Corporation + * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com> + * + * Authors: Alan Stern <stern@rowland.harvard.edu> + * K.Prasad <prasad@linux.vnet.ibm.com> + * Frederic Weisbecker <fweisbec@gmail.com> + */ + +/* + * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, + * using the CPU's debug registers. + */ + +#include <linux/perf_event.h> +#include <linux/hw_breakpoint.h> +#include <linux/irqflags.h> +#include <linux/notifier.h> +#include <linux/kallsyms.h> +#include <linux/kprobes.h> +#include <linux/percpu.h> +#include <linux/kdebug.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/smp.h> + +#include <asm/hw_breakpoint.h> +#include <asm/processor.h> +#include <asm/debugreg.h> + +/* Per cpu debug control register value */ +DEFINE_PER_CPU(unsigned long, cpu_dr7); +EXPORT_PER_CPU_SYMBOL(cpu_dr7); + +/* Per cpu debug address registers values */ +static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]); + +/* + * Stores the breakpoints currently in use on each breakpoint address + * register for each cpus + */ +static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]); + + +static inline unsigned long +__encode_dr7(int drnum, unsigned int len, unsigned int type) +{ + unsigned long bp_info; + + bp_info = (len | type) & 0xf; + bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE); + bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)); + + return bp_info; +} + +/* + * Encode the length, type, Exact, and Enable bits for a particular breakpoint + * as stored in debug register 7. + */ +unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) +{ + return __encode_dr7(drnum, len, type) | DR_GLOBAL_SLOWDOWN; +} + +/* + * Decode the length and type bits for a particular breakpoint as + * stored in debug register 7. Return the "enabled" status. + */ +int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type) +{ + int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); + + *len = (bp_info & 0xc) | 0x40; + *type = (bp_info & 0x3) | 0x80; + + return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; +} + +/* + * Install a perf counter breakpoint. + * + * We seek a free debug address register and use it for this + * breakpoint. Eventually we enable it in the debug control register. + * + * Atomic: we hold the counter->ctx->lock and we only handle variables + * and registers local to this cpu. + */ +int arch_install_hw_breakpoint(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + unsigned long *dr7; + int i; + + for (i = 0; i < HBP_NUM; i++) { + struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); + + if (!*slot) { + *slot = bp; + break; + } + } + + if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) + return -EBUSY; + + set_debugreg(info->address, i); + __get_cpu_var(cpu_debugreg[i]) = info->address; + + dr7 = &__get_cpu_var(cpu_dr7); + *dr7 |= encode_dr7(i, info->len, info->type); + + set_debugreg(*dr7, 7); + + return 0; +} + +/* + * Uninstall the breakpoint contained in the given counter. + * + * First we search the debug address register it uses and then we disable + * it. + * + * Atomic: we hold the counter->ctx->lock and we only handle variables + * and registers local to this cpu. + */ +void arch_uninstall_hw_breakpoint(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + unsigned long *dr7; + int i; + + for (i = 0; i < HBP_NUM; i++) { + struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); + + if (*slot == bp) { + *slot = NULL; + break; + } + } + + if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) + return; + + dr7 = &__get_cpu_var(cpu_dr7); + *dr7 &= ~__encode_dr7(i, info->len, info->type); + + set_debugreg(*dr7, 7); +} + +static int get_hbp_len(u8 hbp_len) +{ + unsigned int len_in_bytes = 0; + + switch (hbp_len) { + case X86_BREAKPOINT_LEN_1: + len_in_bytes = 1; + break; + case X86_BREAKPOINT_LEN_2: + len_in_bytes = 2; + break; + case X86_BREAKPOINT_LEN_4: + len_in_bytes = 4; + break; +#ifdef CONFIG_X86_64 + case X86_BREAKPOINT_LEN_8: + len_in_bytes = 8; + break; +#endif + } + return len_in_bytes; +} + +/* + * Check for virtual address in user space. + */ +int arch_check_va_in_userspace(unsigned long va, u8 hbp_len) +{ + unsigned int len; + + len = get_hbp_len(hbp_len); + + return (va <= TASK_SIZE - len); +} + +/* + * Check for virtual address in kernel space. + */ +static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) +{ + unsigned int len; + + len = get_hbp_len(hbp_len); + + return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); +} + +/* + * Store a breakpoint's encoded address, length, and type. + */ +static int arch_store_info(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + /* + * For kernel-addresses, either the address or symbol name can be + * specified. + */ + if (info->name) + info->address = (unsigned long) + kallsyms_lookup_name(info->name); + if (info->address) + return 0; + + return -EINVAL; +} + +int arch_bp_generic_fields(int x86_len, int x86_type, + int *gen_len, int *gen_type) +{ + /* Len */ + switch (x86_len) { + case X86_BREAKPOINT_LEN_1: + *gen_len = HW_BREAKPOINT_LEN_1; + break; + case X86_BREAKPOINT_LEN_2: + *gen_len = HW_BREAKPOINT_LEN_2; + break; + case X86_BREAKPOINT_LEN_4: + *gen_len = HW_BREAKPOINT_LEN_4; + break; +#ifdef CONFIG_X86_64 + case X86_BREAKPOINT_LEN_8: + *gen_len = HW_BREAKPOINT_LEN_8; + break; +#endif + default: + return -EINVAL; + } + + /* Type */ + switch (x86_type) { + case X86_BREAKPOINT_EXECUTE: + *gen_type = HW_BREAKPOINT_X; + break; + case X86_BREAKPOINT_WRITE: + *gen_type = HW_BREAKPOINT_W; + break; + case X86_BREAKPOINT_RW: + *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; + break; + default: + return -EINVAL; + } + + return 0; +} + + +static int arch_build_bp_info(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + + info->address = bp->attr.bp_addr; + + /* Len */ + switch (bp->attr.bp_len) { + case HW_BREAKPOINT_LEN_1: + info->len = X86_BREAKPOINT_LEN_1; + break; + case HW_BREAKPOINT_LEN_2: + info->len = X86_BREAKPOINT_LEN_2; + break; + case HW_BREAKPOINT_LEN_4: + info->len = X86_BREAKPOINT_LEN_4; + break; +#ifdef CONFIG_X86_64 + case HW_BREAKPOINT_LEN_8: + info->len = X86_BREAKPOINT_LEN_8; + break; +#endif + default: + return -EINVAL; + } + + /* Type */ + switch (bp->attr.bp_type) { + case HW_BREAKPOINT_W: + info->type = X86_BREAKPOINT_WRITE; + break; + case HW_BREAKPOINT_W | HW_BREAKPOINT_R: + info->type = X86_BREAKPOINT_RW; + break; + case HW_BREAKPOINT_X: + info->type = X86_BREAKPOINT_EXECUTE; + break; + default: + return -EINVAL; + } + + return 0; +} +/* + * Validate the arch-specific HW Breakpoint register settings + */ +int arch_validate_hwbkpt_settings(struct perf_event *bp, + struct task_struct *tsk) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + unsigned int align; + int ret; + + + ret = arch_build_bp_info(bp); + if (ret) + return ret; + + ret = -EINVAL; + + if (info->type == X86_BREAKPOINT_EXECUTE) + /* + * Ptrace-refactoring code + * For now, we'll allow instruction breakpoint only for user-space + * addresses + */ + if ((!arch_check_va_in_userspace(info->address, info->len)) && + info->len != X86_BREAKPOINT_EXECUTE) + return ret; + + switch (info->len) { + case X86_BREAKPOINT_LEN_1: + align = 0; + break; + case X86_BREAKPOINT_LEN_2: + align = 1; + break; + case X86_BREAKPOINT_LEN_4: + align = 3; + break; +#ifdef CONFIG_X86_64 + case X86_BREAKPOINT_LEN_8: + align = 7; + break; +#endif + default: + return ret; + } + + if (bp->callback) + ret = arch_store_info(bp); + + if (ret < 0) + return ret; + /* + * Check that the low-order bits of the address are appropriate + * for the alignment implied by len. + */ + if (info->address & align) + return -EINVAL; + + /* Check that the virtual address is in the proper range */ + if (tsk) { + if (!arch_check_va_in_userspace(info->address, info->len)) + return -EFAULT; + } else { + if (!arch_check_va_in_kernelspace(info->address, info->len)) + return -EFAULT; + } + + return 0; +} + +/* + * Dump the debug register contents to the user. + * We can't dump our per cpu values because it + * may contain cpu wide breakpoint, something that + * doesn't belong to the current task. + * + * TODO: include non-ptrace user breakpoints (perf) + */ +void aout_dump_debugregs(struct user *dump) +{ + int i; + int dr7 = 0; + struct perf_event *bp; + struct arch_hw_breakpoint *info; + struct thread_struct *thread = ¤t->thread; + + for (i = 0; i < HBP_NUM; i++) { + bp = thread->ptrace_bps[i]; + + if (bp && !bp->attr.disabled) { + dump->u_debugreg[i] = bp->attr.bp_addr; + info = counter_arch_bp(bp); + dr7 |= encode_dr7(i, info->len, info->type); + } else { + dump->u_debugreg[i] = 0; + } + } + + dump->u_debugreg[4] = 0; + dump->u_debugreg[5] = 0; + dump->u_debugreg[6] = current->thread.debugreg6; + + dump->u_debugreg[7] = dr7; +} +EXPORT_SYMBOL_GPL(aout_dump_debugregs); + +/* + * Release the user breakpoints used by ptrace + */ +void flush_ptrace_hw_breakpoint(struct task_struct *tsk) +{ + int i; + struct thread_struct *t = &tsk->thread; + + for (i = 0; i < HBP_NUM; i++) { + unregister_hw_breakpoint(t->ptrace_bps[i]); + t->ptrace_bps[i] = NULL; + } +} + +void hw_breakpoint_restore(void) +{ + set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0); + set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1); + set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2); + set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3); + set_debugreg(current->thread.debugreg6, 6); + set_debugreg(__get_cpu_var(cpu_dr7), 7); +} +EXPORT_SYMBOL_GPL(hw_breakpoint_restore); + +/* + * Handle debug exception notifications. + * + * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below. + * + * NOTIFY_DONE returned if one of the following conditions is true. + * i) When the causative address is from user-space and the exception + * is a valid one, i.e. not triggered as a result of lazy debug register + * switching + * ii) When there are more bits than trap<n> set in DR6 register (such + * as BD, BS or BT) indicating that more than one debug condition is + * met and requires some more action in do_debug(). + * + * NOTIFY_STOP returned for all other cases + * + */ +static int __kprobes hw_breakpoint_handler(struct die_args *args) +{ + int i, cpu, rc = NOTIFY_STOP; + struct perf_event *bp; + unsigned long dr7, dr6; + unsigned long *dr6_p; + + /* The DR6 value is pointed by args->err */ + dr6_p = (unsigned long *)ERR_PTR(args->err); + dr6 = *dr6_p; + + /* Do an early return if no trap bits are set in DR6 */ + if ((dr6 & DR_TRAP_BITS) == 0) + return NOTIFY_DONE; + + get_debugreg(dr7, 7); + /* Disable breakpoints during exception handling */ + set_debugreg(0UL, 7); + /* + * Assert that local interrupts are disabled + * Reset the DRn bits in the virtualized register value. + * The ptrace trigger routine will add in whatever is needed. + */ + current->thread.debugreg6 &= ~DR_TRAP_BITS; + cpu = get_cpu(); + + /* Handle all the breakpoints that were triggered */ + for (i = 0; i < HBP_NUM; ++i) { + if (likely(!(dr6 & (DR_TRAP0 << i)))) + continue; + + /* + * The counter may be concurrently released but that can only + * occur from a call_rcu() path. We can then safely fetch + * the breakpoint, use its callback, touch its counter + * while we are in an rcu_read_lock() path. + */ + rcu_read_lock(); + + bp = per_cpu(bp_per_reg[i], cpu); + if (bp) + rc = NOTIFY_DONE; + /* + * Reset the 'i'th TRAP bit in dr6 to denote completion of + * exception handling + */ + (*dr6_p) &= ~(DR_TRAP0 << i); + /* + * bp can be NULL due to lazy debug register switching + * or due to concurrent perf counter removing. + */ + if (!bp) { + rcu_read_unlock(); + break; + } + + (bp->callback)(bp, args->regs); + + rcu_read_unlock(); + } + if (dr6 & (~DR_TRAP_BITS)) + rc = NOTIFY_DONE; + + set_debugreg(dr7, 7); + put_cpu(); + + return rc; +} + +/* + * Handle debug exception notifications. + */ +int __kprobes hw_breakpoint_exceptions_notify( + struct notifier_block *unused, unsigned long val, void *data) +{ + if (val != DIE_DEBUG) + return NOTIFY_DONE; + + return hw_breakpoint_handler(data); +} + +void hw_breakpoint_pmu_read(struct perf_event *bp) +{ + /* TODO */ +} + +void hw_breakpoint_pmu_unthrottle(struct perf_event *bp) +{ + /* TODO */ +} diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 04bbd527856..fee6cc2b207 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -92,17 +92,17 @@ static int show_other_interrupts(struct seq_file *p, int prec) seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); seq_printf(p, " TLB shootdowns\n"); #endif -#ifdef CONFIG_X86_MCE +#ifdef CONFIG_X86_THERMAL_VECTOR seq_printf(p, "%*s: ", prec, "TRM"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); seq_printf(p, " Thermal event interrupts\n"); -# ifdef CONFIG_X86_MCE_THRESHOLD +#endif +#ifdef CONFIG_X86_MCE_THRESHOLD seq_printf(p, "%*s: ", prec, "THR"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); seq_printf(p, " Threshold APIC interrupts\n"); -# endif #endif #ifdef CONFIG_X86_MCE seq_printf(p, "%*s: ", prec, "MCE"); @@ -194,11 +194,11 @@ u64 arch_irq_stat_cpu(unsigned int cpu) sum += irq_stats(cpu)->irq_call_count; sum += irq_stats(cpu)->irq_tlb_count; #endif -#ifdef CONFIG_X86_MCE +#ifdef CONFIG_X86_THERMAL_VECTOR sum += irq_stats(cpu)->irq_thermal_count; -# ifdef CONFIG_X86_MCE_THRESHOLD +#endif +#ifdef CONFIG_X86_MCE_THRESHOLD sum += irq_stats(cpu)->irq_threshold_count; -# endif #endif #ifdef CONFIG_X86_MCE sum += per_cpu(mce_exception_count, cpu); @@ -274,3 +274,93 @@ void smp_generic_interrupt(struct pt_regs *regs) } EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); + +#ifdef CONFIG_HOTPLUG_CPU +/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ +void fixup_irqs(void) +{ + unsigned int irq, vector; + static int warned; + struct irq_desc *desc; + + for_each_irq_desc(irq, desc) { + int break_affinity = 0; + int set_affinity = 1; + const struct cpumask *affinity; + + if (!desc) + continue; + if (irq == 2) + continue; + + /* interrupt's are disabled at this point */ + spin_lock(&desc->lock); + + affinity = desc->affinity; + if (!irq_has_action(irq) || + cpumask_equal(affinity, cpu_online_mask)) { + spin_unlock(&desc->lock); + continue; + } + + /* + * Complete the irq move. This cpu is going down and for + * non intr-remapping case, we can't wait till this interrupt + * arrives at this cpu before completing the irq move. + */ + irq_force_complete_move(irq); + + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + break_affinity = 1; + affinity = cpu_all_mask; + } + + if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->mask) + desc->chip->mask(irq); + + if (desc->chip->set_affinity) + desc->chip->set_affinity(irq, affinity); + else if (!(warned++)) + set_affinity = 0; + + if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->unmask) + desc->chip->unmask(irq); + + spin_unlock(&desc->lock); + + if (break_affinity && set_affinity) + printk("Broke affinity for irq %i\n", irq); + else if (!set_affinity) + printk("Cannot set affinity for irq %i\n", irq); + } + + /* + * We can remove mdelay() and then send spuriuous interrupts to + * new cpu targets for all the irqs that were handled previously by + * this cpu. While it works, I have seen spurious interrupt messages + * (nothing wrong but still...). + * + * So for now, retain mdelay(1) and check the IRR and then send those + * interrupts to new targets as this cpu is already offlined... + */ + mdelay(1); + + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { + unsigned int irr; + + if (__get_cpu_var(vector_irq)[vector] < 0) + continue; + + irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); + if (irr & (1 << (vector % 32))) { + irq = __get_cpu_var(vector_irq)[vector]; + + desc = irq_to_desc(irq); + spin_lock(&desc->lock); + if (desc->chip->retrigger) + desc->chip->retrigger(irq); + spin_unlock(&desc->lock); + } + } +} +#endif diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 7d35d0fe232..10709f29d16 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -211,48 +211,3 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) return true; } - -#ifdef CONFIG_HOTPLUG_CPU - -/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ -void fixup_irqs(void) -{ - unsigned int irq; - struct irq_desc *desc; - - for_each_irq_desc(irq, desc) { - const struct cpumask *affinity; - - if (!desc) - continue; - if (irq == 2) - continue; - - affinity = desc->affinity; - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { - printk("Breaking affinity for irq %i\n", irq); - affinity = cpu_all_mask; - } - if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, affinity); - else if (desc->action) - printk_once("Cannot set affinity for irq %i\n", irq); - } - -#if 0 - barrier(); - /* Ingo Molnar says: "after the IO-APIC masks have been redirected - [note the nop - the interrupt-enable boundary on x86 is two - instructions from sti] - to flush out pending hardirqs and - IPIs. After this point nothing is supposed to reach this CPU." */ - __asm__ __volatile__("sti; nop; cli"); - barrier(); -#else - /* That doesn't seem sufficient. Give it 1ms. */ - local_irq_enable(); - mdelay(1); - local_irq_disable(); -#endif -} -#endif - diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 977d8b43a0d..acf8fbf8fbd 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -62,64 +62,6 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) return true; } -#ifdef CONFIG_HOTPLUG_CPU -/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ -void fixup_irqs(void) -{ - unsigned int irq; - static int warned; - struct irq_desc *desc; - - for_each_irq_desc(irq, desc) { - int break_affinity = 0; - int set_affinity = 1; - const struct cpumask *affinity; - - if (!desc) - continue; - if (irq == 2) - continue; - - /* interrupt's are disabled at this point */ - spin_lock(&desc->lock); - - affinity = desc->affinity; - if (!irq_has_action(irq) || - cpumask_equal(affinity, cpu_online_mask)) { - spin_unlock(&desc->lock); - continue; - } - - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { - break_affinity = 1; - affinity = cpu_all_mask; - } - - if (desc->chip->mask) - desc->chip->mask(irq); - - if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, affinity); - else if (!(warned++)) - set_affinity = 0; - - if (desc->chip->unmask) - desc->chip->unmask(irq); - - spin_unlock(&desc->lock); - - if (break_affinity && set_affinity) - printk("Broke affinity for irq %i\n", irq); - else if (!set_affinity) - printk("Cannot set affinity for irq %i\n", irq); - } - - /* That doesn't seem sufficient. Give it 1ms. */ - local_irq_enable(); - mdelay(1); - local_irq_disable(); -} -#endif extern void call_softirq(void); diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 8d82a77a3f3..20a5b368946 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -43,6 +43,7 @@ #include <linux/smp.h> #include <linux/nmi.h> +#include <asm/debugreg.h> #include <asm/apicdef.h> #include <asm/system.h> @@ -88,7 +89,6 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) gdb_regs[GDB_SS] = __KERNEL_DS; gdb_regs[GDB_FS] = 0xFFFF; gdb_regs[GDB_GS] = 0xFFFF; - gdb_regs[GDB_SP] = (int)®s->sp; #else gdb_regs[GDB_R8] = regs->r8; gdb_regs[GDB_R9] = regs->r9; @@ -101,8 +101,8 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) gdb_regs32[GDB_PS] = regs->flags; gdb_regs32[GDB_CS] = regs->cs; gdb_regs32[GDB_SS] = regs->ss; - gdb_regs[GDB_SP] = regs->sp; #endif + gdb_regs[GDB_SP] = kernel_stack_pointer(regs); } /** @@ -434,6 +434,11 @@ single_step_cont(struct pt_regs *regs, struct die_args *args) "resuming...\n"); kgdb_arch_handle_exception(args->trapnr, args->signr, args->err, "c", "", regs); + /* + * Reset the BS bit in dr6 (pointed by args->err) to + * denote completion of processing + */ + (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; return NOTIFY_STOP; } diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7b5169d2b00..1f3186ce213 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -48,31 +48,22 @@ #include <linux/preempt.h> #include <linux/module.h> #include <linux/kdebug.h> +#include <linux/kallsyms.h> #include <asm/cacheflush.h> #include <asm/desc.h> #include <asm/pgtable.h> #include <asm/uaccess.h> #include <asm/alternative.h> +#include <asm/insn.h> +#include <asm/debugreg.h> void jprobe_return_end(void); DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); -#ifdef CONFIG_X86_64 -#define stack_addr(regs) ((unsigned long *)regs->sp) -#else -/* - * "®s->sp" looks wrong, but it's correct for x86_32. x86_32 CPUs - * don't save the ss and esp registers if the CPU is already in kernel - * mode when it traps. So for kprobes, regs->sp and regs->ss are not - * the [nonexistent] saved stack pointer and ss register, but rather - * the top 8 bytes of the pre-int3 stack. So ®s->sp happens to - * point to the top of the pre-int3 stack. - */ -#define stack_addr(regs) ((unsigned long *)®s->sp) -#endif +#define stack_addr(regs) ((unsigned long *)kernel_stack_pointer(regs)) #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ @@ -106,50 +97,6 @@ static const u32 twobyte_is_boostable[256 / 32] = { /* ----------------------------------------------- */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ }; -static const u32 onebyte_has_modrm[256 / 32] = { - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ - /* ----------------------------------------------- */ - W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */ - W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */ - W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */ - W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */ - W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */ - W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */ - W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */ - W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */ - W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ - W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */ - W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */ - W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */ - W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */ - W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ - W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */ - W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) /* f0 */ - /* ----------------------------------------------- */ - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ -}; -static const u32 twobyte_has_modrm[256 / 32] = { - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ - /* ----------------------------------------------- */ - W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */ - W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */ - W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */ - W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */ - W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */ - W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */ - W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */ - W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */ - W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */ - W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */ - W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */ - W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */ - W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */ - W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */ - W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */ - W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* ff */ - /* ----------------------------------------------- */ - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ -}; #undef W struct kretprobe_blackpoint kretprobe_blacklist[] = { @@ -244,6 +191,75 @@ retry: } } +/* Recover the probed instruction at addr for further analysis. */ +static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) +{ + struct kprobe *kp; + kp = get_kprobe((void *)addr); + if (!kp) + return -EINVAL; + + /* + * Basically, kp->ainsn.insn has an original instruction. + * However, RIP-relative instruction can not do single-stepping + * at different place, fix_riprel() tweaks the displacement of + * that instruction. In that case, we can't recover the instruction + * from the kp->ainsn.insn. + * + * On the other hand, kp->opcode has a copy of the first byte of + * the probed instruction, which is overwritten by int3. And + * the instruction at kp->addr is not modified by kprobes except + * for the first byte, we can recover the original instruction + * from it and kp->opcode. + */ + memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + buf[0] = kp->opcode; + return 0; +} + +/* Dummy buffers for kallsyms_lookup */ +static char __dummy_buf[KSYM_NAME_LEN]; + +/* Check if paddr is at an instruction boundary */ +static int __kprobes can_probe(unsigned long paddr) +{ + int ret; + unsigned long addr, offset = 0; + struct insn insn; + kprobe_opcode_t buf[MAX_INSN_SIZE]; + + if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf)) + return 0; + + /* Decode instructions */ + addr = paddr - offset; + while (addr < paddr) { + kernel_insn_init(&insn, (void *)addr); + insn_get_opcode(&insn); + + /* + * Check if the instruction has been modified by another + * kprobe, in which case we replace the breakpoint by the + * original instruction in our buffer. + */ + if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { + ret = recover_probed_instruction(buf, addr); + if (ret) + /* + * Another debugging subsystem might insert + * this breakpoint. In that case, we can't + * recover it. + */ + return 0; + kernel_insn_init(&insn, buf); + } + insn_get_length(&insn); + addr += insn.length; + } + + return (addr == paddr); +} + /* * Returns non-zero if opcode modifies the interrupt flag. */ @@ -277,68 +293,30 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) static void __kprobes fix_riprel(struct kprobe *p) { #ifdef CONFIG_X86_64 - u8 *insn = p->ainsn.insn; - s64 disp; - int need_modrm; - - /* Skip legacy instruction prefixes. */ - while (1) { - switch (*insn) { - case 0x66: - case 0x67: - case 0x2e: - case 0x3e: - case 0x26: - case 0x64: - case 0x65: - case 0x36: - case 0xf0: - case 0xf3: - case 0xf2: - ++insn; - continue; - } - break; - } + struct insn insn; + kernel_insn_init(&insn, p->ainsn.insn); - /* Skip REX instruction prefix. */ - if (is_REX_prefix(insn)) - ++insn; - - if (*insn == 0x0f) { - /* Two-byte opcode. */ - ++insn; - need_modrm = test_bit(*insn, - (unsigned long *)twobyte_has_modrm); - } else - /* One-byte opcode. */ - need_modrm = test_bit(*insn, - (unsigned long *)onebyte_has_modrm); - - if (need_modrm) { - u8 modrm = *++insn; - if ((modrm & 0xc7) == 0x05) { - /* %rip+disp32 addressing mode */ - /* Displacement follows ModRM byte. */ - ++insn; - /* - * The copied instruction uses the %rip-relative - * addressing mode. Adjust the displacement for the - * difference between the original location of this - * instruction and the location of the copy that will - * actually be run. The tricky bit here is making sure - * that the sign extension happens correctly in this - * calculation, since we need a signed 32-bit result to - * be sign-extended to 64 bits when it's added to the - * %rip value and yield the same 64-bit result that the - * sign-extension of the original signed 32-bit - * displacement would have given. - */ - disp = (u8 *) p->addr + *((s32 *) insn) - - (u8 *) p->ainsn.insn; - BUG_ON((s64) (s32) disp != disp); /* Sanity check. */ - *(s32 *)insn = (s32) disp; - } + if (insn_rip_relative(&insn)) { + s64 newdisp; + u8 *disp; + insn_get_displacement(&insn); + /* + * The copied instruction uses the %rip-relative addressing + * mode. Adjust the displacement for the difference between + * the original location of this instruction and the location + * of the copy that will actually be run. The tricky bit here + * is making sure that the sign extension happens correctly in + * this calculation, since we need a signed 32-bit result to + * be sign-extended to 64 bits when it's added to the %rip + * value and yield the same 64-bit result that the sign- + * extension of the original signed 32-bit displacement would + * have given. + */ + newdisp = (u8 *) p->addr + (s64) insn.displacement.value - + (u8 *) p->ainsn.insn; + BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ + disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn); + *(s32 *) disp = (s32) newdisp; } #endif } @@ -359,6 +337,8 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p) int __kprobes arch_prepare_kprobe(struct kprobe *p) { + if (!can_probe((unsigned long)p->addr)) + return -EILSEQ; /* insn: must be on special executable page on x86. */ p->ainsn.insn = get_insn_slot(); if (!p->ainsn.insn) @@ -472,17 +452,6 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, { switch (kcb->kprobe_status) { case KPROBE_HIT_SSDONE: -#ifdef CONFIG_X86_64 - /* TODO: Provide re-entrancy from post_kprobes_handler() and - * avoid exception stack corruption while single-stepping on - * the instruction of the new probe. - */ - arch_disarm_kprobe(p); - regs->ip = (unsigned long)p->addr; - reset_current_kprobe(); - preempt_enable_no_resched(); - break; -#endif case KPROBE_HIT_ACTIVE: save_previous_kprobe(kcb); set_current_kprobe(p, regs, kcb); @@ -491,18 +460,16 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, kcb->kprobe_status = KPROBE_REENTER; break; case KPROBE_HIT_SS: - if (p == kprobe_running()) { - regs->flags &= ~X86_EFLAGS_TF; - regs->flags |= kcb->kprobe_saved_flags; - return 0; - } else { - /* A probe has been hit in the codepath leading up - * to, or just after, single-stepping of a probed - * instruction. This entire codepath should strictly - * reside in .kprobes.text section. Raise a warning - * to highlight this peculiar case. - */ - } + /* A probe has been hit in the codepath leading up to, or just + * after, single-stepping of a probed instruction. This entire + * codepath should strictly reside in .kprobes.text section. + * Raise a BUG or we'll continue in an endless reentering loop + * and eventually a stack overflow. + */ + printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n", + p->addr); + dump_kprobe(p); + BUG(); default: /* impossible cases */ WARN_ON(1); @@ -967,8 +934,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, ret = NOTIFY_STOP; break; case DIE_DEBUG: - if (post_kprobe_handler(args->regs)) + if (post_kprobe_handler(args->regs)) { + /* + * Reset the BS bit in dr6 (pointed by args->err) to + * denote completion of processing + */ + (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; ret = NOTIFY_STOP; + } break; case DIE_GPF: /* diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index c1c429d0013..c843f8406da 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -25,6 +25,7 @@ #include <asm/desc.h> #include <asm/system.h> #include <asm/cacheflush.h> +#include <asm/debugreg.h> static void set_idt(void *newidt, __u16 limit) { @@ -202,6 +203,7 @@ void machine_kexec(struct kimage *image) /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); + hw_breakpoint_disable(); if (image->preserve_context) { #ifdef CONFIG_X86_IO_APIC diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 84c3bf209e9..4a8bb82248a 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -18,6 +18,7 @@ #include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> +#include <asm/debugreg.h> static int init_one_level2_page(struct kimage *image, pgd_t *pgd, unsigned long addr) @@ -282,6 +283,7 @@ void machine_kexec(struct kimage *image) /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); + hw_breakpoint_disable(); if (image->preserve_context) { #ifdef CONFIG_X86_IO_APIC diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 378e9a8f1bf..2bcad3926ed 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -73,7 +73,6 @@ #include <linux/platform_device.h> #include <linux/miscdevice.h> #include <linux/capability.h> -#include <linux/smp_lock.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/mutex.h> @@ -201,7 +200,6 @@ static int do_microcode_update(const void __user *buf, size_t size) static int microcode_open(struct inode *unused1, struct file *unused2) { - cycle_kernel_lock(); return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; } diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 6a3cefc7dda..553449951b8 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -174,21 +174,17 @@ static int msr_open(struct inode *inode, struct file *file) { unsigned int cpu = iminor(file->f_path.dentry->d_inode); struct cpuinfo_x86 *c = &cpu_data(cpu); - int ret = 0; - lock_kernel(); cpu = iminor(file->f_path.dentry->d_inode); - if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { - ret = -ENXIO; /* No such CPU */ - goto out; - } + if (cpu >= nr_cpu_ids || !cpu_online(cpu)) + return -ENXIO; /* No such CPU */ + c = &cpu_data(cpu); if (!cpu_has(c, X86_FEATURE_MSR)) - ret = -EIO; /* MSR not supported */ -out: - unlock_kernel(); - return ret; + return -EIO; /* MSR not supported */ + + return 0; } /* diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 971a3bec47a..c563e4c8ff3 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -46,6 +46,7 @@ #include <asm/dma.h> #include <asm/rio.h> #include <asm/bios_ebda.h> +#include <asm/x86_init.h> #ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT int use_calgary __read_mostly = 1; @@ -244,7 +245,7 @@ static unsigned long iommu_range_alloc(struct device *dev, if (panic_on_overflow) panic("Calgary: fix the allocator.\n"); else - return bad_dma_address; + return DMA_ERROR_CODE; } } @@ -260,12 +261,15 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, void *vaddr, unsigned int npages, int direction) { unsigned long entry; - dma_addr_t ret = bad_dma_address; + dma_addr_t ret; entry = iommu_range_alloc(dev, tbl, npages); - if (unlikely(entry == bad_dma_address)) - goto error; + if (unlikely(entry == DMA_ERROR_CODE)) { + printk(KERN_WARNING "Calgary: failed to allocate %u pages in " + "iommu %p\n", npages, tbl); + return DMA_ERROR_CODE; + } /* set the return dma address */ ret = (entry << PAGE_SHIFT) | ((unsigned long)vaddr & ~PAGE_MASK); @@ -273,13 +277,7 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, /* put the TCEs in the HW table */ tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK, direction); - return ret; - -error: - printk(KERN_WARNING "Calgary: failed to allocate %u pages in " - "iommu %p\n", npages, tbl); - return bad_dma_address; } static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, @@ -290,8 +288,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, unsigned long flags; /* were we called with bad_dma_address? */ - badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE); - if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) { + badend = DMA_ERROR_CODE + (EMERGENCY_PAGES * PAGE_SIZE); + if (unlikely((dma_addr >= DMA_ERROR_CODE) && (dma_addr < badend))) { WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA " "address 0x%Lx\n", dma_addr); return; @@ -318,13 +316,15 @@ static inline struct iommu_table *find_iommu_table(struct device *dev) pdev = to_pci_dev(dev); + /* search up the device tree for an iommu */ pbus = pdev->bus; - - /* is the device behind a bridge? Look for the root bus */ - while (pbus->parent) + do { + tbl = pci_iommu(pbus); + if (tbl && tbl->it_busno == pbus->number) + break; + tbl = NULL; pbus = pbus->parent; - - tbl = pci_iommu(pbus); + } while (pbus); BUG_ON(tbl && (tbl->it_busno != pbus->number)); @@ -373,7 +373,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, npages = iommu_num_pages(vaddr, s->length, PAGE_SIZE); entry = iommu_range_alloc(dev, tbl, npages); - if (entry == bad_dma_address) { + if (entry == DMA_ERROR_CODE) { /* makes sure unmap knows to stop */ s->dma_length = 0; goto error; @@ -391,7 +391,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, error: calgary_unmap_sg(dev, sg, nelems, dir, NULL); for_each_sg(sg, s, nelems, i) { - sg->dma_address = bad_dma_address; + sg->dma_address = DMA_ERROR_CODE; sg->dma_length = 0; } return 0; @@ -446,7 +446,7 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, /* set up tces to cover the allocated range */ mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); - if (mapping == bad_dma_address) + if (mapping == DMA_ERROR_CODE) goto free; *dma_handle = mapping; return ret; @@ -727,7 +727,7 @@ static void __init calgary_reserve_regions(struct pci_dev *dev) struct iommu_table *tbl = pci_iommu(dev->bus); /* reserve EMERGENCY_PAGES from bad_dma_address and up */ - iommu_range_reserve(tbl, bad_dma_address, EMERGENCY_PAGES); + iommu_range_reserve(tbl, DMA_ERROR_CODE, EMERGENCY_PAGES); /* avoid the BIOS/VGA first 640KB-1MB region */ /* for CalIOC2 - avoid the entire first MB */ @@ -1344,6 +1344,23 @@ static void __init get_tce_space_from_tar(void) return; } +static int __init calgary_iommu_init(void) +{ + int ret; + + /* ok, we're trying to use Calgary - let's roll */ + printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n"); + + ret = calgary_init(); + if (ret) { + printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " + "falling back to no_iommu\n", ret); + return ret; + } + + return 0; +} + void __init detect_calgary(void) { int bus; @@ -1357,7 +1374,7 @@ void __init detect_calgary(void) * if the user specified iommu=off or iommu=soft or we found * another HW IOMMU already, bail out. */ - if (swiotlb || no_iommu || iommu_detected) + if (no_iommu || iommu_detected) return; if (!use_calgary) @@ -1442,9 +1459,7 @@ void __init detect_calgary(void) printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n", specified_table_size); - /* swiotlb for devices that aren't behind the Calgary. */ - if (max_pfn > MAX_DMA32_PFN) - swiotlb = 1; + x86_init.iommu.iommu_init = calgary_iommu_init; } return; @@ -1457,35 +1472,6 @@ cleanup: } } -int __init calgary_iommu_init(void) -{ - int ret; - - if (no_iommu || (swiotlb && !calgary_detected)) - return -ENODEV; - - if (!calgary_detected) - return -ENODEV; - - /* ok, we're trying to use Calgary - let's roll */ - printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n"); - - ret = calgary_init(); - if (ret) { - printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " - "falling back to no_iommu\n", ret); - return ret; - } - - force_iommu = 1; - bad_dma_address = 0x0; - /* dma_ops is set to swiotlb or nommu */ - if (!dma_ops) - dma_ops = &nommu_dma_ops; - - return 0; -} - static int __init calgary_parse_options(char *p) { unsigned int bridge; diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index a6e804d16c3..afcc58b69c7 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -11,10 +11,11 @@ #include <asm/gart.h> #include <asm/calgary.h> #include <asm/amd_iommu.h> +#include <asm/x86_init.h> static int forbid_dac __read_mostly; -struct dma_map_ops *dma_ops; +struct dma_map_ops *dma_ops = &nommu_dma_ops; EXPORT_SYMBOL(dma_ops); static int iommu_sac_force __read_mostly; @@ -42,9 +43,6 @@ int iommu_detected __read_mostly = 0; */ int iommu_pass_through __read_mostly; -dma_addr_t bad_dma_address __read_mostly = 0; -EXPORT_SYMBOL(bad_dma_address); - /* Dummy device used for NULL arguments (normally ISA). */ struct device x86_dma_fallback_dev = { .init_name = "fallback device", @@ -126,20 +124,17 @@ void __init pci_iommu_alloc(void) /* free the range so iommu could get some range less than 4G */ dma32_free_bootmem(); #endif + if (pci_swiotlb_init()) + return; - /* - * The order of these functions is important for - * fall-back/fail-over reasons - */ gart_iommu_hole_init(); detect_calgary(); detect_intel_iommu(); + /* needs to be called after gart_iommu_hole_init */ amd_iommu_detect(); - - pci_swiotlb_init(); } void *dma_generic_alloc_coherent(struct device *dev, size_t size, @@ -214,7 +209,7 @@ static __init int iommu_setup(char *p) if (!strncmp(p, "allowdac", 8)) forbid_dac = 0; if (!strncmp(p, "nodac", 5)) - forbid_dac = -1; + forbid_dac = 1; if (!strncmp(p, "usedac", 6)) { forbid_dac = -1; return 1; @@ -289,25 +284,17 @@ static int __init pci_iommu_init(void) #ifdef CONFIG_PCI dma_debug_add_bus(&pci_bus_type); #endif + x86_init.iommu.iommu_init(); - calgary_iommu_init(); - - intel_iommu_init(); + if (swiotlb) { + printk(KERN_INFO "PCI-DMA: " + "Using software bounce buffering for IO (SWIOTLB)\n"); + swiotlb_print_info(); + } else + swiotlb_free(); - amd_iommu_init(); - - gart_iommu_init(); - - no_iommu_init(); return 0; } - -void pci_iommu_shutdown(void) -{ - gart_iommu_shutdown(); - - amd_iommu_shutdown(); -} /* Must execute after PCI subsystem */ rootfs_initcall(pci_iommu_init); diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index a7f1b64f86e..e6a0d402f17 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -39,6 +39,7 @@ #include <asm/swiotlb.h> #include <asm/dma.h> #include <asm/k8.h> +#include <asm/x86_init.h> static unsigned long iommu_bus_base; /* GART remapping area (physical) */ static unsigned long iommu_size; /* size of remapping area bytes */ @@ -46,6 +47,8 @@ static unsigned long iommu_pages; /* .. and in pages */ static u32 *iommu_gatt_base; /* Remapping table */ +static dma_addr_t bad_dma_addr; + /* * If this is disabled the IOMMU will use an optimized flushing strategy * of only flushing when an mapping is reused. With it true the GART is @@ -92,7 +95,7 @@ static unsigned long alloc_iommu(struct device *dev, int size, base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), PAGE_SIZE) >> PAGE_SHIFT; - boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, + boundary_size = ALIGN((u64)dma_get_seg_boundary(dev) + 1, PAGE_SIZE) >> PAGE_SHIFT; spin_lock_irqsave(&iommu_bitmap_lock, flags); @@ -216,7 +219,7 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, if (panic_on_overflow) panic("dma_map_area overflow %lu bytes\n", size); iommu_full(dev, size, dir); - return bad_dma_address; + return bad_dma_addr; } for (i = 0; i < npages; i++) { @@ -294,7 +297,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, int i; #ifdef CONFIG_IOMMU_DEBUG - printk(KERN_DEBUG "dma_map_sg overflow\n"); + pr_debug("dma_map_sg overflow\n"); #endif for_each_sg(sg, s, nents, i) { @@ -302,7 +305,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, if (nonforced_iommu(dev, addr, s->length)) { addr = dma_map_area(dev, addr, s->length, dir, 0); - if (addr == bad_dma_address) { + if (addr == bad_dma_addr) { if (i > 0) gart_unmap_sg(dev, sg, i, dir, NULL); nents = 0; @@ -389,12 +392,14 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, if (!dev) dev = &x86_dma_fallback_dev; - out = 0; - start = 0; - start_sg = sgmap = sg; - seg_size = 0; - max_seg_size = dma_get_max_seg_size(dev); - ps = NULL; /* shut up gcc */ + out = 0; + start = 0; + start_sg = sg; + sgmap = sg; + seg_size = 0; + max_seg_size = dma_get_max_seg_size(dev); + ps = NULL; /* shut up gcc */ + for_each_sg(sg, s, nents, i) { dma_addr_t addr = sg_phys(s); @@ -417,11 +422,12 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, sgmap, pages, need) < 0) goto error; out++; - seg_size = 0; - sgmap = sg_next(sgmap); - pages = 0; - start = i; - start_sg = s; + + seg_size = 0; + sgmap = sg_next(sgmap); + pages = 0; + start = i; + start_sg = s; } } @@ -455,7 +461,7 @@ error: iommu_full(dev, pages << PAGE_SHIFT, dir); for_each_sg(sg, s, nents, i) - s->dma_address = bad_dma_address; + s->dma_address = bad_dma_addr; return 0; } @@ -479,7 +485,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, DMA_BIDIRECTIONAL, align_mask); flush_gart(); - if (paddr != bad_dma_address) { + if (paddr != bad_dma_addr) { *dma_addr = paddr; return page_address(page); } @@ -499,6 +505,11 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr, free_pages((unsigned long)vaddr, get_order(size)); } +static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return (dma_addr == bad_dma_addr); +} + static int no_agp; static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) @@ -515,7 +526,7 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) iommu_size -= round_up(a, PMD_PAGE_SIZE) - a; if (iommu_size < 64*1024*1024) { - printk(KERN_WARNING + pr_warning( "PCI-DMA: Warning: Small IOMMU %luMB." " Consider increasing the AGP aperture in BIOS\n", iommu_size >> 20); @@ -570,28 +581,32 @@ void set_up_gart_resume(u32 aper_order, u32 aper_alloc) aperture_alloc = aper_alloc; } -static int gart_resume(struct sys_device *dev) +static void gart_fixup_northbridges(struct sys_device *dev) { - printk(KERN_INFO "PCI-DMA: Resuming GART IOMMU\n"); + int i; - if (fix_up_north_bridges) { - int i; + if (!fix_up_north_bridges) + return; - printk(KERN_INFO "PCI-DMA: Restoring GART aperture settings\n"); + pr_info("PCI-DMA: Restoring GART aperture settings\n"); - for (i = 0; i < num_k8_northbridges; i++) { - struct pci_dev *dev = k8_northbridges[i]; + for (i = 0; i < num_k8_northbridges; i++) { + struct pci_dev *dev = k8_northbridges[i]; - /* - * Don't enable translations just yet. That is the next - * step. Restore the pre-suspend aperture settings. - */ - pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, - aperture_order << 1); - pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, - aperture_alloc >> 25); - } + /* + * Don't enable translations just yet. That is the next + * step. Restore the pre-suspend aperture settings. + */ + pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, aperture_order << 1); + pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25); } +} + +static int gart_resume(struct sys_device *dev) +{ + pr_info("PCI-DMA: Resuming GART IOMMU\n"); + + gart_fixup_northbridges(dev); enable_gart_translations(); @@ -604,15 +619,14 @@ static int gart_suspend(struct sys_device *dev, pm_message_t state) } static struct sysdev_class gart_sysdev_class = { - .name = "gart", - .suspend = gart_suspend, - .resume = gart_resume, + .name = "gart", + .suspend = gart_suspend, + .resume = gart_resume, }; static struct sys_device device_gart = { - .id = 0, - .cls = &gart_sysdev_class, + .cls = &gart_sysdev_class, }; /* @@ -627,7 +641,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info) void *gatt; int i, error; - printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); + pr_info("PCI-DMA: Disabling AGP.\n"); + aper_size = aper_base = info->aper_size = 0; dev = NULL; for (i = 0; i < num_k8_northbridges; i++) { @@ -645,6 +660,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) } if (!aper_base) goto nommu; + info->aper_base = aper_base; info->aper_size = aper_size >> 20; @@ -667,14 +683,14 @@ static __init int init_k8_gatt(struct agp_kern_info *info) flush_gart(); - printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", + pr_info("PCI-DMA: aperture base @ %x size %u KB\n", aper_base, aper_size>>10); return 0; nommu: /* Should not happen anymore */ - printk(KERN_WARNING "PCI-DMA: More than 4GB of RAM and no IOMMU\n" + pr_warning("PCI-DMA: More than 4GB of RAM and no IOMMU\n" "falling back to iommu=soft.\n"); return -1; } @@ -686,14 +702,15 @@ static struct dma_map_ops gart_dma_ops = { .unmap_page = gart_unmap_page, .alloc_coherent = gart_alloc_coherent, .free_coherent = gart_free_coherent, + .mapping_error = gart_mapping_error, }; -void gart_iommu_shutdown(void) +static void gart_iommu_shutdown(void) { struct pci_dev *dev; int i; - if (no_agp && (dma_ops != &gart_dma_ops)) + if (no_agp) return; for (i = 0; i < num_k8_northbridges; i++) { @@ -708,7 +725,7 @@ void gart_iommu_shutdown(void) } } -void __init gart_iommu_init(void) +int __init gart_iommu_init(void) { struct agp_kern_info info; unsigned long iommu_start; @@ -718,7 +735,7 @@ void __init gart_iommu_init(void) long i; if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) - return; + return 0; #ifndef CONFIG_AGP_AMD64 no_agp = 1; @@ -730,35 +747,28 @@ void __init gart_iommu_init(void) (agp_copy_info(agp_bridge, &info) < 0); #endif - if (swiotlb) - return; - - /* Did we detect a different HW IOMMU? */ - if (iommu_detected && !gart_iommu_aperture) - return; - if (no_iommu || (!force_iommu && max_pfn <= MAX_DMA32_PFN) || !gart_iommu_aperture || (no_agp && init_k8_gatt(&info) < 0)) { if (max_pfn > MAX_DMA32_PFN) { - printk(KERN_WARNING "More than 4GB of memory " - "but GART IOMMU not available.\n"); - printk(KERN_WARNING "falling back to iommu=soft.\n"); + pr_warning("More than 4GB of memory but GART IOMMU not available.\n"); + pr_warning("falling back to iommu=soft.\n"); } - return; + return 0; } /* need to map that range */ - aper_size = info.aper_size << 20; - aper_base = info.aper_base; - end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); + aper_size = info.aper_size << 20; + aper_base = info.aper_base; + end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); + if (end_pfn > max_low_pfn_mapped) { start_pfn = (aper_base>>PAGE_SHIFT); init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); } - printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); + pr_info("PCI-DMA: using GART IOMMU.\n"); iommu_size = check_iommu_size(info.aper_base, aper_size); iommu_pages = iommu_size >> PAGE_SHIFT; @@ -773,8 +783,7 @@ void __init gart_iommu_init(void) ret = dma_debug_resize_entries(iommu_pages); if (ret) - printk(KERN_DEBUG - "PCI-DMA: Cannot trace all the entries\n"); + pr_debug("PCI-DMA: Cannot trace all the entries\n"); } #endif @@ -784,15 +793,14 @@ void __init gart_iommu_init(void) */ iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES); - agp_memory_reserved = iommu_size; - printk(KERN_INFO - "PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n", + pr_info("PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n", iommu_size >> 20); - iommu_start = aper_size - iommu_size; - iommu_bus_base = info.aper_base + iommu_start; - bad_dma_address = iommu_bus_base; - iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); + agp_memory_reserved = iommu_size; + iommu_start = aper_size - iommu_size; + iommu_bus_base = info.aper_base + iommu_start; + bad_dma_addr = iommu_bus_base; + iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); /* * Unmap the IOMMU part of the GART. The alias of the page is @@ -814,7 +822,7 @@ void __init gart_iommu_init(void) * the pages as Not-Present: */ wbinvd(); - + /* * Now all caches are flushed and we can safely enable * GART hardware. Doing it early leaves the possibility @@ -838,6 +846,10 @@ void __init gart_iommu_init(void) flush_gart(); dma_ops = &gart_dma_ops; + x86_platform.iommu_shutdown = gart_iommu_shutdown; + swiotlb = 0; + + return 0; } void __init gart_parse_options(char *p) @@ -856,7 +868,7 @@ void __init gart_parse_options(char *p) #endif if (isdigit(*p) && get_option(&p, &arg)) iommu_size = arg; - if (!strncmp(p, "fullflush", 8)) + if (!strncmp(p, "fullflush", 9)) iommu_fullflush = 1; if (!strncmp(p, "nofullflush", 11)) iommu_fullflush = 0; diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index a3933d4330c..22be12b60a8 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -33,7 +33,7 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page, dma_addr_t bus = page_to_phys(page) + offset; WARN_ON(size == 0); if (!check_addr("map_single", dev, bus, size)) - return bad_dma_address; + return DMA_ERROR_CODE; flush_write_buffers(); return bus; } @@ -103,12 +103,3 @@ struct dma_map_ops nommu_dma_ops = { .sync_sg_for_device = nommu_sync_sg_for_device, .is_phys = 1, }; - -void __init no_iommu_init(void) -{ - if (dma_ops) - return; - - force_iommu = 0; /* no HW IOMMU */ - dma_ops = &nommu_dma_ops; -} diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index aaa6b7839f1..e3c0a66b9e7 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -42,18 +42,28 @@ static struct dma_map_ops swiotlb_dma_ops = { .dma_supported = NULL, }; -void __init pci_swiotlb_init(void) +/* + * pci_swiotlb_init - initialize swiotlb if necessary + * + * This returns non-zero if we are forced to use swiotlb (by the boot + * option). + */ +int __init pci_swiotlb_init(void) { + int use_swiotlb = swiotlb | swiotlb_force; + /* don't initialize swiotlb if iommu=off (no_iommu=1) */ #ifdef CONFIG_X86_64 - if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN)) + if (!no_iommu && max_pfn > MAX_DMA32_PFN) swiotlb = 1; #endif if (swiotlb_force) swiotlb = 1; + if (swiotlb) { - printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n"); - swiotlb_init(); + swiotlb_init(0); dma_ops = &swiotlb_dma_ops; } + + return use_swiotlb; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 5284cd2b577..744508e7cfd 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -10,6 +10,7 @@ #include <linux/clockchips.h> #include <linux/random.h> #include <trace/events/power.h> +#include <linux/hw_breakpoint.h> #include <asm/system.h> #include <asm/apic.h> #include <asm/syscalls.h> @@ -17,6 +18,7 @@ #include <asm/uaccess.h> #include <asm/i387.h> #include <asm/ds.h> +#include <asm/debugreg.h> unsigned long idle_halt; EXPORT_SYMBOL(idle_halt); @@ -103,14 +105,7 @@ void flush_thread(void) } #endif - clear_tsk_thread_flag(tsk, TIF_DEBUG); - - tsk->thread.debugreg0 = 0; - tsk->thread.debugreg1 = 0; - tsk->thread.debugreg2 = 0; - tsk->thread.debugreg3 = 0; - tsk->thread.debugreg6 = 0; - tsk->thread.debugreg7 = 0; + flush_ptrace_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* * Forget coprocessor state.. @@ -192,16 +187,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, else if (next->debugctlmsr != prev->debugctlmsr) update_debugctlmsr(next->debugctlmsr); - if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { - set_debugreg(next->debugreg0, 0); - set_debugreg(next->debugreg1, 1); - set_debugreg(next->debugreg2, 2); - set_debugreg(next->debugreg3, 3); - /* no 4 and 5 */ - set_debugreg(next->debugreg6, 6); - set_debugreg(next->debugreg7, 7); - } - if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ test_tsk_thread_flag(next_p, TIF_NOTSC)) { /* prev and next are different */ diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 4cf79567cda..075580b3568 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -58,6 +58,7 @@ #include <asm/idle.h> #include <asm/syscalls.h> #include <asm/ds.h> +#include <asm/debugreg.h> asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -134,7 +135,7 @@ void __show_regs(struct pt_regs *regs, int all) ss = regs->ss & 0xffff; gs = get_user_gs(regs); } else { - sp = (unsigned long) (®s->sp); + sp = kernel_stack_pointer(regs); savesegment(ss, ss); savesegment(gs, gs); } @@ -187,7 +188,7 @@ void __show_regs(struct pt_regs *regs, int all) void show_regs(struct pt_regs *regs) { - __show_regs(regs, 1); + show_registers(regs); show_trace(NULL, regs, ®s->sp, regs->bp); } @@ -259,7 +260,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, task_user_gs(p) = get_user_gs(regs); + p->thread.io_bitmap_ptr = NULL; tsk = current; + err = -ENOMEM; + + memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); + if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, IO_BITMAP_BYTES, GFP_KERNEL); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index eb62cbcaa49..a98fe88fab6 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -52,6 +52,7 @@ #include <asm/idle.h> #include <asm/syscalls.h> #include <asm/ds.h> +#include <asm/debugreg.h> asmlinkage extern void ret_from_fork(void); @@ -226,8 +227,7 @@ void __show_regs(struct pt_regs *regs, int all) void show_regs(struct pt_regs *regs) { - printk(KERN_INFO "CPU %d:", smp_processor_id()); - __show_regs(regs, 1); + show_registers(regs); show_trace(NULL, regs, (void *)(regs + 1), regs->bp); } @@ -297,12 +297,16 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.fs = me->thread.fs; p->thread.gs = me->thread.gs; + p->thread.io_bitmap_ptr = NULL; savesegment(gs, p->thread.gsindex); savesegment(fs, p->thread.fsindex); savesegment(es, p->thread.es); savesegment(ds, p->thread.ds); + err = -ENOMEM; + memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -341,6 +345,7 @@ out: kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; } + return err; } @@ -495,6 +500,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ if (preload_fpu) __math_state_restore(); + return prev_p; } diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 7b058a2dc66..04d182a7cfd 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -22,6 +22,8 @@ #include <linux/seccomp.h> #include <linux/signal.h> #include <linux/workqueue.h> +#include <linux/perf_event.h> +#include <linux/hw_breakpoint.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -34,6 +36,7 @@ #include <asm/prctl.h> #include <asm/proto.h> #include <asm/ds.h> +#include <asm/hw_breakpoint.h> #include "tls.h" @@ -49,6 +52,118 @@ enum x86_regset { REGSET_IOPERM32, }; +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} +#define REG_OFFSET_END {.name = NULL, .offset = 0} + +static const struct pt_regs_offset regoffset_table[] = { +#ifdef CONFIG_X86_64 + REG_OFFSET_NAME(r15), + REG_OFFSET_NAME(r14), + REG_OFFSET_NAME(r13), + REG_OFFSET_NAME(r12), + REG_OFFSET_NAME(r11), + REG_OFFSET_NAME(r10), + REG_OFFSET_NAME(r9), + REG_OFFSET_NAME(r8), +#endif + REG_OFFSET_NAME(bx), + REG_OFFSET_NAME(cx), + REG_OFFSET_NAME(dx), + REG_OFFSET_NAME(si), + REG_OFFSET_NAME(di), + REG_OFFSET_NAME(bp), + REG_OFFSET_NAME(ax), +#ifdef CONFIG_X86_32 + REG_OFFSET_NAME(ds), + REG_OFFSET_NAME(es), + REG_OFFSET_NAME(fs), + REG_OFFSET_NAME(gs), +#endif + REG_OFFSET_NAME(orig_ax), + REG_OFFSET_NAME(ip), + REG_OFFSET_NAME(cs), + REG_OFFSET_NAME(flags), + REG_OFFSET_NAME(sp), + REG_OFFSET_NAME(ss), + REG_OFFSET_END, +}; + +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a register in struct + * pt_regs from its name. If the name is invalid, this returns -EINVAL; + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; +} + +/** + * regs_query_register_name() - query register name from its offset + * @offset: the offset of a register in struct pt_regs. + * + * regs_query_register_name() returns the name of a register from its + * offset in struct pt_regs. If the @offset is invalid, this returns NULL; + */ +const char *regs_query_register_name(unsigned int offset) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (roff->offset == offset) + return roff->name; + return NULL; +} + +static const int arg_offs_table[] = { +#ifdef CONFIG_X86_32 + [0] = offsetof(struct pt_regs, ax), + [1] = offsetof(struct pt_regs, dx), + [2] = offsetof(struct pt_regs, cx) +#else /* CONFIG_X86_64 */ + [0] = offsetof(struct pt_regs, di), + [1] = offsetof(struct pt_regs, si), + [2] = offsetof(struct pt_regs, dx), + [3] = offsetof(struct pt_regs, cx), + [4] = offsetof(struct pt_regs, r8), + [5] = offsetof(struct pt_regs, r9) +#endif +}; + +/** + * regs_get_argument_nth() - get Nth argument at function call + * @regs: pt_regs which contains registers at function entry. + * @n: argument number. + * + * regs_get_argument_nth() returns @n th argument of a function call. + * Since usually the kernel stack will be changed right after function entry, + * you must use this at function entry. If the @n th entry is NOT in the + * kernel stack or pt_regs, this returns 0. + */ +unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n) +{ + if (n < ARRAY_SIZE(arg_offs_table)) + return *(unsigned long *)((char *)regs + arg_offs_table[n]); + else { + /* + * The typical case: arg n is on the stack. + * (Note: stack[0] = return address, so skip it) + */ + n -= ARRAY_SIZE(arg_offs_table); + return regs_get_kernel_stack_nth(regs, 1 + n); + } +} + /* * does not yet catch signals sent when the child dies. * in exit.c or in signal.c. @@ -137,11 +252,6 @@ static int set_segment_reg(struct task_struct *task, return 0; } -static unsigned long debugreg_addr_limit(struct task_struct *task) -{ - return TASK_SIZE - 3; -} - #else /* CONFIG_X86_64 */ #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) @@ -266,15 +376,6 @@ static int set_segment_reg(struct task_struct *task, return 0; } -static unsigned long debugreg_addr_limit(struct task_struct *task) -{ -#ifdef CONFIG_IA32_EMULATION - if (test_tsk_thread_flag(task, TIF_IA32)) - return IA32_PAGE_OFFSET - 3; -#endif - return TASK_SIZE_MAX - 7; -} - #endif /* CONFIG_X86_32 */ static unsigned long get_flags(struct task_struct *task) @@ -454,99 +555,239 @@ static int genregs_set(struct task_struct *target, return ret; } +static void ptrace_triggered(struct perf_event *bp, void *data) +{ + int i; + struct thread_struct *thread = &(current->thread); + + /* + * Store in the virtual DR6 register the fact that the breakpoint + * was hit so the thread's debugger will see it. + */ + for (i = 0; i < HBP_NUM; i++) { + if (thread->ptrace_bps[i] == bp) + break; + } + + thread->debugreg6 |= (DR_TRAP0 << i); +} + /* - * This function is trivial and will be inlined by the compiler. - * Having it separates the implementation details of debug - * registers from the interface details of ptrace. + * Walk through every ptrace breakpoints for this thread and + * build the dr7 value on top of their attributes. + * */ -static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) +static unsigned long ptrace_get_dr7(struct perf_event *bp[]) { - switch (n) { - case 0: return child->thread.debugreg0; - case 1: return child->thread.debugreg1; - case 2: return child->thread.debugreg2; - case 3: return child->thread.debugreg3; - case 6: return child->thread.debugreg6; - case 7: return child->thread.debugreg7; + int i; + int dr7 = 0; + struct arch_hw_breakpoint *info; + + for (i = 0; i < HBP_NUM; i++) { + if (bp[i] && !bp[i]->attr.disabled) { + info = counter_arch_bp(bp[i]); + dr7 |= encode_dr7(i, info->len, info->type); + } } - return 0; + + return dr7; } -static int ptrace_set_debugreg(struct task_struct *child, - int n, unsigned long data) +static struct perf_event * +ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, + struct task_struct *tsk, int disabled) { - int i; + int err; + int gen_len, gen_type; + DEFINE_BREAKPOINT_ATTR(attr); - if (unlikely(n == 4 || n == 5)) - return -EIO; + /* + * We shoud have at least an inactive breakpoint at this + * slot. It means the user is writing dr7 without having + * written the address register first + */ + if (!bp) + return ERR_PTR(-EINVAL); - if (n < 4 && unlikely(data >= debugreg_addr_limit(child))) - return -EIO; + err = arch_bp_generic_fields(len, type, &gen_len, &gen_type); + if (err) + return ERR_PTR(err); - switch (n) { - case 0: child->thread.debugreg0 = data; break; - case 1: child->thread.debugreg1 = data; break; - case 2: child->thread.debugreg2 = data; break; - case 3: child->thread.debugreg3 = data; break; + attr = bp->attr; + attr.bp_len = gen_len; + attr.bp_type = gen_type; + attr.disabled = disabled; - case 6: - if ((data & ~0xffffffffUL) != 0) - return -EIO; - child->thread.debugreg6 = data; - break; + return modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); +} + +/* + * Handle ptrace writes to debug register 7. + */ +static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) +{ + struct thread_struct *thread = &(tsk->thread); + unsigned long old_dr7; + int i, orig_ret = 0, rc = 0; + int enabled, second_pass = 0; + unsigned len, type; + struct perf_event *bp; + + data &= ~DR_CONTROL_RESERVED; + old_dr7 = ptrace_get_dr7(thread->ptrace_bps); +restore: + /* + * Loop through all the hardware breakpoints, making the + * appropriate changes to each. + */ + for (i = 0; i < HBP_NUM; i++) { + enabled = decode_dr7(data, i, &len, &type); + bp = thread->ptrace_bps[i]; + + if (!enabled) { + if (bp) { + /* + * Don't unregister the breakpoints right-away, + * unless all register_user_hw_breakpoint() + * requests have succeeded. This prevents + * any window of opportunity for debug + * register grabbing by other users. + */ + if (!second_pass) + continue; + + thread->ptrace_bps[i] = NULL; + bp = ptrace_modify_breakpoint(bp, len, type, + tsk, 1); + if (IS_ERR(bp)) { + rc = PTR_ERR(bp); + thread->ptrace_bps[i] = NULL; + break; + } + thread->ptrace_bps[i] = bp; + } + continue; + } + + bp = ptrace_modify_breakpoint(bp, len, type, tsk, 0); + + /* Incorrect bp, or we have a bug in bp API */ + if (IS_ERR(bp)) { + rc = PTR_ERR(bp); + thread->ptrace_bps[i] = NULL; + break; + } + thread->ptrace_bps[i] = bp; + } + /* + * Make a second pass to free the remaining unused breakpoints + * or to restore the original breakpoints if an error occurred. + */ + if (!second_pass) { + second_pass = 1; + if (rc < 0) { + orig_ret = rc; + data = old_dr7; + } + goto restore; + } + return ((orig_ret < 0) ? orig_ret : rc); +} + +/* + * Handle PTRACE_PEEKUSR calls for the debug register area. + */ +static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) +{ + struct thread_struct *thread = &(tsk->thread); + unsigned long val = 0; - case 7: + if (n < HBP_NUM) { + struct perf_event *bp; + bp = thread->ptrace_bps[n]; + if (!bp) + return 0; + val = bp->hw.info.address; + } else if (n == 6) { + val = thread->debugreg6; + } else if (n == 7) { + val = ptrace_get_dr7(thread->ptrace_bps); + } + return val; +} + +static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, + unsigned long addr) +{ + struct perf_event *bp; + struct thread_struct *t = &tsk->thread; + DEFINE_BREAKPOINT_ATTR(attr); + + if (!t->ptrace_bps[nr]) { /* - * Sanity-check data. Take one half-byte at once with - * check = (val >> (16 + 4*i)) & 0xf. It contains the - * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits - * 2 and 3 are LENi. Given a list of invalid values, - * we do mask |= 1 << invalid_value, so that - * (mask >> check) & 1 is a correct test for invalid - * values. - * - * R/Wi contains the type of the breakpoint / - * watchpoint, LENi contains the length of the watched - * data in the watchpoint case. - * - * The invalid values are: - * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit] - * - R/Wi == 0x10 (break on I/O reads or writes), so - * mask |= 0x4444. - * - R/Wi == 0x00 && LENi != 0x00, so we have mask |= - * 0x1110. - * - * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54. - * - * See the Intel Manual "System Programming Guide", - * 15.2.4 - * - * Note that LENi == 0x10 is defined on x86_64 in long - * mode (i.e. even for 32-bit userspace software, but - * 64-bit kernel), so the x86_64 mask value is 0x5454. - * See the AMD manual no. 24593 (AMD64 System Programming) + * Put stub len and type to register (reserve) an inactive but + * correct bp */ -#ifdef CONFIG_X86_32 -#define DR7_MASK 0x5f54 -#else -#define DR7_MASK 0x5554 -#endif - data &= ~DR_CONTROL_RESERVED; - for (i = 0; i < 4; i++) - if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1) - return -EIO; - child->thread.debugreg7 = data; - if (data) - set_tsk_thread_flag(child, TIF_DEBUG); - else - clear_tsk_thread_flag(child, TIF_DEBUG); - break; + attr.bp_addr = addr; + attr.bp_len = HW_BREAKPOINT_LEN_1; + attr.bp_type = HW_BREAKPOINT_W; + attr.disabled = 1; + + bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk); + } else { + bp = t->ptrace_bps[nr]; + t->ptrace_bps[nr] = NULL; + + attr = bp->attr; + attr.bp_addr = addr; + bp = modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); } + /* + * CHECKME: the previous code returned -EIO if the addr wasn't a + * valid task virtual addr. The new one will return -EINVAL in this + * case. + * -EINVAL may be what we want for in-kernel breakpoints users, but + * -EIO looks better for ptrace, since we refuse a register writing + * for the user. And anyway this is the previous behaviour. + */ + if (IS_ERR(bp)) + return PTR_ERR(bp); + + t->ptrace_bps[nr] = bp; return 0; } /* + * Handle PTRACE_POKEUSR calls for the debug register area. + */ +int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) +{ + struct thread_struct *thread = &(tsk->thread); + int rc = 0; + + /* There are no DR4 or DR5 registers */ + if (n == 4 || n == 5) + return -EIO; + + if (n == 6) { + thread->debugreg6 = val; + goto ret_path; + } + if (n < HBP_NUM) { + rc = ptrace_set_breakpoint_addr(tsk, n, val); + if (rc) + return rc; + } + /* All that's left is DR7 */ + if (n == 7) + rc = ptrace_write_dr7(tsk, val); + +ret_path: + return rc; +} + +/* * These access the current or another (stopped) task's io permission * bitmap for debugging or core dump. */ diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f93078746e0..2b97fc5b124 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -23,7 +23,7 @@ # include <linux/ctype.h> # include <linux/mc146818rtc.h> #else -# include <asm/iommu.h> +# include <asm/x86_init.h> #endif /* @@ -622,7 +622,7 @@ void native_machine_shutdown(void) #endif #ifdef CONFIG_X86_64 - pci_iommu_shutdown(); + x86_platform.iommu_shutdown(); #endif } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 2a34f9c5be2..82e88cdda9b 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -109,6 +109,7 @@ #ifdef CONFIG_X86_64 #include <asm/numa_64.h> #endif +#include <asm/mce.h> /* * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. @@ -247,7 +248,7 @@ EXPORT_SYMBOL(edd); * from boot_params into a safe place. * */ -static inline void copy_edd(void) +static inline void __init copy_edd(void) { memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer, sizeof(edd.mbr_signature)); @@ -256,7 +257,7 @@ static inline void copy_edd(void) edd.edd_info_nr = boot_params.eddbuf_entries; } #else -static inline void copy_edd(void) +static inline void __init copy_edd(void) { } #endif @@ -1031,6 +1032,8 @@ void __init setup_arch(char **cmdline_p) #endif #endif x86_init.oem.banner(); + + mcheck_init(); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 6a44a76055a..fbf3b07c856 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -799,15 +799,6 @@ static void do_signal(struct pt_regs *regs) signr = get_signal_to_deliver(&info, &ka, regs, NULL); if (signr > 0) { - /* - * Re-enable any watchpoints before delivering the - * signal to user space. The processor register will - * have been cleared if the watchpoint triggered - * inside the kernel. - */ - if (current->thread.debugreg7) - set_debugreg(current->thread.debugreg7, 7); - /* Whee! Actually deliver the signal. */ if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { /* diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 565ebc65920..324f2a44c22 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1250,16 +1250,7 @@ static void __ref remove_cpu_from_maps(int cpu) void cpu_disable_common(void) { int cpu = smp_processor_id(); - /* - * HACK: - * Allow any queued timer interrupts to get serviced - * This is only a temporary solution until we cleanup - * fixup_irqs as we do for IA64. - */ - local_irq_enable(); - mdelay(1); - local_irq_disable(); remove_siblinginfo(cpu); /* It's now safe to remove this processor from the online map */ diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7e37dcee0cc..33399176512 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -529,77 +529,56 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; - unsigned long condition; + unsigned long dr6; int si_code; - get_debugreg(condition, 6); + get_debugreg(dr6, 6); /* Catch kmemcheck conditions first of all! */ - if (condition & DR_STEP && kmemcheck_trap(regs)) + if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) return; + /* DR6 may or may not be cleared by the CPU */ + set_debugreg(0, 6); /* * The processor cleared BTF, so don't mark that we need it set. */ clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); tsk->thread.debugctlmsr = 0; - if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, - SIGTRAP) == NOTIFY_STOP) + /* Store the virtualized DR6 value */ + tsk->thread.debugreg6 = dr6; + + if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code, + SIGTRAP) == NOTIFY_STOP) return; /* It's safe to allow irq's after DR6 has been saved */ preempt_conditional_sti(regs); - /* Mask out spurious debug traps due to lazy DR7 setting */ - if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { - if (!tsk->thread.debugreg7) - goto clear_dr7; + if (regs->flags & X86_VM_MASK) { + handle_vm86_trap((struct kernel_vm86_regs *) regs, + error_code, 1); + return; } -#ifdef CONFIG_X86_32 - if (regs->flags & X86_VM_MASK) - goto debug_vm86; -#endif - - /* Save debug status register where ptrace can see it */ - tsk->thread.debugreg6 = condition; - /* - * Single-stepping through TF: make sure we ignore any events in - * kernel space (but re-enable TF when returning to user mode). + * Single-stepping through system calls: ignore any exceptions in + * kernel space, but re-enable TF when returning to user mode. + * + * We already checked v86 mode above, so we can check for kernel mode + * by just checking the CPL of CS. */ - if (condition & DR_STEP) { - if (!user_mode(regs)) - goto clear_TF_reenable; + if ((dr6 & DR_STEP) && !user_mode(regs)) { + tsk->thread.debugreg6 &= ~DR_STEP; + set_tsk_thread_flag(tsk, TIF_SINGLESTEP); + regs->flags &= ~X86_EFLAGS_TF; } - - si_code = get_si_code(condition); - /* Ok, finally something we can handle */ - send_sigtrap(tsk, regs, error_code, si_code); - - /* - * Disable additional traps. They'll be re-enabled when - * the signal is delivered. - */ -clear_dr7: - set_debugreg(0, 7); + si_code = get_si_code(tsk->thread.debugreg6); + if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS)) + send_sigtrap(tsk, regs, error_code, si_code); preempt_conditional_cli(regs); - return; -#ifdef CONFIG_X86_32 -debug_vm86: - /* reenable preemption: handle_vm86_trap() might sleep */ - dec_preempt_count(); - handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); - conditional_cli(regs); - return; -#endif - -clear_TF_reenable: - set_tsk_thread_flag(tsk, TIF_SINGLESTEP); - regs->flags &= ~X86_EFLAGS_TF; - preempt_conditional_cli(regs); return; } diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index f37930954d1..eed156851f5 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -114,13 +114,12 @@ void __cpuinit check_tsc_sync_source(int cpu) return; if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { - printk_once(KERN_INFO "Skipping synchronization checks as TSC is reliable.\n"); + if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING) + pr_info( + "Skipped synchronization checks as TSC is reliable.\n"); return; } - pr_info("checking TSC synchronization [CPU#%d -> CPU#%d]:", - smp_processor_id(), cpu); - /* * Reset it - in case this is a second bootup: */ @@ -142,12 +141,14 @@ void __cpuinit check_tsc_sync_source(int cpu) cpu_relax(); if (nr_warps) { - printk("\n"); + pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n", + smp_processor_id(), cpu); pr_warning("Measured %Ld cycles TSC warp between CPUs, " "turning off TSC clock.\n", max_warp); mark_tsc_unstable("check_tsc_sync_source failed"); } else { - printk(" passed.\n"); + pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n", + smp_processor_id(), cpu); } /* diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c index aeef529917e..61d805df4c9 100644 --- a/arch/x86/kernel/uv_irq.c +++ b/arch/x86/kernel/uv_irq.c @@ -9,10 +9,25 @@ */ #include <linux/module.h> +#include <linux/rbtree.h> #include <linux/irq.h> #include <asm/apic.h> #include <asm/uv/uv_irq.h> +#include <asm/uv/uv_hub.h> + +/* MMR offset and pnode of hub sourcing interrupts for a given irq */ +struct uv_irq_2_mmr_pnode{ + struct rb_node list; + unsigned long offset; + int pnode; + int irq; +}; + +static spinlock_t uv_irq_lock; +static struct rb_root uv_irq_root; + +static int uv_set_irq_affinity(unsigned int, const struct cpumask *); static void uv_noop(unsigned int irq) { @@ -39,25 +54,214 @@ struct irq_chip uv_irq_chip = { .unmask = uv_noop, .eoi = uv_ack_apic, .end = uv_noop, + .set_affinity = uv_set_irq_affinity, }; /* + * Add offset and pnode information of the hub sourcing interrupts to the + * rb tree for a specific irq. + */ +static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade) +{ + struct rb_node **link = &uv_irq_root.rb_node; + struct rb_node *parent = NULL; + struct uv_irq_2_mmr_pnode *n; + struct uv_irq_2_mmr_pnode *e; + unsigned long irqflags; + + n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL, + uv_blade_to_memory_nid(blade)); + if (!n) + return -ENOMEM; + + n->irq = irq; + n->offset = offset; + n->pnode = uv_blade_to_pnode(blade); + spin_lock_irqsave(&uv_irq_lock, irqflags); + /* Find the right place in the rbtree: */ + while (*link) { + parent = *link; + e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list); + + if (unlikely(irq == e->irq)) { + /* irq entry exists */ + e->pnode = uv_blade_to_pnode(blade); + e->offset = offset; + spin_unlock_irqrestore(&uv_irq_lock, irqflags); + kfree(n); + return 0; + } + + if (irq < e->irq) + link = &(*link)->rb_left; + else + link = &(*link)->rb_right; + } + + /* Insert the node into the rbtree. */ + rb_link_node(&n->list, parent, link); + rb_insert_color(&n->list, &uv_irq_root); + + spin_unlock_irqrestore(&uv_irq_lock, irqflags); + return 0; +} + +/* Retrieve offset and pnode information from the rb tree for a specific irq */ +int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode) +{ + struct uv_irq_2_mmr_pnode *e; + struct rb_node *n; + unsigned long irqflags; + + spin_lock_irqsave(&uv_irq_lock, irqflags); + n = uv_irq_root.rb_node; + while (n) { + e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); + + if (e->irq == irq) { + *offset = e->offset; + *pnode = e->pnode; + spin_unlock_irqrestore(&uv_irq_lock, irqflags); + return 0; + } + + if (irq < e->irq) + n = n->rb_left; + else + n = n->rb_right; + } + spin_unlock_irqrestore(&uv_irq_lock, irqflags); + return -1; +} + +/* + * Re-target the irq to the specified CPU and enable the specified MMR located + * on the specified blade to allow the sending of MSIs to the specified CPU. + */ +static int +arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, + unsigned long mmr_offset, int restrict) +{ + const struct cpumask *eligible_cpu = cpumask_of(cpu); + struct irq_desc *desc = irq_to_desc(irq); + struct irq_cfg *cfg; + int mmr_pnode; + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + int err; + + BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != + sizeof(unsigned long)); + + cfg = irq_cfg(irq); + + err = assign_irq_vector(irq, cfg, eligible_cpu); + if (err != 0) + return err; + + if (restrict == UV_AFFINITY_CPU) + desc->status |= IRQ_NO_BALANCING; + else + desc->status |= IRQ_MOVE_PCNTXT; + + set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, + irq_name); + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + entry->vector = cfg->vector; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; + entry->polarity = 0; + entry->trigger = 0; + entry->mask = 0; + entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); + + mmr_pnode = uv_blade_to_pnode(mmr_blade); + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); + + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + + return irq; +} + +/* + * Disable the specified MMR located on the specified blade so that MSIs are + * longer allowed to be sent. + */ +static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset) +{ + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + + BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != + sizeof(unsigned long)); + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + entry->mask = 1; + + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); +} + +static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask) +{ + struct irq_desc *desc = irq_to_desc(irq); + struct irq_cfg *cfg = desc->chip_data; + unsigned int dest; + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + unsigned long mmr_offset; + unsigned mmr_pnode; + + dest = set_desc_affinity(desc, mask); + if (dest == BAD_APICID) + return -1; + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + + entry->vector = cfg->vector; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; + entry->polarity = 0; + entry->trigger = 0; + entry->mask = 0; + entry->dest = dest; + + /* Get previously stored MMR and pnode of hub sourcing interrupts */ + if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode)) + return -1; + + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); + + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + + return 0; +} + +/* * Set up a mapping of an available irq and vector, and enable the specified * MMR that defines the MSI that is to be sent to the specified CPU when an * interrupt is raised. */ int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, - unsigned long mmr_offset) + unsigned long mmr_offset, int restrict) { - int irq; - int ret; + int irq, ret; + + irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade)); - irq = create_irq(); if (irq <= 0) return -EBUSY; - ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset); - if (ret != irq) + ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset, + restrict); + if (ret == irq) + uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade); + else destroy_irq(irq); return ret; @@ -71,9 +275,28 @@ EXPORT_SYMBOL_GPL(uv_setup_irq); * * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq(). */ -void uv_teardown_irq(unsigned int irq, int mmr_blade, unsigned long mmr_offset) +void uv_teardown_irq(unsigned int irq) { - arch_disable_uv_irq(mmr_blade, mmr_offset); + struct uv_irq_2_mmr_pnode *e; + struct rb_node *n; + unsigned long irqflags; + + spin_lock_irqsave(&uv_irq_lock, irqflags); + n = uv_irq_root.rb_node; + while (n) { + e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); + if (e->irq == irq) { + arch_disable_uv_irq(e->pnode, e->offset); + rb_erase(n, &uv_irq_root); + kfree(e); + break; + } + if (irq < e->irq) + n = n->rb_left; + else + n = n->rb_right; + } + spin_unlock_irqrestore(&uv_irq_lock, irqflags); destroy_irq(irq); } EXPORT_SYMBOL_GPL(uv_teardown_irq); diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index f068553a1b1..abda6f53e71 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -183,7 +183,7 @@ static void __init MP_processor_info(struct mpc_cpu *m) return; } - apic_cpus = apic->apicid_to_cpu_present(m->apicid); + apic->apicid_to_cpu_present(m->apicid, &apic_cpus); physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); /* * Validate version @@ -486,7 +486,7 @@ static void end_cobalt_irq(unsigned int irq) } static struct irq_chip cobalt_irq_type = { - .typename = "Cobalt-APIC", + .name = "Cobalt-APIC", .startup = startup_cobalt_irq, .shutdown = disable_cobalt_irq, .enable = enable_cobalt_irq, @@ -523,7 +523,7 @@ static void end_piix4_master_irq(unsigned int irq) } static struct irq_chip piix4_master_irq_type = { - .typename = "PIIX4-master", + .name = "PIIX4-master", .startup = startup_piix4_master_irq, .ack = ack_cobalt_irq, .end = end_piix4_master_irq, @@ -531,7 +531,7 @@ static struct irq_chip piix4_master_irq_type = { static struct irq_chip piix4_virtual_irq_type = { - .typename = "PIIX4-virtual", + .name = "PIIX4-virtual", .shutdown = disable_8259A_irq, .enable = enable_8259A_irq, .disable = disable_8259A_irq, diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 3909e3ba5ce..a1029769b6f 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -30,9 +30,8 @@ EXPORT_SYMBOL(__put_user_8); EXPORT_SYMBOL(copy_user_generic); EXPORT_SYMBOL(__copy_user_nocache); -EXPORT_SYMBOL(copy_from_user); -EXPORT_SYMBOL(copy_to_user); -EXPORT_SYMBOL(__copy_from_user_inatomic); +EXPORT_SYMBOL(_copy_from_user); +EXPORT_SYMBOL(_copy_to_user); EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(clear_page); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 4449a4a2c2e..d11c5ff7c65 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -14,10 +14,13 @@ #include <asm/time.h> #include <asm/irq.h> #include <asm/tsc.h> +#include <asm/iommu.h> void __cpuinit x86_init_noop(void) { } void __init x86_init_uint_noop(unsigned int unused) { } void __init x86_init_pgd_noop(pgd_t *unused) { } +int __init iommu_init_noop(void) { return 0; } +void iommu_shutdown_noop(void) { } /* * The platform setup functions are preset with the default functions @@ -62,6 +65,10 @@ struct x86_init_ops x86_init __initdata = { .tsc_pre_init = x86_init_noop, .timer_init = hpet_time_init, }, + + .iommu = { + .iommu_init = iommu_init_noop, + }, }; struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { @@ -72,4 +79,5 @@ struct x86_platform_ops x86_platform = { .calibrate_tsc = native_calibrate_tsc, .get_wallclock = mach_get_cmos_time, .set_wallclock = mach_set_rtc_mmss, + .iommu_shutdown = iommu_shutdown_noop, }; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ae07d261527..4fc80174191 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -42,6 +42,7 @@ #define CREATE_TRACE_POINTS #include "trace.h" +#include <asm/debugreg.h> #include <asm/uaccess.h> #include <asm/msr.h> #include <asm/desc.h> @@ -3643,14 +3644,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) trace_kvm_entry(vcpu->vcpu_id); kvm_x86_ops->run(vcpu, kvm_run); - if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) { - set_debugreg(current->thread.debugreg0, 0); - set_debugreg(current->thread.debugreg1, 1); - set_debugreg(current->thread.debugreg2, 2); - set_debugreg(current->thread.debugreg3, 3); - set_debugreg(current->thread.debugreg6, 6); - set_debugreg(current->thread.debugreg7, 7); - } + /* + * If the guest has used debug registers, at least dr7 + * will be disabled while returning to the host. + * If we don't have active breakpoints in the host, we don't + * care about the messed up debug address registers. But if + * we have some of them active, restore the old state. + */ + if (hw_breakpoint_active()) + hw_breakpoint_restore(); set_bit(KVM_REQ_KICK, &vcpu->requests); local_irq_enable(); diff --git a/arch/x86/lib/.gitignore b/arch/x86/lib/.gitignore new file mode 100644 index 00000000000..8df89f0a3fe --- /dev/null +++ b/arch/x86/lib/.gitignore @@ -0,0 +1 @@ +inat-tables.c diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 85f5db95c60..a2d6472895f 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -2,12 +2,25 @@ # Makefile for x86 specific library files. # +inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk +inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt +quiet_cmd_inat_tables = GEN $@ + cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ + +$(obj)/inat-tables.c: $(inat_tables_script) $(inat_tables_maps) + $(call cmd,inat_tables) + +$(obj)/inat.o: $(obj)/inat-tables.c + +clean-files := inat-tables.c + obj-$(CONFIG_SMP) := msr.o lib-y := delay.o lib-y += thunk_$(BITS).o lib-y += usercopy_$(BITS).o getuser.o putuser.o lib-y += memcpy_$(BITS).o +lib-y += insn.o inat.o obj-y += msr-reg.o msr-reg-export.o diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 6ba0f7bb85e..cf889d4e076 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -65,7 +65,7 @@ .endm /* Standard copy_to_user with segment limit checking */ -ENTRY(copy_to_user) +ENTRY(_copy_to_user) CFI_STARTPROC GET_THREAD_INFO(%rax) movq %rdi,%rcx @@ -75,10 +75,10 @@ ENTRY(copy_to_user) jae bad_to_user ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string CFI_ENDPROC -ENDPROC(copy_to_user) +ENDPROC(_copy_to_user) /* Standard copy_from_user with segment limit checking */ -ENTRY(copy_from_user) +ENTRY(_copy_from_user) CFI_STARTPROC GET_THREAD_INFO(%rax) movq %rsi,%rcx @@ -88,7 +88,7 @@ ENTRY(copy_from_user) jae bad_from_user ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string CFI_ENDPROC -ENDPROC(copy_from_user) +ENDPROC(_copy_from_user) ENTRY(copy_user_generic) CFI_STARTPROC @@ -96,12 +96,6 @@ ENTRY(copy_user_generic) CFI_ENDPROC ENDPROC(copy_user_generic) -ENTRY(__copy_from_user_inatomic) - CFI_STARTPROC - ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string - CFI_ENDPROC -ENDPROC(__copy_from_user_inatomic) - .section .fixup,"ax" /* must zero dest */ ENTRY(bad_from_user) diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c new file mode 100644 index 00000000000..46fc4ee09fc --- /dev/null +++ b/arch/x86/lib/inat.c @@ -0,0 +1,90 @@ +/* + * x86 instruction attribute tables + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +#include <asm/insn.h> + +/* Attribute tables are generated from opcode map */ +#include "inat-tables.c" + +/* Attribute search APIs */ +insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) +{ + return inat_primary_table[opcode]; +} + +insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, insn_byte_t last_pfx, + insn_attr_t esc_attr) +{ + const insn_attr_t *table; + insn_attr_t lpfx_attr; + int n, m = 0; + + n = inat_escape_id(esc_attr); + if (last_pfx) { + lpfx_attr = inat_get_opcode_attribute(last_pfx); + m = inat_last_prefix_id(lpfx_attr); + } + table = inat_escape_tables[n][0]; + if (!table) + return 0; + if (inat_has_variant(table[opcode]) && m) { + table = inat_escape_tables[n][m]; + if (!table) + return 0; + } + return table[opcode]; +} + +insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx, + insn_attr_t grp_attr) +{ + const insn_attr_t *table; + insn_attr_t lpfx_attr; + int n, m = 0; + + n = inat_group_id(grp_attr); + if (last_pfx) { + lpfx_attr = inat_get_opcode_attribute(last_pfx); + m = inat_last_prefix_id(lpfx_attr); + } + table = inat_group_tables[n][0]; + if (!table) + return inat_group_common_attribute(grp_attr); + if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) { + table = inat_group_tables[n][m]; + if (!table) + return inat_group_common_attribute(grp_attr); + } + return table[X86_MODRM_REG(modrm)] | + inat_group_common_attribute(grp_attr); +} + +insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, + insn_byte_t vex_p) +{ + const insn_attr_t *table; + if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) + return 0; + table = inat_avx_tables[vex_m][vex_p]; + if (!table) + return 0; + return table[opcode]; +} + diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c new file mode 100644 index 00000000000..9f33b984d0e --- /dev/null +++ b/arch/x86/lib/insn.c @@ -0,0 +1,516 @@ +/* + * x86 instruction analysis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2002, 2004, 2009 + */ + +#include <linux/string.h> +#include <asm/inat.h> +#include <asm/insn.h> + +#define get_next(t, insn) \ + ({t r; r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) + +#define peek_next(t, insn) \ + ({t r; r = *(t*)insn->next_byte; r; }) + +#define peek_nbyte_next(t, insn, n) \ + ({t r; r = *(t*)((insn)->next_byte + n); r; }) + +/** + * insn_init() - initialize struct insn + * @insn: &struct insn to be initialized + * @kaddr: address (in kernel memory) of instruction (or copy thereof) + * @x86_64: !0 for 64-bit kernel or 64-bit app + */ +void insn_init(struct insn *insn, const void *kaddr, int x86_64) +{ + memset(insn, 0, sizeof(*insn)); + insn->kaddr = kaddr; + insn->next_byte = kaddr; + insn->x86_64 = x86_64 ? 1 : 0; + insn->opnd_bytes = 4; + if (x86_64) + insn->addr_bytes = 8; + else + insn->addr_bytes = 4; +} + +/** + * insn_get_prefixes - scan x86 instruction prefix bytes + * @insn: &struct insn containing instruction + * + * Populates the @insn->prefixes bitmap, and updates @insn->next_byte + * to point to the (first) opcode. No effect if @insn->prefixes.got + * is already set. + */ +void insn_get_prefixes(struct insn *insn) +{ + struct insn_field *prefixes = &insn->prefixes; + insn_attr_t attr; + insn_byte_t b, lb; + int i, nb; + + if (prefixes->got) + return; + + nb = 0; + lb = 0; + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + while (inat_is_legacy_prefix(attr)) { + /* Skip if same prefix */ + for (i = 0; i < nb; i++) + if (prefixes->bytes[i] == b) + goto found; + if (nb == 4) + /* Invalid instruction */ + break; + prefixes->bytes[nb++] = b; + if (inat_is_address_size_prefix(attr)) { + /* address size switches 2/4 or 4/8 */ + if (insn->x86_64) + insn->addr_bytes ^= 12; + else + insn->addr_bytes ^= 6; + } else if (inat_is_operand_size_prefix(attr)) { + /* oprand size switches 2/4 */ + insn->opnd_bytes ^= 6; + } +found: + prefixes->nbytes++; + insn->next_byte++; + lb = b; + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + } + /* Set the last prefix */ + if (lb && lb != insn->prefixes.bytes[3]) { + if (unlikely(insn->prefixes.bytes[3])) { + /* Swap the last prefix */ + b = insn->prefixes.bytes[3]; + for (i = 0; i < nb; i++) + if (prefixes->bytes[i] == lb) + prefixes->bytes[i] = b; + } + insn->prefixes.bytes[3] = lb; + } + + /* Decode REX prefix */ + if (insn->x86_64) { + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + if (inat_is_rex_prefix(attr)) { + insn->rex_prefix.value = b; + insn->rex_prefix.nbytes = 1; + insn->next_byte++; + if (X86_REX_W(b)) + /* REX.W overrides opnd_size */ + insn->opnd_bytes = 8; + } + } + insn->rex_prefix.got = 1; + + /* Decode VEX prefix */ + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + if (inat_is_vex_prefix(attr)) { + insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1); + if (!insn->x86_64) { + /* + * In 32-bits mode, if the [7:6] bits (mod bits of + * ModRM) on the second byte are not 11b, it is + * LDS or LES. + */ + if (X86_MODRM_MOD(b2) != 3) + goto vex_end; + } + insn->vex_prefix.bytes[0] = b; + insn->vex_prefix.bytes[1] = b2; + if (inat_is_vex3_prefix(attr)) { + b2 = peek_nbyte_next(insn_byte_t, insn, 2); + insn->vex_prefix.bytes[2] = b2; + insn->vex_prefix.nbytes = 3; + insn->next_byte += 3; + if (insn->x86_64 && X86_VEX_W(b2)) + /* VEX.W overrides opnd_size */ + insn->opnd_bytes = 8; + } else { + insn->vex_prefix.nbytes = 2; + insn->next_byte += 2; + } + } +vex_end: + insn->vex_prefix.got = 1; + + prefixes->got = 1; + return; +} + +/** + * insn_get_opcode - collect opcode(s) + * @insn: &struct insn containing instruction + * + * Populates @insn->opcode, updates @insn->next_byte to point past the + * opcode byte(s), and set @insn->attr (except for groups). + * If necessary, first collects any preceding (prefix) bytes. + * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got + * is already 1. + */ +void insn_get_opcode(struct insn *insn) +{ + struct insn_field *opcode = &insn->opcode; + insn_byte_t op, pfx; + if (opcode->got) + return; + if (!insn->prefixes.got) + insn_get_prefixes(insn); + + /* Get first opcode */ + op = get_next(insn_byte_t, insn); + opcode->bytes[0] = op; + opcode->nbytes = 1; + + /* Check if there is VEX prefix or not */ + if (insn_is_avx(insn)) { + insn_byte_t m, p; + m = insn_vex_m_bits(insn); + p = insn_vex_p_bits(insn); + insn->attr = inat_get_avx_attribute(op, m, p); + if (!inat_accept_vex(insn->attr)) + insn->attr = 0; /* This instruction is bad */ + goto end; /* VEX has only 1 byte for opcode */ + } + + insn->attr = inat_get_opcode_attribute(op); + while (inat_is_escape(insn->attr)) { + /* Get escaped opcode */ + op = get_next(insn_byte_t, insn); + opcode->bytes[opcode->nbytes++] = op; + pfx = insn_last_prefix(insn); + insn->attr = inat_get_escape_attribute(op, pfx, insn->attr); + } + if (inat_must_vex(insn->attr)) + insn->attr = 0; /* This instruction is bad */ +end: + opcode->got = 1; +} + +/** + * insn_get_modrm - collect ModRM byte, if any + * @insn: &struct insn containing instruction + * + * Populates @insn->modrm and updates @insn->next_byte to point past the + * ModRM byte, if any. If necessary, first collects the preceding bytes + * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. + */ +void insn_get_modrm(struct insn *insn) +{ + struct insn_field *modrm = &insn->modrm; + insn_byte_t pfx, mod; + if (modrm->got) + return; + if (!insn->opcode.got) + insn_get_opcode(insn); + + if (inat_has_modrm(insn->attr)) { + mod = get_next(insn_byte_t, insn); + modrm->value = mod; + modrm->nbytes = 1; + if (inat_is_group(insn->attr)) { + pfx = insn_last_prefix(insn); + insn->attr = inat_get_group_attribute(mod, pfx, + insn->attr); + } + } + + if (insn->x86_64 && inat_is_force64(insn->attr)) + insn->opnd_bytes = 8; + modrm->got = 1; +} + + +/** + * insn_rip_relative() - Does instruction use RIP-relative addressing mode? + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * ModRM byte. No effect if @insn->x86_64 is 0. + */ +int insn_rip_relative(struct insn *insn) +{ + struct insn_field *modrm = &insn->modrm; + + if (!insn->x86_64) + return 0; + if (!modrm->got) + insn_get_modrm(insn); + /* + * For rip-relative instructions, the mod field (top 2 bits) + * is zero and the r/m field (bottom 3 bits) is 0x5. + */ + return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); +} + +/** + * insn_get_sib() - Get the SIB byte of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * ModRM byte. + */ +void insn_get_sib(struct insn *insn) +{ + insn_byte_t modrm; + + if (insn->sib.got) + return; + if (!insn->modrm.got) + insn_get_modrm(insn); + if (insn->modrm.nbytes) { + modrm = (insn_byte_t)insn->modrm.value; + if (insn->addr_bytes != 2 && + X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { + insn->sib.value = get_next(insn_byte_t, insn); + insn->sib.nbytes = 1; + } + } + insn->sib.got = 1; +} + + +/** + * insn_get_displacement() - Get the displacement of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * SIB byte. + * Displacement value is sign-expanded. + */ +void insn_get_displacement(struct insn *insn) +{ + insn_byte_t mod, rm, base; + + if (insn->displacement.got) + return; + if (!insn->sib.got) + insn_get_sib(insn); + if (insn->modrm.nbytes) { + /* + * Interpreting the modrm byte: + * mod = 00 - no displacement fields (exceptions below) + * mod = 01 - 1-byte displacement field + * mod = 10 - displacement field is 4 bytes, or 2 bytes if + * address size = 2 (0x67 prefix in 32-bit mode) + * mod = 11 - no memory operand + * + * If address size = 2... + * mod = 00, r/m = 110 - displacement field is 2 bytes + * + * If address size != 2... + * mod != 11, r/m = 100 - SIB byte exists + * mod = 00, SIB base = 101 - displacement field is 4 bytes + * mod = 00, r/m = 101 - rip-relative addressing, displacement + * field is 4 bytes + */ + mod = X86_MODRM_MOD(insn->modrm.value); + rm = X86_MODRM_RM(insn->modrm.value); + base = X86_SIB_BASE(insn->sib.value); + if (mod == 3) + goto out; + if (mod == 1) { + insn->displacement.value = get_next(char, insn); + insn->displacement.nbytes = 1; + } else if (insn->addr_bytes == 2) { + if ((mod == 0 && rm == 6) || mod == 2) { + insn->displacement.value = + get_next(short, insn); + insn->displacement.nbytes = 2; + } + } else { + if ((mod == 0 && rm == 5) || mod == 2 || + (mod == 0 && base == 5)) { + insn->displacement.value = get_next(int, insn); + insn->displacement.nbytes = 4; + } + } + } +out: + insn->displacement.got = 1; +} + +/* Decode moffset16/32/64 */ +static void __get_moffset(struct insn *insn) +{ + switch (insn->addr_bytes) { + case 2: + insn->moffset1.value = get_next(short, insn); + insn->moffset1.nbytes = 2; + break; + case 4: + insn->moffset1.value = get_next(int, insn); + insn->moffset1.nbytes = 4; + break; + case 8: + insn->moffset1.value = get_next(int, insn); + insn->moffset1.nbytes = 4; + insn->moffset2.value = get_next(int, insn); + insn->moffset2.nbytes = 4; + break; + } + insn->moffset1.got = insn->moffset2.got = 1; +} + +/* Decode imm v32(Iz) */ +static void __get_immv32(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate.value = get_next(short, insn); + insn->immediate.nbytes = 2; + break; + case 4: + case 8: + insn->immediate.value = get_next(int, insn); + insn->immediate.nbytes = 4; + break; + } +} + +/* Decode imm v64(Iv/Ov) */ +static void __get_immv(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate1.value = get_next(short, insn); + insn->immediate1.nbytes = 2; + break; + case 4: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + break; + case 8: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + insn->immediate2.value = get_next(int, insn); + insn->immediate2.nbytes = 4; + break; + } + insn->immediate1.got = insn->immediate2.got = 1; +} + +/* Decode ptr16:16/32(Ap) */ +static void __get_immptr(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate1.value = get_next(short, insn); + insn->immediate1.nbytes = 2; + break; + case 4: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + break; + case 8: + /* ptr16:64 is not exist (no segment) */ + return; + } + insn->immediate2.value = get_next(unsigned short, insn); + insn->immediate2.nbytes = 2; + insn->immediate1.got = insn->immediate2.got = 1; +} + +/** + * insn_get_immediate() - Get the immediates of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * displacement bytes. + * Basically, most of immediates are sign-expanded. Unsigned-value can be + * get by bit masking with ((1 << (nbytes * 8)) - 1) + */ +void insn_get_immediate(struct insn *insn) +{ + if (insn->immediate.got) + return; + if (!insn->displacement.got) + insn_get_displacement(insn); + + if (inat_has_moffset(insn->attr)) { + __get_moffset(insn); + goto done; + } + + if (!inat_has_immediate(insn->attr)) + /* no immediates */ + goto done; + + switch (inat_immediate_size(insn->attr)) { + case INAT_IMM_BYTE: + insn->immediate.value = get_next(char, insn); + insn->immediate.nbytes = 1; + break; + case INAT_IMM_WORD: + insn->immediate.value = get_next(short, insn); + insn->immediate.nbytes = 2; + break; + case INAT_IMM_DWORD: + insn->immediate.value = get_next(int, insn); + insn->immediate.nbytes = 4; + break; + case INAT_IMM_QWORD: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + insn->immediate2.value = get_next(int, insn); + insn->immediate2.nbytes = 4; + break; + case INAT_IMM_PTR: + __get_immptr(insn); + break; + case INAT_IMM_VWORD32: + __get_immv32(insn); + break; + case INAT_IMM_VWORD: + __get_immv(insn); + break; + default: + break; + } + if (inat_has_second_immediate(insn->attr)) { + insn->immediate2.value = get_next(char, insn); + insn->immediate2.nbytes = 1; + } +done: + insn->immediate.got = 1; +} + +/** + * insn_get_length() - Get the length of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * immediates bytes. + */ +void insn_get_length(struct insn *insn) +{ + if (insn->length) + return; + if (!insn->immediate.got) + insn_get_immediate(insn); + insn->length = (unsigned char)((unsigned long)insn->next_byte + - (unsigned long)insn->kaddr); +} diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index 33a1e3ca22d..41628b104b9 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c @@ -71,14 +71,9 @@ int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) } EXPORT_SYMBOL(wrmsr_on_cpu); -/* rdmsr on a bunch of CPUs - * - * @mask: which CPUs - * @msr_no: which MSR - * @msrs: array of MSR values - * - */ -void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs) +static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no, + struct msr *msrs, + void (*msr_func) (void *info)) { struct msr_info rv; int this_cpu; @@ -92,11 +87,23 @@ void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs) this_cpu = get_cpu(); if (cpumask_test_cpu(this_cpu, mask)) - __rdmsr_on_cpu(&rv); + msr_func(&rv); - smp_call_function_many(mask, __rdmsr_on_cpu, &rv, 1); + smp_call_function_many(mask, msr_func, &rv, 1); put_cpu(); } + +/* rdmsr on a bunch of CPUs + * + * @mask: which CPUs + * @msr_no: which MSR + * @msrs: array of MSR values + * + */ +void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) +{ + __rwmsr_on_cpus(mask, msr_no, msrs, __rdmsr_on_cpu); +} EXPORT_SYMBOL(rdmsr_on_cpus); /* @@ -107,24 +114,9 @@ EXPORT_SYMBOL(rdmsr_on_cpus); * @msrs: array of MSR values * */ -void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs) +void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) { - struct msr_info rv; - int this_cpu; - - memset(&rv, 0, sizeof(rv)); - - rv.off = cpumask_first(mask); - rv.msrs = msrs; - rv.msr_no = msr_no; - - this_cpu = get_cpu(); - - if (cpumask_test_cpu(this_cpu, mask)) - __wrmsr_on_cpu(&rv); - - smp_call_function_many(mask, __wrmsr_on_cpu, &rv, 1); - put_cpu(); + __rwmsr_on_cpus(mask, msr_no, msrs, __wrmsr_on_cpu); } EXPORT_SYMBOL(wrmsr_on_cpus); diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 1f118d462ac..e218d5df85f 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -874,7 +874,7 @@ EXPORT_SYMBOL(copy_to_user); * data to the requested size using zero bytes. */ unsigned long -copy_from_user(void *to, const void __user *from, unsigned long n) +_copy_from_user(void *to, const void __user *from, unsigned long n) { if (access_ok(VERIFY_READ, from, n)) n = __copy_from_user(to, from, n); @@ -882,4 +882,10 @@ copy_from_user(void *to, const void __user *from, unsigned long n) memset(to, 0, n); return n; } -EXPORT_SYMBOL(copy_from_user); +EXPORT_SYMBOL(_copy_from_user); + +void copy_from_user_overflow(void) +{ + WARN(1, "Buffer overflow detected!\n"); +} +EXPORT_SYMBOL(copy_from_user_overflow); diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt new file mode 100644 index 00000000000..a793da5e560 --- /dev/null +++ b/arch/x86/lib/x86-opcode-map.txt @@ -0,0 +1,893 @@ +# x86 Opcode Maps +# +#<Opcode maps> +# Table: table-name +# Referrer: escaped-name +# AVXcode: avx-code +# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] +# (or) +# opcode: escape # escaped-name +# EndTable +# +#<group maps> +# GrpTable: GrpXXX +# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] +# EndTable +# +# AVX Superscripts +# (VEX): this opcode can accept VEX prefix. +# (oVEX): this opcode requires VEX prefix. +# (o128): this opcode only supports 128bit VEX. +# (o256): this opcode only supports 256bit VEX. +# + +Table: one byte opcode +Referrer: +AVXcode: +# 0x00 - 0x0f +00: ADD Eb,Gb +01: ADD Ev,Gv +02: ADD Gb,Eb +03: ADD Gv,Ev +04: ADD AL,Ib +05: ADD rAX,Iz +06: PUSH ES (i64) +07: POP ES (i64) +08: OR Eb,Gb +09: OR Ev,Gv +0a: OR Gb,Eb +0b: OR Gv,Ev +0c: OR AL,Ib +0d: OR rAX,Iz +0e: PUSH CS (i64) +0f: escape # 2-byte escape +# 0x10 - 0x1f +10: ADC Eb,Gb +11: ADC Ev,Gv +12: ADC Gb,Eb +13: ADC Gv,Ev +14: ADC AL,Ib +15: ADC rAX,Iz +16: PUSH SS (i64) +17: POP SS (i64) +18: SBB Eb,Gb +19: SBB Ev,Gv +1a: SBB Gb,Eb +1b: SBB Gv,Ev +1c: SBB AL,Ib +1d: SBB rAX,Iz +1e: PUSH DS (i64) +1f: POP DS (i64) +# 0x20 - 0x2f +20: AND Eb,Gb +21: AND Ev,Gv +22: AND Gb,Eb +23: AND Gv,Ev +24: AND AL,Ib +25: AND rAx,Iz +26: SEG=ES (Prefix) +27: DAA (i64) +28: SUB Eb,Gb +29: SUB Ev,Gv +2a: SUB Gb,Eb +2b: SUB Gv,Ev +2c: SUB AL,Ib +2d: SUB rAX,Iz +2e: SEG=CS (Prefix) +2f: DAS (i64) +# 0x30 - 0x3f +30: XOR Eb,Gb +31: XOR Ev,Gv +32: XOR Gb,Eb +33: XOR Gv,Ev +34: XOR AL,Ib +35: XOR rAX,Iz +36: SEG=SS (Prefix) +37: AAA (i64) +38: CMP Eb,Gb +39: CMP Ev,Gv +3a: CMP Gb,Eb +3b: CMP Gv,Ev +3c: CMP AL,Ib +3d: CMP rAX,Iz +3e: SEG=DS (Prefix) +3f: AAS (i64) +# 0x40 - 0x4f +40: INC eAX (i64) | REX (o64) +41: INC eCX (i64) | REX.B (o64) +42: INC eDX (i64) | REX.X (o64) +43: INC eBX (i64) | REX.XB (o64) +44: INC eSP (i64) | REX.R (o64) +45: INC eBP (i64) | REX.RB (o64) +46: INC eSI (i64) | REX.RX (o64) +47: INC eDI (i64) | REX.RXB (o64) +48: DEC eAX (i64) | REX.W (o64) +49: DEC eCX (i64) | REX.WB (o64) +4a: DEC eDX (i64) | REX.WX (o64) +4b: DEC eBX (i64) | REX.WXB (o64) +4c: DEC eSP (i64) | REX.WR (o64) +4d: DEC eBP (i64) | REX.WRB (o64) +4e: DEC eSI (i64) | REX.WRX (o64) +4f: DEC eDI (i64) | REX.WRXB (o64) +# 0x50 - 0x5f +50: PUSH rAX/r8 (d64) +51: PUSH rCX/r9 (d64) +52: PUSH rDX/r10 (d64) +53: PUSH rBX/r11 (d64) +54: PUSH rSP/r12 (d64) +55: PUSH rBP/r13 (d64) +56: PUSH rSI/r14 (d64) +57: PUSH rDI/r15 (d64) +58: POP rAX/r8 (d64) +59: POP rCX/r9 (d64) +5a: POP rDX/r10 (d64) +5b: POP rBX/r11 (d64) +5c: POP rSP/r12 (d64) +5d: POP rBP/r13 (d64) +5e: POP rSI/r14 (d64) +5f: POP rDI/r15 (d64) +# 0x60 - 0x6f +60: PUSHA/PUSHAD (i64) +61: POPA/POPAD (i64) +62: BOUND Gv,Ma (i64) +63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) +64: SEG=FS (Prefix) +65: SEG=GS (Prefix) +66: Operand-Size (Prefix) +67: Address-Size (Prefix) +68: PUSH Iz (d64) +69: IMUL Gv,Ev,Iz +6a: PUSH Ib (d64) +6b: IMUL Gv,Ev,Ib +6c: INS/INSB Yb,DX +6d: INS/INSW/INSD Yz,DX +6e: OUTS/OUTSB DX,Xb +6f: OUTS/OUTSW/OUTSD DX,Xz +# 0x70 - 0x7f +70: JO Jb +71: JNO Jb +72: JB/JNAE/JC Jb +73: JNB/JAE/JNC Jb +74: JZ/JE Jb +75: JNZ/JNE Jb +76: JBE/JNA Jb +77: JNBE/JA Jb +78: JS Jb +79: JNS Jb +7a: JP/JPE Jb +7b: JNP/JPO Jb +7c: JL/JNGE Jb +7d: JNL/JGE Jb +7e: JLE/JNG Jb +7f: JNLE/JG Jb +# 0x80 - 0x8f +80: Grp1 Eb,Ib (1A) +81: Grp1 Ev,Iz (1A) +82: Grp1 Eb,Ib (1A),(i64) +83: Grp1 Ev,Ib (1A) +84: TEST Eb,Gb +85: TEST Ev,Gv +86: XCHG Eb,Gb +87: XCHG Ev,Gv +88: MOV Eb,Gb +89: MOV Ev,Gv +8a: MOV Gb,Eb +8b: MOV Gv,Ev +8c: MOV Ev,Sw +8d: LEA Gv,M +8e: MOV Sw,Ew +8f: Grp1A (1A) | POP Ev (d64) +# 0x90 - 0x9f +90: NOP | PAUSE (F3) | XCHG r8,rAX +91: XCHG rCX/r9,rAX +92: XCHG rDX/r10,rAX +93: XCHG rBX/r11,rAX +94: XCHG rSP/r12,rAX +95: XCHG rBP/r13,rAX +96: XCHG rSI/r14,rAX +97: XCHG rDI/r15,rAX +98: CBW/CWDE/CDQE +99: CWD/CDQ/CQO +9a: CALLF Ap (i64) +9b: FWAIT/WAIT +9c: PUSHF/D/Q Fv (d64) +9d: POPF/D/Q Fv (d64) +9e: SAHF +9f: LAHF +# 0xa0 - 0xaf +a0: MOV AL,Ob +a1: MOV rAX,Ov +a2: MOV Ob,AL +a3: MOV Ov,rAX +a4: MOVS/B Xb,Yb +a5: MOVS/W/D/Q Xv,Yv +a6: CMPS/B Xb,Yb +a7: CMPS/W/D Xv,Yv +a8: TEST AL,Ib +a9: TEST rAX,Iz +aa: STOS/B Yb,AL +ab: STOS/W/D/Q Yv,rAX +ac: LODS/B AL,Xb +ad: LODS/W/D/Q rAX,Xv +ae: SCAS/B AL,Yb +af: SCAS/W/D/Q rAX,Xv +# 0xb0 - 0xbf +b0: MOV AL/R8L,Ib +b1: MOV CL/R9L,Ib +b2: MOV DL/R10L,Ib +b3: MOV BL/R11L,Ib +b4: MOV AH/R12L,Ib +b5: MOV CH/R13L,Ib +b6: MOV DH/R14L,Ib +b7: MOV BH/R15L,Ib +b8: MOV rAX/r8,Iv +b9: MOV rCX/r9,Iv +ba: MOV rDX/r10,Iv +bb: MOV rBX/r11,Iv +bc: MOV rSP/r12,Iv +bd: MOV rBP/r13,Iv +be: MOV rSI/r14,Iv +bf: MOV rDI/r15,Iv +# 0xc0 - 0xcf +c0: Grp2 Eb,Ib (1A) +c1: Grp2 Ev,Ib (1A) +c2: RETN Iw (f64) +c3: RETN +c4: LES Gz,Mp (i64) | 3bytes-VEX (Prefix) +c5: LDS Gz,Mp (i64) | 2bytes-VEX (Prefix) +c6: Grp11 Eb,Ib (1A) +c7: Grp11 Ev,Iz (1A) +c8: ENTER Iw,Ib +c9: LEAVE (d64) +ca: RETF Iw +cb: RETF +cc: INT3 +cd: INT Ib +ce: INTO (i64) +cf: IRET/D/Q +# 0xd0 - 0xdf +d0: Grp2 Eb,1 (1A) +d1: Grp2 Ev,1 (1A) +d2: Grp2 Eb,CL (1A) +d3: Grp2 Ev,CL (1A) +d4: AAM Ib (i64) +d5: AAD Ib (i64) +d6: +d7: XLAT/XLATB +d8: ESC +d9: ESC +da: ESC +db: ESC +dc: ESC +dd: ESC +de: ESC +df: ESC +# 0xe0 - 0xef +e0: LOOPNE/LOOPNZ Jb (f64) +e1: LOOPE/LOOPZ Jb (f64) +e2: LOOP Jb (f64) +e3: JrCXZ Jb (f64) +e4: IN AL,Ib +e5: IN eAX,Ib +e6: OUT Ib,AL +e7: OUT Ib,eAX +e8: CALL Jz (f64) +e9: JMP-near Jz (f64) +ea: JMP-far Ap (i64) +eb: JMP-short Jb (f64) +ec: IN AL,DX +ed: IN eAX,DX +ee: OUT DX,AL +ef: OUT DX,eAX +# 0xf0 - 0xff +f0: LOCK (Prefix) +f1: +f2: REPNE (Prefix) +f3: REP/REPE (Prefix) +f4: HLT +f5: CMC +f6: Grp3_1 Eb (1A) +f7: Grp3_2 Ev (1A) +f8: CLC +f9: STC +fa: CLI +fb: STI +fc: CLD +fd: STD +fe: Grp4 (1A) +ff: Grp5 (1A) +EndTable + +Table: 2-byte opcode (0x0f) +Referrer: 2-byte escape +AVXcode: 1 +# 0x0f 0x00-0x0f +00: Grp6 (1A) +01: Grp7 (1A) +02: LAR Gv,Ew +03: LSL Gv,Ew +04: +05: SYSCALL (o64) +06: CLTS +07: SYSRET (o64) +08: INVD +09: WBINVD +0a: +0b: UD2 (1B) +0c: +0d: NOP Ev | GrpP +0e: FEMMS +# 3DNow! uses the last imm byte as opcode extension. +0f: 3DNow! Pq,Qq,Ib +# 0x0f 0x10-0x1f +10: movups Vps,Wps (VEX) | movss Vss,Wss (F3),(VEX),(o128) | movupd Vpd,Wpd (66),(VEX) | movsd Vsd,Wsd (F2),(VEX),(o128) +11: movups Wps,Vps (VEX) | movss Wss,Vss (F3),(VEX),(o128) | movupd Wpd,Vpd (66),(VEX) | movsd Wsd,Vsd (F2),(VEX),(o128) +12: movlps Vq,Mq (VEX),(o128) | movlpd Vq,Mq (66),(VEX),(o128) | movhlps Vq,Uq (VEX),(o128) | movddup Vq,Wq (F2),(VEX) | movsldup Vq,Wq (F3),(VEX) +13: mpvlps Mq,Vq (VEX),(o128) | movlpd Mq,Vq (66),(VEX),(o128) +14: unpcklps Vps,Wq (VEX) | unpcklpd Vpd,Wq (66),(VEX) +15: unpckhps Vps,Wq (VEX) | unpckhpd Vpd,Wq (66),(VEX) +16: movhps Vq,Mq (VEX),(o128) | movhpd Vq,Mq (66),(VEX),(o128) | movlsps Vq,Uq (VEX),(o128) | movshdup Vq,Wq (F3),(VEX) +17: movhps Mq,Vq (VEX),(o128) | movhpd Mq,Vq (66),(VEX),(o128) +18: Grp16 (1A) +19: +1a: +1b: +1c: +1d: +1e: +1f: NOP Ev +# 0x0f 0x20-0x2f +20: MOV Rd,Cd +21: MOV Rd,Dd +22: MOV Cd,Rd +23: MOV Dd,Rd +24: +25: +26: +27: +28: movaps Vps,Wps (VEX) | movapd Vpd,Wpd (66),(VEX) +29: movaps Wps,Vps (VEX) | movapd Wpd,Vpd (66),(VEX) +2a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3),(VEX),(o128) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2),(VEX),(o128) +2b: movntps Mps,Vps (VEX) | movntpd Mpd,Vpd (66),(VEX) +2c: cvttps2pi Ppi,Wps | cvttss2si Gd/q,Wss (F3),(VEX),(o128) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2),(VEX),(o128) +2d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3),(VEX),(o128) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2),(VEX),(o128) +2e: ucomiss Vss,Wss (VEX),(o128) | ucomisd Vsd,Wsd (66),(VEX),(o128) +2f: comiss Vss,Wss (VEX),(o128) | comisd Vsd,Wsd (66),(VEX),(o128) +# 0x0f 0x30-0x3f +30: WRMSR +31: RDTSC +32: RDMSR +33: RDPMC +34: SYSENTER +35: SYSEXIT +36: +37: GETSEC +38: escape # 3-byte escape 1 +39: +3a: escape # 3-byte escape 2 +3b: +3c: +3d: +3e: +3f: +# 0x0f 0x40-0x4f +40: CMOVO Gv,Ev +41: CMOVNO Gv,Ev +42: CMOVB/C/NAE Gv,Ev +43: CMOVAE/NB/NC Gv,Ev +44: CMOVE/Z Gv,Ev +45: CMOVNE/NZ Gv,Ev +46: CMOVBE/NA Gv,Ev +47: CMOVA/NBE Gv,Ev +48: CMOVS Gv,Ev +49: CMOVNS Gv,Ev +4a: CMOVP/PE Gv,Ev +4b: CMOVNP/PO Gv,Ev +4c: CMOVL/NGE Gv,Ev +4d: CMOVNL/GE Gv,Ev +4e: CMOVLE/NG Gv,Ev +4f: CMOVNLE/G Gv,Ev +# 0x0f 0x50-0x5f +50: movmskps Gd/q,Ups (VEX) | movmskpd Gd/q,Upd (66),(VEX) +51: sqrtps Vps,Wps (VEX) | sqrtss Vss,Wss (F3),(VEX),(o128) | sqrtpd Vpd,Wpd (66),(VEX) | sqrtsd Vsd,Wsd (F2),(VEX),(o128) +52: rsqrtps Vps,Wps (VEX) | rsqrtss Vss,Wss (F3),(VEX),(o128) +53: rcpps Vps,Wps (VEX) | rcpss Vss,Wss (F3),(VEX),(o128) +54: andps Vps,Wps (VEX) | andpd Vpd,Wpd (66),(VEX) +55: andnps Vps,Wps (VEX) | andnpd Vpd,Wpd (66),(VEX) +56: orps Vps,Wps (VEX) | orpd Vpd,Wpd (66),(VEX) +57: xorps Vps,Wps (VEX) | xorpd Vpd,Wpd (66),(VEX) +58: addps Vps,Wps (VEX) | addss Vss,Wss (F3),(VEX),(o128) | addpd Vpd,Wpd (66),(VEX) | addsd Vsd,Wsd (F2),(VEX),(o128) +59: mulps Vps,Wps (VEX) | mulss Vss,Wss (F3),(VEX),(o128) | mulpd Vpd,Wpd (66),(VEX) | mulsd Vsd,Wsd (F2),(VEX),(o128) +5a: cvtps2pd Vpd,Wps (VEX) | cvtss2sd Vsd,Wss (F3),(VEX),(o128) | cvtpd2ps Vps,Wpd (66),(VEX) | cvtsd2ss Vsd,Wsd (F2),(VEX),(o128) +5b: cvtdq2ps Vps,Wdq (VEX) | cvtps2dq Vdq,Wps (66),(VEX) | cvttps2dq Vdq,Wps (F3),(VEX) +5c: subps Vps,Wps (VEX) | subss Vss,Wss (F3),(VEX),(o128) | subpd Vpd,Wpd (66),(VEX) | subsd Vsd,Wsd (F2),(VEX),(o128) +5d: minps Vps,Wps (VEX) | minss Vss,Wss (F3),(VEX),(o128) | minpd Vpd,Wpd (66),(VEX) | minsd Vsd,Wsd (F2),(VEX),(o128) +5e: divps Vps,Wps (VEX) | divss Vss,Wss (F3),(VEX),(o128) | divpd Vpd,Wpd (66),(VEX) | divsd Vsd,Wsd (F2),(VEX),(o128) +5f: maxps Vps,Wps (VEX) | maxss Vss,Wss (F3),(VEX),(o128) | maxpd Vpd,Wpd (66),(VEX) | maxsd Vsd,Wsd (F2),(VEX),(o128) +# 0x0f 0x60-0x6f +60: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66),(VEX),(o128) +61: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66),(VEX),(o128) +62: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66),(VEX),(o128) +63: packsswb Pq,Qq | packsswb Vdq,Wdq (66),(VEX),(o128) +64: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66),(VEX),(o128) +65: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66),(VEX),(o128) +66: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66),(VEX),(o128) +67: packuswb Pq,Qq | packuswb Vdq,Wdq (66),(VEX),(o128) +68: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66),(VEX),(o128) +69: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66),(VEX),(o128) +6a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66),(VEX),(o128) +6b: packssdw Pq,Qd | packssdw Vdq,Wdq (66),(VEX),(o128) +6c: punpcklqdq Vdq,Wdq (66),(VEX),(o128) +6d: punpckhqdq Vdq,Wdq (66),(VEX),(o128) +6e: movd/q/ Pd,Ed/q | movd/q Vdq,Ed/q (66),(VEX),(o128) +6f: movq Pq,Qq | movdqa Vdq,Wdq (66),(VEX) | movdqu Vdq,Wdq (F3),(VEX) +# 0x0f 0x70-0x7f +70: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66),(VEX),(o128) | pshufhw Vdq,Wdq,Ib (F3),(VEX),(o128) | pshuflw VdqWdq,Ib (F2),(VEX),(o128) +71: Grp12 (1A) +72: Grp13 (1A) +73: Grp14 (1A) +74: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66),(VEX),(o128) +75: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66),(VEX),(o128) +76: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66),(VEX),(o128) +77: emms/vzeroupper/vzeroall (VEX) +78: VMREAD Ed/q,Gd/q +79: VMWRITE Gd/q,Ed/q +7a: +7b: +7c: haddps Vps,Wps (F2),(VEX) | haddpd Vpd,Wpd (66),(VEX) +7d: hsubps Vps,Wps (F2),(VEX) | hsubpd Vpd,Wpd (66),(VEX) +7e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66),(VEX),(o128) | movq Vq,Wq (F3),(VEX),(o128) +7f: movq Qq,Pq | movdqa Wdq,Vdq (66),(VEX) | movdqu Wdq,Vdq (F3),(VEX) +# 0x0f 0x80-0x8f +80: JO Jz (f64) +81: JNO Jz (f64) +82: JB/JNAE/JC Jz (f64) +83: JNB/JAE/JNC Jz (f64) +84: JZ/JE Jz (f64) +85: JNZ/JNE Jz (f64) +86: JBE/JNA Jz (f64) +87: JNBE/JA Jz (f64) +88: JS Jz (f64) +89: JNS Jz (f64) +8a: JP/JPE Jz (f64) +8b: JNP/JPO Jz (f64) +8c: JL/JNGE Jz (f64) +8d: JNL/JGE Jz (f64) +8e: JLE/JNG Jz (f64) +8f: JNLE/JG Jz (f64) +# 0x0f 0x90-0x9f +90: SETO Eb +91: SETNO Eb +92: SETB/C/NAE Eb +93: SETAE/NB/NC Eb +94: SETE/Z Eb +95: SETNE/NZ Eb +96: SETBE/NA Eb +97: SETA/NBE Eb +98: SETS Eb +99: SETNS Eb +9a: SETP/PE Eb +9b: SETNP/PO Eb +9c: SETL/NGE Eb +9d: SETNL/GE Eb +9e: SETLE/NG Eb +9f: SETNLE/G Eb +# 0x0f 0xa0-0xaf +a0: PUSH FS (d64) +a1: POP FS (d64) +a2: CPUID +a3: BT Ev,Gv +a4: SHLD Ev,Gv,Ib +a5: SHLD Ev,Gv,CL +a6: GrpPDLK +a7: GrpRNG +a8: PUSH GS (d64) +a9: POP GS (d64) +aa: RSM +ab: BTS Ev,Gv +ac: SHRD Ev,Gv,Ib +ad: SHRD Ev,Gv,CL +ae: Grp15 (1A),(1C) +af: IMUL Gv,Ev +# 0x0f 0xb0-0xbf +b0: CMPXCHG Eb,Gb +b1: CMPXCHG Ev,Gv +b2: LSS Gv,Mp +b3: BTR Ev,Gv +b4: LFS Gv,Mp +b5: LGS Gv,Mp +b6: MOVZX Gv,Eb +b7: MOVZX Gv,Ew +b8: JMPE | POPCNT Gv,Ev (F3) +b9: Grp10 (1A) +ba: Grp8 Ev,Ib (1A) +bb: BTC Ev,Gv +bc: BSF Gv,Ev +bd: BSR Gv,Ev +be: MOVSX Gv,Eb +bf: MOVSX Gv,Ew +# 0x0f 0xc0-0xcf +c0: XADD Eb,Gb +c1: XADD Ev,Gv +c2: cmpps Vps,Wps,Ib (VEX) | cmpss Vss,Wss,Ib (F3),(VEX),(o128) | cmppd Vpd,Wpd,Ib (66),(VEX) | cmpsd Vsd,Wsd,Ib (F2),(VEX) +c3: movnti Md/q,Gd/q +c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66),(VEX),(o128) +c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66),(VEX),(o128) +c6: shufps Vps,Wps,Ib (VEX) | shufpd Vpd,Wpd,Ib (66),(VEX) +c7: Grp9 (1A) +c8: BSWAP RAX/EAX/R8/R8D +c9: BSWAP RCX/ECX/R9/R9D +ca: BSWAP RDX/EDX/R10/R10D +cb: BSWAP RBX/EBX/R11/R11D +cc: BSWAP RSP/ESP/R12/R12D +cd: BSWAP RBP/EBP/R13/R13D +ce: BSWAP RSI/ESI/R14/R14D +cf: BSWAP RDI/EDI/R15/R15D +# 0x0f 0xd0-0xdf +d0: addsubps Vps,Wps (F2),(VEX) | addsubpd Vpd,Wpd (66),(VEX) +d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66),(VEX),(o128) +d2: psrld Pq,Qq | psrld Vdq,Wdq (66),(VEX),(o128) +d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66),(VEX),(o128) +d4: paddq Pq,Qq | paddq Vdq,Wdq (66),(VEX),(o128) +d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66),(VEX),(o128) +d6: movq Wq,Vq (66),(VEX),(o128) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) +d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66),(VEX),(o128) +d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66),(VEX),(o128) +d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66),(VEX),(o128) +da: pminub Pq,Qq | pminub Vdq,Wdq (66),(VEX),(o128) +db: pand Pq,Qq | pand Vdq,Wdq (66),(VEX),(o128) +dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66),(VEX),(o128) +dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66),(VEX),(o128) +de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66),(VEX),(o128) +df: pandn Pq,Qq | pandn Vdq,Wdq (66),(VEX),(o128) +# 0x0f 0xe0-0xef +e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66),(VEX),(o128) +e1: psraw Pq,Qq | psraw Vdq,Wdq (66),(VEX),(o128) +e2: psrad Pq,Qq | psrad Vdq,Wdq (66),(VEX),(o128) +e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66),(VEX),(o128) +e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66),(VEX),(o128) +e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66),(VEX),(o128) +e6: cvtpd2dq Vdq,Wpd (F2),(VEX) | cvttpd2dq Vdq,Wpd (66),(VEX) | cvtdq2pd Vpd,Wdq (F3),(VEX) +e7: movntq Mq,Pq | movntdq Mdq,Vdq (66),(VEX) +e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66),(VEX),(o128) +e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66),(VEX),(o128) +ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66),(VEX),(o128) +eb: por Pq,Qq | por Vdq,Wdq (66),(VEX),(o128) +ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66),(VEX),(o128) +ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66),(VEX),(o128) +ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66),(VEX),(o128) +ef: pxor Pq,Qq | pxor Vdq,Wdq (66),(VEX),(o128) +# 0x0f 0xf0-0xff +f0: lddqu Vdq,Mdq (F2),(VEX) +f1: psllw Pq,Qq | psllw Vdq,Wdq (66),(VEX),(o128) +f2: pslld Pq,Qq | pslld Vdq,Wdq (66),(VEX),(o128) +f3: psllq Pq,Qq | psllq Vdq,Wdq (66),(VEX),(o128) +f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66),(VEX),(o128) +f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66),(VEX),(o128) +f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66),(VEX),(o128) +f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66),(VEX),(o128) +f8: psubb Pq,Qq | psubb Vdq,Wdq (66),(VEX),(o128) +f9: psubw Pq,Qq | psubw Vdq,Wdq (66),(VEX),(o128) +fa: psubd Pq,Qq | psubd Vdq,Wdq (66),(VEX),(o128) +fb: psubq Pq,Qq | psubq Vdq,Wdq (66),(VEX),(o128) +fc: paddb Pq,Qq | paddb Vdq,Wdq (66),(VEX),(o128) +fd: paddw Pq,Qq | paddw Vdq,Wdq (66),(VEX),(o128) +fe: paddd Pq,Qq | paddd Vdq,Wdq (66),(VEX),(o128) +ff: +EndTable + +Table: 3-byte opcode 1 (0x0f 0x38) +Referrer: 3-byte escape 1 +AVXcode: 2 +# 0x0f 0x38 0x00-0x0f +00: pshufb Pq,Qq | pshufb Vdq,Wdq (66),(VEX),(o128) +01: phaddw Pq,Qq | phaddw Vdq,Wdq (66),(VEX),(o128) +02: phaddd Pq,Qq | phaddd Vdq,Wdq (66),(VEX),(o128) +03: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66),(VEX),(o128) +04: pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66),(VEX),(o128) +05: phsubw Pq,Qq | phsubw Vdq,Wdq (66),(VEX),(o128) +06: phsubd Pq,Qq | phsubd Vdq,Wdq (66),(VEX),(o128) +07: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66),(VEX),(o128) +08: psignb Pq,Qq | psignb Vdq,Wdq (66),(VEX),(o128) +09: psignw Pq,Qq | psignw Vdq,Wdq (66),(VEX),(o128) +0a: psignd Pq,Qq | psignd Vdq,Wdq (66),(VEX),(o128) +0b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66),(VEX),(o128) +0c: Vpermilps /r (66),(oVEX) +0d: Vpermilpd /r (66),(oVEX) +0e: vtestps /r (66),(oVEX) +0f: vtestpd /r (66),(oVEX) +# 0x0f 0x38 0x10-0x1f +10: pblendvb Vdq,Wdq (66) +11: +12: +13: +14: blendvps Vdq,Wdq (66) +15: blendvpd Vdq,Wdq (66) +16: +17: ptest Vdq,Wdq (66),(VEX) +18: vbroadcastss /r (66),(oVEX) +19: vbroadcastsd /r (66),(oVEX),(o256) +1a: vbroadcastf128 /r (66),(oVEX),(o256) +1b: +1c: pabsb Pq,Qq | pabsb Vdq,Wdq (66),(VEX),(o128) +1d: pabsw Pq,Qq | pabsw Vdq,Wdq (66),(VEX),(o128) +1e: pabsd Pq,Qq | pabsd Vdq,Wdq (66),(VEX),(o128) +1f: +# 0x0f 0x38 0x20-0x2f +20: pmovsxbw Vdq,Udq/Mq (66),(VEX),(o128) +21: pmovsxbd Vdq,Udq/Md (66),(VEX),(o128) +22: pmovsxbq Vdq,Udq/Mw (66),(VEX),(o128) +23: pmovsxwd Vdq,Udq/Mq (66),(VEX),(o128) +24: pmovsxwq Vdq,Udq/Md (66),(VEX),(o128) +25: pmovsxdq Vdq,Udq/Mq (66),(VEX),(o128) +26: +27: +28: pmuldq Vdq,Wdq (66),(VEX),(o128) +29: pcmpeqq Vdq,Wdq (66),(VEX),(o128) +2a: movntdqa Vdq,Mdq (66),(VEX),(o128) +2b: packusdw Vdq,Wdq (66),(VEX),(o128) +2c: vmaskmovps(ld) /r (66),(oVEX) +2d: vmaskmovpd(ld) /r (66),(oVEX) +2e: vmaskmovps(st) /r (66),(oVEX) +2f: vmaskmovpd(st) /r (66),(oVEX) +# 0x0f 0x38 0x30-0x3f +30: pmovzxbw Vdq,Udq/Mq (66),(VEX),(o128) +31: pmovzxbd Vdq,Udq/Md (66),(VEX),(o128) +32: pmovzxbq Vdq,Udq/Mw (66),(VEX),(o128) +33: pmovzxwd Vdq,Udq/Mq (66),(VEX),(o128) +34: pmovzxwq Vdq,Udq/Md (66),(VEX),(o128) +35: pmovzxdq Vdq,Udq/Mq (66),(VEX),(o128) +36: +37: pcmpgtq Vdq,Wdq (66),(VEX),(o128) +38: pminsb Vdq,Wdq (66),(VEX),(o128) +39: pminsd Vdq,Wdq (66),(VEX),(o128) +3a: pminuw Vdq,Wdq (66),(VEX),(o128) +3b: pminud Vdq,Wdq (66),(VEX),(o128) +3c: pmaxsb Vdq,Wdq (66),(VEX),(o128) +3d: pmaxsd Vdq,Wdq (66),(VEX),(o128) +3e: pmaxuw Vdq,Wdq (66),(VEX),(o128) +3f: pmaxud Vdq,Wdq (66),(VEX),(o128) +# 0x0f 0x38 0x40-0x8f +40: pmulld Vdq,Wdq (66),(VEX),(o128) +41: phminposuw Vdq,Wdq (66),(VEX),(o128) +80: INVEPT Gd/q,Mdq (66) +81: INVPID Gd/q,Mdq (66) +# 0x0f 0x38 0x90-0xbf (FMA) +96: vfmaddsub132pd/ps /r (66),(VEX) +97: vfmsubadd132pd/ps /r (66),(VEX) +98: vfmadd132pd/ps /r (66),(VEX) +99: vfmadd132sd/ss /r (66),(VEX),(o128) +9a: vfmsub132pd/ps /r (66),(VEX) +9b: vfmsub132sd/ss /r (66),(VEX),(o128) +9c: vfnmadd132pd/ps /r (66),(VEX) +9d: vfnmadd132sd/ss /r (66),(VEX),(o128) +9e: vfnmsub132pd/ps /r (66),(VEX) +9f: vfnmsub132sd/ss /r (66),(VEX),(o128) +a6: vfmaddsub213pd/ps /r (66),(VEX) +a7: vfmsubadd213pd/ps /r (66),(VEX) +a8: vfmadd213pd/ps /r (66),(VEX) +a9: vfmadd213sd/ss /r (66),(VEX),(o128) +aa: vfmsub213pd/ps /r (66),(VEX) +ab: vfmsub213sd/ss /r (66),(VEX),(o128) +ac: vfnmadd213pd/ps /r (66),(VEX) +ad: vfnmadd213sd/ss /r (66),(VEX),(o128) +ae: vfnmsub213pd/ps /r (66),(VEX) +af: vfnmsub213sd/ss /r (66),(VEX),(o128) +b6: vfmaddsub231pd/ps /r (66),(VEX) +b7: vfmsubadd231pd/ps /r (66),(VEX) +b8: vfmadd231pd/ps /r (66),(VEX) +b9: vfmadd231sd/ss /r (66),(VEX),(o128) +ba: vfmsub231pd/ps /r (66),(VEX) +bb: vfmsub231sd/ss /r (66),(VEX),(o128) +bc: vfnmadd231pd/ps /r (66),(VEX) +bd: vfnmadd231sd/ss /r (66),(VEX),(o128) +be: vfnmsub231pd/ps /r (66),(VEX) +bf: vfnmsub231sd/ss /r (66),(VEX),(o128) +# 0x0f 0x38 0xc0-0xff +db: aesimc Vdq,Wdq (66),(VEX),(o128) +dc: aesenc Vdq,Wdq (66),(VEX),(o128) +dd: aesenclast Vdq,Wdq (66),(VEX),(o128) +de: aesdec Vdq,Wdq (66),(VEX),(o128) +df: aesdeclast Vdq,Wdq (66),(VEX),(o128) +f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2) +f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2) +EndTable + +Table: 3-byte opcode 2 (0x0f 0x3a) +Referrer: 3-byte escape 2 +AVXcode: 3 +# 0x0f 0x3a 0x00-0xff +04: vpermilps /r,Ib (66),(oVEX) +05: vpermilpd /r,Ib (66),(oVEX) +06: vperm2f128 /r,Ib (66),(oVEX),(o256) +08: roundps Vdq,Wdq,Ib (66),(VEX) +09: roundpd Vdq,Wdq,Ib (66),(VEX) +0a: roundss Vss,Wss,Ib (66),(VEX),(o128) +0b: roundsd Vsd,Wsd,Ib (66),(VEX),(o128) +0c: blendps Vdq,Wdq,Ib (66),(VEX) +0d: blendpd Vdq,Wdq,Ib (66),(VEX) +0e: pblendw Vdq,Wdq,Ib (66),(VEX),(o128) +0f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66),(VEX),(o128) +14: pextrb Rd/Mb,Vdq,Ib (66),(VEX),(o128) +15: pextrw Rd/Mw,Vdq,Ib (66),(VEX),(o128) +16: pextrd/pextrq Ed/q,Vdq,Ib (66),(VEX),(o128) +17: extractps Ed,Vdq,Ib (66),(VEX),(o128) +18: vinsertf128 /r,Ib (66),(oVEX),(o256) +19: vextractf128 /r,Ib (66),(oVEX),(o256) +20: pinsrb Vdq,Rd/q/Mb,Ib (66),(VEX),(o128) +21: insertps Vdq,Udq/Md,Ib (66),(VEX),(o128) +22: pinsrd/pinsrq Vdq,Ed/q,Ib (66),(VEX),(o128) +40: dpps Vdq,Wdq,Ib (66),(VEX) +41: dppd Vdq,Wdq,Ib (66),(VEX),(o128) +42: mpsadbw Vdq,Wdq,Ib (66),(VEX),(o128) +44: pclmulq Vdq,Wdq,Ib (66),(VEX),(o128) +4a: vblendvps /r,Ib (66),(oVEX) +4b: vblendvpd /r,Ib (66),(oVEX) +4c: vpblendvb /r,Ib (66),(oVEX),(o128) +60: pcmpestrm Vdq,Wdq,Ib (66),(VEX),(o128) +61: pcmpestri Vdq,Wdq,Ib (66),(VEX),(o128) +62: pcmpistrm Vdq,Wdq,Ib (66),(VEX),(o128) +63: pcmpistri Vdq,Wdq,Ib (66),(VEX),(o128) +df: aeskeygenassist Vdq,Wdq,Ib (66),(VEX),(o128) +EndTable + +GrpTable: Grp1 +0: ADD +1: OR +2: ADC +3: SBB +4: AND +5: SUB +6: XOR +7: CMP +EndTable + +GrpTable: Grp1A +0: POP +EndTable + +GrpTable: Grp2 +0: ROL +1: ROR +2: RCL +3: RCR +4: SHL/SAL +5: SHR +6: +7: SAR +EndTable + +GrpTable: Grp3_1 +0: TEST Eb,Ib +1: +2: NOT Eb +3: NEG Eb +4: MUL AL,Eb +5: IMUL AL,Eb +6: DIV AL,Eb +7: IDIV AL,Eb +EndTable + +GrpTable: Grp3_2 +0: TEST Ev,Iz +1: +2: NOT Ev +3: NEG Ev +4: MUL rAX,Ev +5: IMUL rAX,Ev +6: DIV rAX,Ev +7: IDIV rAX,Ev +EndTable + +GrpTable: Grp4 +0: INC Eb +1: DEC Eb +EndTable + +GrpTable: Grp5 +0: INC Ev +1: DEC Ev +2: CALLN Ev (f64) +3: CALLF Ep +4: JMPN Ev (f64) +5: JMPF Ep +6: PUSH Ev (d64) +7: +EndTable + +GrpTable: Grp6 +0: SLDT Rv/Mw +1: STR Rv/Mw +2: LLDT Ew +3: LTR Ew +4: VERR Ew +5: VERW Ew +EndTable + +GrpTable: Grp7 +0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) +1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001) +2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) +3: LIDT Ms +4: SMSW Mw/Rv +5: +6: LMSW Ew +7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) +EndTable + +GrpTable: Grp8 +4: BT +5: BTS +6: BTR +7: BTC +EndTable + +GrpTable: Grp9 +1: CMPXCHG8B/16B Mq/Mdq +6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) +7: VMPTRST Mq +EndTable + +GrpTable: Grp10 +EndTable + +GrpTable: Grp11 +0: MOV +EndTable + +GrpTable: Grp12 +2: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B),(VEX),(o128) +4: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B),(VEX),(o128) +6: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B),(VEX),(o128) +EndTable + +GrpTable: Grp13 +2: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B),(VEX),(o128) +4: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B),(VEX),(o128) +6: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B),(VEX),(o128) +EndTable + +GrpTable: Grp14 +2: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B),(VEX),(o128) +3: psrldq Udq,Ib (66),(11B),(VEX),(o128) +6: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B),(VEX),(o128) +7: pslldq Udq,Ib (66),(11B),(VEX),(o128) +EndTable + +GrpTable: Grp15 +0: fxsave +1: fxstor +2: ldmxcsr (VEX) +3: stmxcsr (VEX) +4: XSAVE +5: XRSTOR | lfence (11B) +6: mfence (11B) +7: clflush | sfence (11B) +EndTable + +GrpTable: Grp16 +0: prefetch NTA +1: prefetch T0 +2: prefetch T1 +3: prefetch T2 +EndTable + +# AMD's Prefetch Group +GrpTable: GrpP +0: PREFETCH +1: PREFETCHW +EndTable + +GrpTable: GrpPDLK +0: MONTMUL +1: XSHA1 +2: XSHA2 +EndTable + +GrpTable: GrpRNG +0: xstore-rng +1: xcrypt-ecb +2: xcrypt-cbc +4: xcrypt-cfb +5: xcrypt-ofb +EndTable diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 61b41ca3b5a..d0474ad2a6e 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -35,34 +35,3 @@ int fixup_exception(struct pt_regs *regs) return 0; } - -#ifdef CONFIG_X86_64 -/* - * Need to defined our own search_extable on X86_64 to work around - * a B stepping K8 bug. - */ -const struct exception_table_entry * -search_extable(const struct exception_table_entry *first, - const struct exception_table_entry *last, - unsigned long value) -{ - /* B stepping K8 bug */ - if ((value >> 32) == 0) - value |= 0xffffffffUL << 32; - - while (first <= last) { - const struct exception_table_entry *mid; - long diff; - - mid = (last - first) / 2 + first; - diff = mid->insn - value; - if (diff == 0) - return mid; - else if (diff < 0) - first = mid+1; - else - last = mid-1; - } - return NULL; -} -#endif diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index f4cee9028cf..f62777940df 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -38,7 +38,8 @@ enum x86_pf_error_code { * Returns 0 if mmiotrace is disabled, or if the fault is not * handled by mmiotrace: */ -static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) +static inline int __kprobes +kmmio_fault(struct pt_regs *regs, unsigned long addr) { if (unlikely(is_kmmio_active())) if (kmmio_handler(regs, addr) == 1) @@ -46,7 +47,7 @@ static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) return 0; } -static inline int notify_page_fault(struct pt_regs *regs) +static inline int __kprobes notify_page_fault(struct pt_regs *regs) { int ret = 0; @@ -240,7 +241,7 @@ void vmalloc_sync_all(void) * * Handle a fault on the vmalloc or module mapping area */ -static noinline int vmalloc_fault(unsigned long address) +static noinline __kprobes int vmalloc_fault(unsigned long address) { unsigned long pgd_paddr; pmd_t *pmd_k; @@ -357,7 +358,7 @@ void vmalloc_sync_all(void) * * This assumes no large pages in there. */ -static noinline int vmalloc_fault(unsigned long address) +static noinline __kprobes int vmalloc_fault(unsigned long address) { pgd_t *pgd, *pgd_ref; pud_t *pud, *pud_ref; @@ -658,7 +659,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, show_fault_oops(regs, error_code, address); stackend = end_of_stack(tsk); - if (*stackend != STACK_END_MAGIC) + if (tsk != &init_task && *stackend != STACK_END_MAGIC) printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); tsk->thread.cr2 = address; @@ -860,7 +861,7 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte) * There are no security implications to leaving a stale TLB when * increasing the permissions on a page. */ -static noinline int +static noinline __kprobes int spurious_fault(unsigned long error_code, unsigned long address) { pgd_t *pgd; diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 16ccbd77917..11a4ad4d625 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -540,8 +540,14 @@ kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) struct die_args *arg = args; if (val == DIE_DEBUG && (arg->err & DR_STEP)) - if (post_kmmio_handler(arg->err, arg->regs) == 1) + if (post_kmmio_handler(arg->err, arg->regs) == 1) { + /* + * Reset the BS bit in dr6 (pointed by args->err) to + * denote completion of processing + */ + (*(unsigned long *)ERR_PTR(arg->err)) &= ~DR_STEP; return NOTIFY_STOP; + } return NOTIFY_DONE; } diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index dbb5381f7b3..9d7ce96e5a5 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -136,7 +136,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) apicid_to_node[apic_id] = node; node_set(node, cpu_nodes_parsed); acpi_numa = 1; - printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", + printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n", pxm, apic_id, node); } @@ -170,7 +170,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) apicid_to_node[apic_id] = node; node_set(node, cpu_nodes_parsed); acpi_numa = 1; - printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", + printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n", pxm, apic_id, node); } diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index 427fd1b56df..8565d944f7c 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c @@ -1,12 +1,13 @@ /* * Written by Pekka Paalanen, 2008-2009 <pq@iki.fi> */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/io.h> #include <linux/mmiotrace.h> -#define MODULE_NAME "testmmiotrace" - static unsigned long mmio_address; module_param(mmio_address, ulong, 0); MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB " @@ -30,7 +31,7 @@ static unsigned v32(unsigned i) static void do_write_test(void __iomem *p) { unsigned int i; - pr_info(MODULE_NAME ": write test.\n"); + pr_info("write test.\n"); mmiotrace_printk("Write test.\n"); for (i = 0; i < 256; i++) @@ -47,7 +48,7 @@ static void do_read_test(void __iomem *p) { unsigned int i; unsigned errs[3] = { 0 }; - pr_info(MODULE_NAME ": read test.\n"); + pr_info("read test.\n"); mmiotrace_printk("Read test.\n"); for (i = 0; i < 256; i++) @@ -68,7 +69,7 @@ static void do_read_test(void __iomem *p) static void do_read_far_test(void __iomem *p) { - pr_info(MODULE_NAME ": read far test.\n"); + pr_info("read far test.\n"); mmiotrace_printk("Read far test.\n"); ioread32(p + read_far); @@ -78,7 +79,7 @@ static void do_test(unsigned long size) { void __iomem *p = ioremap_nocache(mmio_address, size); if (!p) { - pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); + pr_err("could not ioremap, aborting.\n"); return; } mmiotrace_printk("ioremap returned %p.\n", p); @@ -94,24 +95,22 @@ static int __init init(void) unsigned long size = (read_far) ? (8 << 20) : (16 << 10); if (mmio_address == 0) { - pr_err(MODULE_NAME ": you have to use the module argument " - "mmio_address.\n"); - pr_err(MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS" - " YOU REALLY KNOW WHAT YOU ARE DOING!\n"); + pr_err("you have to use the module argument mmio_address.\n"); + pr_err("DO NOT LOAD THIS MODULE UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!\n"); return -ENXIO; } - pr_warning(MODULE_NAME ": WARNING: mapping %lu kB @ 0x%08lx in PCI " - "address space, and writing 16 kB of rubbish in there.\n", - size >> 10, mmio_address); + pr_warning("WARNING: mapping %lu kB @ 0x%08lx in PCI address space, " + "and writing 16 kB of rubbish in there.\n", + size >> 10, mmio_address); do_test(size); - pr_info(MODULE_NAME ": All done.\n"); + pr_info("All done.\n"); return 0; } static void __exit cleanup(void) { - pr_debug(MODULE_NAME ": unloaded.\n"); + pr_debug("unloaded.\n"); } module_init(init); diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 8aa85f17667..0a979f3e5b8 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -18,6 +18,7 @@ #include <asm/mce.h> #include <asm/xcr.h> #include <asm/suspend.h> +#include <asm/debugreg.h> #ifdef CONFIG_X86_32 static struct saved_context saved_context; @@ -142,31 +143,6 @@ static void fix_processor_context(void) #endif load_TR_desc(); /* This does ltr */ load_LDT(¤t->active_mm->context); /* This does lldt */ - - /* - * Now maybe reload the debug registers - */ - if (current->thread.debugreg7) { -#ifdef CONFIG_X86_32 - set_debugreg(current->thread.debugreg0, 0); - set_debugreg(current->thread.debugreg1, 1); - set_debugreg(current->thread.debugreg2, 2); - set_debugreg(current->thread.debugreg3, 3); - /* no 4 and 5 */ - set_debugreg(current->thread.debugreg6, 6); - set_debugreg(current->thread.debugreg7, 7); -#else - /* CONFIG_X86_64 */ - loaddebug(¤t->thread, 0); - loaddebug(¤t->thread, 1); - loaddebug(¤t->thread, 2); - loaddebug(¤t->thread, 3); - /* no 4 and 5 */ - loaddebug(¤t->thread, 6); - loaddebug(¤t->thread, 7); -#endif - } - } /** diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile new file mode 100644 index 00000000000..f8208267733 --- /dev/null +++ b/arch/x86/tools/Makefile @@ -0,0 +1,31 @@ +PHONY += posttest + +ifeq ($(KBUILD_VERBOSE),1) + posttest_verbose = -v +else + posttest_verbose = +endif + +ifeq ($(CONFIG_64BIT),y) + posttest_64bit = -y +else + posttest_64bit = -n +endif + +distill_awk = $(srctree)/arch/x86/tools/distill.awk +chkobjdump = $(srctree)/arch/x86/tools/chkobjdump.awk + +quiet_cmd_posttest = TEST $@ + cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose) + +posttest: $(obj)/test_get_len vmlinux + $(call cmd,posttest) + +hostprogs-y := test_get_len + +# -I needed for generated C source and C source which in the kernel tree. +HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ + +# Dependencies are also needed. +$(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c + diff --git a/arch/x86/tools/chkobjdump.awk b/arch/x86/tools/chkobjdump.awk new file mode 100644 index 00000000000..0d13cd9fdcf --- /dev/null +++ b/arch/x86/tools/chkobjdump.awk @@ -0,0 +1,23 @@ +# GNU objdump version checker +# +# Usage: +# objdump -v | awk -f chkobjdump.awk +BEGIN { + # objdump version 2.19 or later is OK for the test. + od_ver = 2; + od_sver = 19; +} + +/^GNU/ { + split($4, ver, "."); + if (ver[1] > od_ver || + (ver[1] == od_ver && ver[2] >= od_sver)) { + exit 1; + } else { + printf("Warning: objdump version %s is older than %d.%d\n", + $4, od_ver, od_sver); + print("Warning: Skipping posttest."); + # Logic is inverted, because we just skip test without error. + exit 0; + } +} diff --git a/arch/x86/tools/distill.awk b/arch/x86/tools/distill.awk new file mode 100644 index 00000000000..c13c0ee48ab --- /dev/null +++ b/arch/x86/tools/distill.awk @@ -0,0 +1,47 @@ +#!/bin/awk -f +# Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len +# Distills the disassembly as follows: +# - Removes all lines except the disassembled instructions. +# - For instructions that exceed 1 line (7 bytes), crams all the hex bytes +# into a single line. +# - Remove bad(or prefix only) instructions + +BEGIN { + prev_addr = "" + prev_hex = "" + prev_mnemonic = "" + bad_expr = "(\\(bad\\)|^rex|^.byte|^rep(z|nz)$|^lock$|^es$|^cs$|^ss$|^ds$|^fs$|^gs$|^data(16|32)$|^addr(16|32|64))" + fwait_expr = "^9b " + fwait_str="9b\tfwait" +} + +/^ *[0-9a-f]+ <[^>]*>:/ { + # Symbol entry + printf("%s%s\n", $2, $1) +} + +/^ *[0-9a-f]+:/ { + if (split($0, field, "\t") < 3) { + # This is a continuation of the same insn. + prev_hex = prev_hex field[2] + } else { + # Skip bad instructions + if (match(prev_mnemonic, bad_expr)) + prev_addr = "" + # Split fwait from other f* instructions + if (match(prev_hex, fwait_expr) && prev_mnemonic != "fwait") { + printf "%s\t%s\n", prev_addr, fwait_str + sub(fwait_expr, "", prev_hex) + } + if (prev_addr != "") + printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic + prev_addr = field[1] + prev_hex = field[2] + prev_mnemonic = field[3] + } +} + +END { + if (prev_addr != "") + printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic +} diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk new file mode 100644 index 00000000000..e34e92a28eb --- /dev/null +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -0,0 +1,380 @@ +#!/bin/awk -f +# gen-insn-attr-x86.awk: Instruction attribute table generator +# Written by Masami Hiramatsu <mhiramat@redhat.com> +# +# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c + +# Awk implementation sanity check +function check_awk_implement() { + if (!match("abc", "[[:lower:]]+")) + return "Your awk doesn't support charactor-class." + if (sprintf("%x", 0) != "0") + return "Your awk has a printf-format problem." + return "" +} + +# Clear working vars +function clear_vars() { + delete table + delete lptable2 + delete lptable1 + delete lptable3 + eid = -1 # escape id + gid = -1 # group id + aid = -1 # AVX id + tname = "" +} + +BEGIN { + # Implementation error checking + awkchecked = check_awk_implement() + if (awkchecked != "") { + print "Error: " awkchecked > "/dev/stderr" + print "Please try to use gawk." > "/dev/stderr" + exit 1 + } + + # Setup generating tables + print "/* x86 opcode map generated from x86-opcode-map.txt */" + print "/* Do not change this code. */\n" + ggid = 1 + geid = 1 + gaid = 0 + delete etable + delete gtable + delete atable + + opnd_expr = "^[[:alpha:]/]" + ext_expr = "^\\(" + sep_expr = "^\\|$" + group_expr = "^Grp[[:alnum:]]+" + + imm_expr = "^[IJAO][[:lower:]]" + imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" + imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)" + imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)" + imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)" + imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" + imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" + imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)" + imm_flag["Ob"] = "INAT_MOFFSET" + imm_flag["Ov"] = "INAT_MOFFSET" + + modrm_expr = "^([CDEGMNPQRSUVW/][[:lower:]]+|NTA|T[012])" + force64_expr = "\\([df]64\\)" + rex_expr = "^REX(\\.[XRWB]+)*" + fpu_expr = "^ESC" # TODO + + lprefix1_expr = "\\(66\\)" + lprefix2_expr = "\\(F3\\)" + lprefix3_expr = "\\(F2\\)" + max_lprefix = 4 + + vexok_expr = "\\(VEX\\)" + vexonly_expr = "\\(oVEX\\)" + + prefix_expr = "\\(Prefix\\)" + prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" + prefix_num["REPNE"] = "INAT_PFX_REPNE" + prefix_num["REP/REPE"] = "INAT_PFX_REPE" + prefix_num["LOCK"] = "INAT_PFX_LOCK" + prefix_num["SEG=CS"] = "INAT_PFX_CS" + prefix_num["SEG=DS"] = "INAT_PFX_DS" + prefix_num["SEG=ES"] = "INAT_PFX_ES" + prefix_num["SEG=FS"] = "INAT_PFX_FS" + prefix_num["SEG=GS"] = "INAT_PFX_GS" + prefix_num["SEG=SS"] = "INAT_PFX_SS" + prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" + prefix_num["2bytes-VEX"] = "INAT_PFX_VEX2" + prefix_num["3bytes-VEX"] = "INAT_PFX_VEX3" + + clear_vars() +} + +function semantic_error(msg) { + print "Semantic error at " NR ": " msg > "/dev/stderr" + exit 1 +} + +function debug(msg) { + print "DEBUG: " msg +} + +function array_size(arr, i,c) { + c = 0 + for (i in arr) + c++ + return c +} + +/^Table:/ { + print "/* " $0 " */" + if (tname != "") + semantic_error("Hit Table: before EndTable:."); +} + +/^Referrer:/ { + if (NF != 1) { + # escape opcode table + ref = "" + for (i = 2; i <= NF; i++) + ref = ref $i + eid = escape[ref] + tname = sprintf("inat_escape_table_%d", eid) + } +} + +/^AVXcode:/ { + if (NF != 1) { + # AVX/escape opcode table + aid = $2 + if (gaid <= aid) + gaid = aid + 1 + if (tname == "") # AVX only opcode table + tname = sprintf("inat_avx_table_%d", $2) + } + if (aid == -1 && eid == -1) # primary opcode table + tname = "inat_primary_table" +} + +/^GrpTable:/ { + print "/* " $0 " */" + if (!($2 in group)) + semantic_error("No group: " $2 ) + gid = group[$2] + tname = "inat_group_table_" gid +} + +function print_table(tbl,name,fmt,n) +{ + print "const insn_attr_t " name " = {" + for (i = 0; i < n; i++) { + id = sprintf(fmt, i) + if (tbl[id]) + print " [" id "] = " tbl[id] "," + } + print "};" +} + +/^EndTable/ { + if (gid != -1) { + # print group tables + if (array_size(table) != 0) { + print_table(table, tname "[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,0] = tname + } + if (array_size(lptable1) != 0) { + print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,1] = tname "_1" + } + if (array_size(lptable2) != 0) { + print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,2] = tname "_2" + } + if (array_size(lptable3) != 0) { + print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,3] = tname "_3" + } + } else { + # print primary/escaped tables + if (array_size(table) != 0) { + print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,0] = tname + if (aid >= 0) + atable[aid,0] = tname + } + if (array_size(lptable1) != 0) { + print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,1] = tname "_1" + if (aid >= 0) + atable[aid,1] = tname "_1" + } + if (array_size(lptable2) != 0) { + print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,2] = tname "_2" + if (aid >= 0) + atable[aid,2] = tname "_2" + } + if (array_size(lptable3) != 0) { + print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,3] = tname "_3" + if (aid >= 0) + atable[aid,3] = tname "_3" + } + } + print "" + clear_vars() +} + +function add_flags(old,new) { + if (old && new) + return old " | " new + else if (old) + return old + else + return new +} + +# convert operands to flags. +function convert_operands(opnd, i,imm,mod) +{ + imm = null + mod = null + for (i in opnd) { + i = opnd[i] + if (match(i, imm_expr) == 1) { + if (!imm_flag[i]) + semantic_error("Unknown imm opnd: " i) + if (imm) { + if (i != "Ib") + semantic_error("Second IMM error") + imm = add_flags(imm, "INAT_SCNDIMM") + } else + imm = imm_flag[i] + } else if (match(i, modrm_expr)) + mod = "INAT_MODRM" + } + return add_flags(imm, mod) +} + +/^[0-9a-f]+\:/ { + if (NR == 1) + next + # get index + idx = "0x" substr($1, 1, index($1,":") - 1) + if (idx in table) + semantic_error("Redefine " idx " in " tname) + + # check if escaped opcode + if ("escape" == $2) { + if ($3 != "#") + semantic_error("No escaped name") + ref = "" + for (i = 4; i <= NF; i++) + ref = ref $i + if (ref in escape) + semantic_error("Redefine escape (" ref ")") + escape[ref] = geid + geid++ + table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")" + next + } + + variant = null + # converts + i = 2 + while (i <= NF) { + opcode = $(i++) + delete opnds + ext = null + flags = null + opnd = null + # parse one opcode + if (match($i, opnd_expr)) { + opnd = $i + split($(i++), opnds, ",") + flags = convert_operands(opnds) + } + if (match($i, ext_expr)) + ext = $(i++) + if (match($i, sep_expr)) + i++ + else if (i < NF) + semantic_error($i " is not a separator") + + # check if group opcode + if (match(opcode, group_expr)) { + if (!(opcode in group)) { + group[opcode] = ggid + ggid++ + } + flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")") + } + # check force(or default) 64bit + if (match(ext, force64_expr)) + flags = add_flags(flags, "INAT_FORCE64") + + # check REX prefix + if (match(opcode, rex_expr)) + flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)") + + # check coprocessor escape : TODO + if (match(opcode, fpu_expr)) + flags = add_flags(flags, "INAT_MODRM") + + # check VEX only code + if (match(ext, vexonly_expr)) + flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") + + # check VEX only code + if (match(ext, vexok_expr)) + flags = add_flags(flags, "INAT_VEXOK") + + # check prefixes + if (match(ext, prefix_expr)) { + if (!prefix_num[opcode]) + semantic_error("Unknown prefix: " opcode) + flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")") + } + if (length(flags) == 0) + continue + # check if last prefix + if (match(ext, lprefix1_expr)) { + lptable1[idx] = add_flags(lptable1[idx],flags) + variant = "INAT_VARIANT" + } else if (match(ext, lprefix2_expr)) { + lptable2[idx] = add_flags(lptable2[idx],flags) + variant = "INAT_VARIANT" + } else if (match(ext, lprefix3_expr)) { + lptable3[idx] = add_flags(lptable3[idx],flags) + variant = "INAT_VARIANT" + } else { + table[idx] = add_flags(table[idx],flags) + } + } + if (variant) + table[idx] = add_flags(table[idx],variant) +} + +END { + if (awkchecked != "") + exit 1 + # print escape opcode map's array + print "/* Escape opcode map array */" + print "const insn_attr_t const *inat_escape_tables[INAT_ESC_MAX + 1]" \ + "[INAT_LSTPFX_MAX + 1] = {" + for (i = 0; i < geid; i++) + for (j = 0; j < max_lprefix; j++) + if (etable[i,j]) + print " ["i"]["j"] = "etable[i,j]"," + print "};\n" + # print group opcode map's array + print "/* Group opcode map array */" + print "const insn_attr_t const *inat_group_tables[INAT_GRP_MAX + 1]"\ + "[INAT_LSTPFX_MAX + 1] = {" + for (i = 0; i < ggid; i++) + for (j = 0; j < max_lprefix; j++) + if (gtable[i,j]) + print " ["i"]["j"] = "gtable[i,j]"," + print "};\n" + # print AVX opcode map's array + print "/* AVX opcode map array */" + print "const insn_attr_t const *inat_avx_tables[X86_VEX_M_MAX + 1]"\ + "[INAT_LSTPFX_MAX + 1] = {" + for (i = 0; i < gaid; i++) + for (j = 0; j < max_lprefix; j++) + if (atable[i,j]) + print " ["i"]["j"] = "atable[i,j]"," + print "};" +} + diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c new file mode 100644 index 00000000000..d8214dc03fa --- /dev/null +++ b/arch/x86/tools/test_get_len.c @@ -0,0 +1,173 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2009 + */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <unistd.h> + +#define unlikely(cond) (cond) + +#include <asm/insn.h> +#include <inat.c> +#include <insn.c> + +/* + * Test of instruction analysis in general and insn_get_length() in + * particular. See if insn_get_length() and the disassembler agree + * on the length of each instruction in an elf disassembly. + * + * Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len + */ + +const char *prog; +static int verbose; +static int x86_64; + +static void usage(void) +{ + fprintf(stderr, "Usage: objdump -d a.out | awk -f distill.awk |" + " %s [-y|-n] [-v] \n", prog); + fprintf(stderr, "\t-y 64bit mode\n"); + fprintf(stderr, "\t-n 32bit mode\n"); + fprintf(stderr, "\t-v verbose mode\n"); + exit(1); +} + +static void malformed_line(const char *line, int line_nr) +{ + fprintf(stderr, "%s: malformed line %d:\n%s", prog, line_nr, line); + exit(3); +} + +static void dump_field(FILE *fp, const char *name, const char *indent, + struct insn_field *field) +{ + fprintf(fp, "%s.%s = {\n", indent, name); + fprintf(fp, "%s\t.value = %d, bytes[] = {%x, %x, %x, %x},\n", + indent, field->value, field->bytes[0], field->bytes[1], + field->bytes[2], field->bytes[3]); + fprintf(fp, "%s\t.got = %d, .nbytes = %d},\n", indent, + field->got, field->nbytes); +} + +static void dump_insn(FILE *fp, struct insn *insn) +{ + fprintf(fp, "Instruction = { \n"); + dump_field(fp, "prefixes", "\t", &insn->prefixes); + dump_field(fp, "rex_prefix", "\t", &insn->rex_prefix); + dump_field(fp, "vex_prefix", "\t", &insn->vex_prefix); + dump_field(fp, "opcode", "\t", &insn->opcode); + dump_field(fp, "modrm", "\t", &insn->modrm); + dump_field(fp, "sib", "\t", &insn->sib); + dump_field(fp, "displacement", "\t", &insn->displacement); + dump_field(fp, "immediate1", "\t", &insn->immediate1); + dump_field(fp, "immediate2", "\t", &insn->immediate2); + fprintf(fp, "\t.attr = %x, .opnd_bytes = %d, .addr_bytes = %d,\n", + insn->attr, insn->opnd_bytes, insn->addr_bytes); + fprintf(fp, "\t.length = %d, .x86_64 = %d, .kaddr = %p}\n", + insn->length, insn->x86_64, insn->kaddr); +} + +static void parse_args(int argc, char **argv) +{ + int c; + prog = argv[0]; + while ((c = getopt(argc, argv, "ynv")) != -1) { + switch (c) { + case 'y': + x86_64 = 1; + break; + case 'n': + x86_64 = 0; + break; + case 'v': + verbose = 1; + break; + default: + usage(); + } + } +} + +#define BUFSIZE 256 + +int main(int argc, char **argv) +{ + char line[BUFSIZE], sym[BUFSIZE] = "<unknown>"; + unsigned char insn_buf[16]; + struct insn insn; + int insns = 0, c; + int warnings = 0; + + parse_args(argc, argv); + + while (fgets(line, BUFSIZE, stdin)) { + char copy[BUFSIZE], *s, *tab1, *tab2; + int nb = 0; + unsigned int b; + + if (line[0] == '<') { + /* Symbol line */ + strcpy(sym, line); + continue; + } + + insns++; + memset(insn_buf, 0, 16); + strcpy(copy, line); + tab1 = strchr(copy, '\t'); + if (!tab1) + malformed_line(line, insns); + s = tab1 + 1; + s += strspn(s, " "); + tab2 = strchr(s, '\t'); + if (!tab2) + malformed_line(line, insns); + *tab2 = '\0'; /* Characters beyond tab2 aren't examined */ + while (s < tab2) { + if (sscanf(s, "%x", &b) == 1) { + insn_buf[nb++] = (unsigned char) b; + s += 3; + } else + break; + } + /* Decode an instruction */ + insn_init(&insn, insn_buf, x86_64); + insn_get_length(&insn); + if (insn.length != nb) { + warnings++; + fprintf(stderr, "Warning: %s found difference at %s\n", + prog, sym); + fprintf(stderr, "Warning: %s", line); + fprintf(stderr, "Warning: objdump says %d bytes, but " + "insn_get_length() says %d\n", nb, + insn.length); + if (verbose) + dump_insn(stderr, &insn); + } + } + if (warnings) + fprintf(stderr, "Warning: decoded and checked %d" + " instructions with %d warnings\n", insns, warnings); + else + fprintf(stderr, "Succeed: decoded and checked %d" + " instructions\n", insns); + return 0; +} |