From 1c7d06d419dbe82c76fbb4d3e1fa61b2da2dc00b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Apr 2008 22:12:05 +0200
Subject: revert: thread_info.h change

temporarily revert parts of "signals: x86 TS_RESTORE_SIGMASK".

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/thread_info_32.h | 13 ++-----------
 include/asm-x86/thread_info_64.h | 13 ++-----------
 2 files changed, 4 insertions(+), 22 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/thread_info_32.h b/include/asm-x86/thread_info_32.h
index b6338829d1a..53185996209 100644
--- a/include/asm-x86/thread_info_32.h
+++ b/include/asm-x86/thread_info_32.h
@@ -131,6 +131,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_SYSCALL_EMU		5	/* syscall emulation active */
 #define TIF_SYSCALL_AUDIT	6	/* syscall auditing active */
 #define TIF_SECCOMP		7	/* secure computing */
+#define TIF_RESTORE_SIGMASK	8	/* restore signal mask in do_signal() */
 #define TIF_HRTICK_RESCHED	9	/* reprogram hrtick timer */
 #define TIF_MEMDIE		16
 #define TIF_DEBUG		17	/* uses debug registers */
@@ -150,6 +151,7 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_SYSCALL_EMU	(1 << TIF_SYSCALL_EMU)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
+#define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
 #define _TIF_HRTICK_RESCHED	(1 << TIF_HRTICK_RESCHED)
 #define _TIF_DEBUG		(1 << TIF_DEBUG)
 #define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
@@ -186,20 +188,9 @@ static inline struct thread_info *current_thread_info(void)
 					   this quantum (SMP) */
 #define TS_POLLING		0x0002	/* True if in idle loop
 					   and not sleeping */
-#define TS_RESTORE_SIGMASK	0x0004	/* restore signal mask in do_signal() */
 
 #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)
 
-#ifndef __ASSEMBLY__
-#define HAVE_SET_RESTORE_SIGMASK	1
-static inline void set_restore_sigmask(void)
-{
-	struct thread_info *ti = current_thread_info();
-	ti->status |= TS_RESTORE_SIGMASK;
-	set_bit(TIF_SIGPENDING, &ti->flags);
-}
-#endif	/* !__ASSEMBLY__ */
-
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_THREAD_INFO_H */
diff --git a/include/asm-x86/thread_info_64.h b/include/asm-x86/thread_info_64.h
index cb69f70abba..ed664e874de 100644
--- a/include/asm-x86/thread_info_64.h
+++ b/include/asm-x86/thread_info_64.h
@@ -109,6 +109,7 @@ static inline struct thread_info *stack_thread_info(void)
 #define TIF_IRET		5	/* force IRET */
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SECCOMP		8	/* secure computing */
+#define TIF_RESTORE_SIGMASK	9	/* restore signal mask in do_signal */
 #define TIF_MCE_NOTIFY		10	/* notify userspace of an MCE */
 #define TIF_HRTICK_RESCHED	11	/* reprogram hrtick timer */
 /* 16 free */
@@ -132,6 +133,7 @@ static inline struct thread_info *stack_thread_info(void)
 #define _TIF_IRET		(1 << TIF_IRET)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
+#define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
 #define _TIF_MCE_NOTIFY		(1 << TIF_MCE_NOTIFY)
 #define _TIF_HRTICK_RESCHED	(1 << TIF_HRTICK_RESCHED)
 #define _TIF_IA32		(1 << TIF_IA32)
@@ -176,20 +178,9 @@ static inline struct thread_info *stack_thread_info(void)
 #define TS_COMPAT		0x0002	/* 32bit syscall active */
 #define TS_POLLING		0x0004	/* true if in idle loop
 					   and not sleeping */
-#define TS_RESTORE_SIGMASK	0x0008	/* restore signal mask in do_signal() */
 
 #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)
 
-#ifndef __ASSEMBLY__
-#define HAVE_SET_RESTORE_SIGMASK	1
-static inline void set_restore_sigmask(void)
-{
-	struct thread_info *ti = current_thread_info();
-	ti->status |= TS_RESTORE_SIGMASK;
-	set_bit(TIF_SIGPENDING, &ti->flags);
-}
-#endif	/* !__ASSEMBLY__ */
-
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_THREAD_INFO_H */
-- 
cgit v1.2.3-70-g09d2


From 2052e8d40ad58b1d364f900e70edfda62caa0874 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 12 May 2008 15:43:41 +0200
Subject: x86: merge thread_info.h

Simple merge of both thread_info_32.h and thread_info_64.h into thread_info.h.

Comments for #ifndef __ASM_THREAD_INFO_H and #ifdef __KERNEL__
are the same.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/thread_info.h    | 366 ++++++++++++++++++++++++++++++++++++++-
 include/asm-x86/thread_info_32.h | 196 ---------------------
 include/asm-x86/thread_info_64.h | 186 --------------------
 3 files changed, 364 insertions(+), 384 deletions(-)
 delete mode 100644 include/asm-x86/thread_info_32.h
 delete mode 100644 include/asm-x86/thread_info_64.h

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index 77244f17993..d59384ac0ba 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -1,10 +1,372 @@
+/* thread_info.h: low-level thread information
+ *
+ * Copyright (C) 2002  David Howells (dhowells@redhat.com)
+ * - Incorporating suggestions made by Linus Torvalds and Dave Miller
+ */
+
 #ifndef _ASM_X86_THREAD_INFO_H
+#define _ASM_X86_THREAD_INFO_H
+
 #ifdef CONFIG_X86_32
-# include "thread_info_32.h"
+#include <linux/compiler.h>
+#include <asm/page.h>
+
+#ifndef __ASSEMBLY__
+#include <asm/processor.h>
+#endif
+
+/*
+ * low level task data that entry.S needs immediate access to
+ * - this struct should fit entirely inside of one cache line
+ * - this struct shares the supervisor stack pages
+ * - if the contents of this structure are changed,
+ *   the assembly constants must also be changed
+ */
+#ifndef __ASSEMBLY__
+
+struct thread_info {
+	struct task_struct	*task;		/* main task structure */
+	struct exec_domain	*exec_domain;	/* execution domain */
+	unsigned long		flags;		/* low level flags */
+	unsigned long		status;		/* thread-synchronous flags */
+	__u32			cpu;		/* current CPU */
+	int			preempt_count;	/* 0 => preemptable,
+						   <0 => BUG */
+	mm_segment_t		addr_limit;	/* thread address space:
+						   0-0xBFFFFFFF user-thread
+						   0-0xFFFFFFFF kernel-thread
+						*/
+	void			*sysenter_return;
+	struct restart_block    restart_block;
+	unsigned long           previous_esp;   /* ESP of the previous stack in
+						   case of nested (IRQ) stacks
+						*/
+	__u8			supervisor_stack[0];
+};
+
+#else /* !__ASSEMBLY__ */
+
+#include <asm/asm-offsets.h>
+
+#endif
+
+#define PREEMPT_ACTIVE		0x10000000
+#ifdef CONFIG_4KSTACKS
+#define THREAD_SIZE            (4096)
 #else
-# include "thread_info_64.h"
+#define THREAD_SIZE		(8192)
 #endif
 
+#define STACK_WARN             (THREAD_SIZE/8)
+/*
+ * macros/functions for gaining access to the thread information structure
+ *
+ * preempt_count needs to be 1 initially, until the scheduler is functional.
+ */
+#ifndef __ASSEMBLY__
+
+#define INIT_THREAD_INFO(tsk)			\
+{						\
+	.task		= &tsk,			\
+	.exec_domain	= &default_exec_domain,	\
+	.flags		= 0,			\
+	.cpu		= 0,			\
+	.preempt_count	= 1,			\
+	.addr_limit	= KERNEL_DS,		\
+	.restart_block = {			\
+		.fn = do_no_restart_syscall,	\
+	},					\
+}
+
+#define init_thread_info	(init_thread_union.thread_info)
+#define init_stack		(init_thread_union.stack)
+
+
+/* how to get the current stack pointer from C */
+register unsigned long current_stack_pointer asm("esp") __used;
+
+/* how to get the thread information struct from C */
+static inline struct thread_info *current_thread_info(void)
+{
+	return (struct thread_info *)
+		(current_stack_pointer & ~(THREAD_SIZE - 1));
+}
+
+/* thread information allocation */
+#ifdef CONFIG_DEBUG_STACK_USAGE
+#define alloc_thread_info(tsk) ((struct thread_info *)			\
+	__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(THREAD_SIZE)))
+#else
+#define alloc_thread_info(tsk) ((struct thread_info *)			\
+	__get_free_pages(GFP_KERNEL, get_order(THREAD_SIZE)))
+#endif
+
+#else /* !__ASSEMBLY__ */
+
+/* how to get the thread information struct from ASM */
+#define GET_THREAD_INFO(reg)	 \
+	movl $-THREAD_SIZE, reg; \
+	andl %esp, reg
+
+/* use this one if reg already contains %esp */
+#define GET_THREAD_INFO_WITH_ESP(reg) \
+	andl $-THREAD_SIZE, reg
+
+#endif
+
+/*
+ * thread information flags
+ * - these are process state flags that various
+ *   assembly files may need to access
+ * - pending work-to-be-done flags are in LSW
+ * - other flags in MSW
+ */
+#define TIF_SYSCALL_TRACE	0	/* syscall trace active */
+#define TIF_SIGPENDING		1	/* signal pending */
+#define TIF_NEED_RESCHED	2	/* rescheduling necessary */
+#define TIF_SINGLESTEP		3	/* restore singlestep on return to
+					   user mode */
+#define TIF_IRET		4	/* return with iret */
+#define TIF_SYSCALL_EMU		5	/* syscall emulation active */
+#define TIF_SYSCALL_AUDIT	6	/* syscall auditing active */
+#define TIF_SECCOMP		7	/* secure computing */
+#define TIF_RESTORE_SIGMASK	8	/* restore signal mask in do_signal() */
+#define TIF_HRTICK_RESCHED	9	/* reprogram hrtick timer */
+#define TIF_MEMDIE		16
+#define TIF_DEBUG		17	/* uses debug registers */
+#define TIF_IO_BITMAP		18	/* uses I/O bitmap */
+#define TIF_FREEZE		19	/* is freezing for suspend */
+#define TIF_NOTSC		20	/* TSC is not accessible in userland */
+#define TIF_FORCED_TF		21	/* true if TF in eflags artificially */
+#define TIF_DEBUGCTLMSR		22	/* uses thread_struct.debugctlmsr */
+#define TIF_DS_AREA_MSR 	23      /* uses thread_struct.ds_area_msr */
+#define TIF_BTS_TRACE_TS        24      /* record scheduling event timestamps */
+
+#define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
+#define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
+#define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
+#define _TIF_IRET		(1 << TIF_IRET)
+#define _TIF_SYSCALL_EMU	(1 << TIF_SYSCALL_EMU)
+#define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
+#define _TIF_SECCOMP		(1 << TIF_SECCOMP)
+#define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
+#define _TIF_HRTICK_RESCHED	(1 << TIF_HRTICK_RESCHED)
+#define _TIF_DEBUG		(1 << TIF_DEBUG)
+#define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
+#define _TIF_FREEZE		(1 << TIF_FREEZE)
+#define _TIF_NOTSC		(1 << TIF_NOTSC)
+#define _TIF_FORCED_TF		(1 << TIF_FORCED_TF)
+#define _TIF_DEBUGCTLMSR	(1 << TIF_DEBUGCTLMSR)
+#define _TIF_DS_AREA_MSR	(1 << TIF_DS_AREA_MSR)
+#define _TIF_BTS_TRACE_TS	(1 << TIF_BTS_TRACE_TS)
+
+/* work to do on interrupt/exception return */
+#define _TIF_WORK_MASK							\
+	(0x0000FFFF & ~(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT |	\
+			_TIF_SECCOMP | _TIF_SYSCALL_EMU))
+/* work to do on any return to u-space */
+#define _TIF_ALLWORK_MASK	(0x0000FFFF & ~_TIF_SECCOMP)
+
+/* flags to check in __switch_to() */
+#define _TIF_WORK_CTXSW						\
+	(_TIF_IO_BITMAP | _TIF_NOTSC | _TIF_DEBUGCTLMSR |	\
+	 _TIF_DS_AREA_MSR | _TIF_BTS_TRACE_TS)
+#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
+#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW | _TIF_DEBUG)
+
+
+/*
+ * Thread-synchronous status.
+ *
+ * This is different from the flags in that nobody else
+ * ever touches our thread-synchronous status, so we don't
+ * have to worry about atomic accesses.
+ */
+#define TS_USEDFPU		0x0001	/* FPU was used by this task
+					   this quantum (SMP) */
+#define TS_POLLING		0x0002	/* True if in idle loop
+					   and not sleeping */
+
+#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)
+
+#else /* X86_32 */
+
+#include <asm/page.h>
+#include <asm/types.h>
+#include <asm/pda.h>
+
+/*
+ * low level task data that entry.S needs immediate access to
+ * - this struct should fit entirely inside of one cache line
+ * - this struct shares the supervisor stack pages
+ */
+#ifndef __ASSEMBLY__
+struct task_struct;
+struct exec_domain;
+#include <asm/processor.h>
+
+struct thread_info {
+	struct task_struct	*task;		/* main task structure */
+	struct exec_domain	*exec_domain;	/* execution domain */
+	__u32			flags;		/* low level flags */
+	__u32			status;		/* thread synchronous flags */
+	__u32			cpu;		/* current CPU */
+	int 			preempt_count;	/* 0 => preemptable,
+						   <0 => BUG */
+	mm_segment_t		addr_limit;
+	struct restart_block    restart_block;
+#ifdef CONFIG_IA32_EMULATION
+	void __user		*sysenter_return;
+#endif
+};
+#endif
+
+/*
+ * macros/functions for gaining access to the thread information structure
+ * preempt_count needs to be 1 initially, until the scheduler is functional.
+ */
+#ifndef __ASSEMBLY__
+#define INIT_THREAD_INFO(tsk)			\
+{						\
+	.task	       = &tsk,			\
+	.exec_domain   = &default_exec_domain,	\
+	.flags	       = 0,			\
+	.cpu	       = 0,			\
+	.preempt_count = 1,			\
+	.addr_limit     = KERNEL_DS,		\
+	.restart_block = {			\
+		.fn = do_no_restart_syscall,	\
+	},					\
+}
+
+#define init_thread_info	(init_thread_union.thread_info)
+#define init_stack		(init_thread_union.stack)
+
+static inline struct thread_info *current_thread_info(void)
+{
+	struct thread_info *ti;
+	ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE);
+	return ti;
+}
+
+/* do not use in interrupt context */
+static inline struct thread_info *stack_thread_info(void)
+{
+	struct thread_info *ti;
+	asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1)));
+	return ti;
+}
+
+/* thread information allocation */
+#ifdef CONFIG_DEBUG_STACK_USAGE
+#define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO)
+#else
+#define THREAD_FLAGS GFP_KERNEL
+#endif
+
+#define alloc_thread_info(tsk)						\
+	((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER))
+
+#else /* !__ASSEMBLY__ */
+
+/* how to get the thread information struct from ASM */
+#define GET_THREAD_INFO(reg) \
+	movq %gs:pda_kernelstack,reg ; \
+	subq $(THREAD_SIZE-PDA_STACKOFFSET),reg
+
+#endif
+
+/*
+ * thread information flags
+ * - these are process state flags that various assembly files
+ *   may need to access
+ * - pending work-to-be-done flags are in LSW
+ * - other flags in MSW
+ * Warning: layout of LSW is hardcoded in entry.S
+ */
+#define TIF_SYSCALL_TRACE	0	/* syscall trace active */
+#define TIF_SIGPENDING		2	/* signal pending */
+#define TIF_NEED_RESCHED	3	/* rescheduling necessary */
+#define TIF_SINGLESTEP		4	/* reenable singlestep on user return*/
+#define TIF_IRET		5	/* force IRET */
+#define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
+#define TIF_SECCOMP		8	/* secure computing */
+#define TIF_RESTORE_SIGMASK	9	/* restore signal mask in do_signal */
+#define TIF_MCE_NOTIFY		10	/* notify userspace of an MCE */
+#define TIF_HRTICK_RESCHED	11	/* reprogram hrtick timer */
+/* 16 free */
+#define TIF_IA32		17	/* 32bit process */
+#define TIF_FORK		18	/* ret_from_fork */
+#define TIF_ABI_PENDING		19
+#define TIF_MEMDIE		20
+#define TIF_DEBUG		21	/* uses debug registers */
+#define TIF_IO_BITMAP		22	/* uses I/O bitmap */
+#define TIF_FREEZE		23	/* is freezing for suspend */
+#define TIF_FORCED_TF		24	/* true if TF in eflags artificially */
+#define TIF_DEBUGCTLMSR		25	/* uses thread_struct.debugctlmsr */
+#define TIF_DS_AREA_MSR		26      /* uses thread_struct.ds_area_msr */
+#define TIF_BTS_TRACE_TS	27      /* record scheduling event timestamps */
+#define TIF_NOTSC		28	/* TSC is not accessible in userland */
+
+#define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
+#define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
+#define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
+#define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
+#define _TIF_IRET		(1 << TIF_IRET)
+#define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
+#define _TIF_SECCOMP		(1 << TIF_SECCOMP)
+#define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
+#define _TIF_MCE_NOTIFY		(1 << TIF_MCE_NOTIFY)
+#define _TIF_HRTICK_RESCHED	(1 << TIF_HRTICK_RESCHED)
+#define _TIF_IA32		(1 << TIF_IA32)
+#define _TIF_FORK		(1 << TIF_FORK)
+#define _TIF_ABI_PENDING	(1 << TIF_ABI_PENDING)
+#define _TIF_DEBUG		(1 << TIF_DEBUG)
+#define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
+#define _TIF_FREEZE		(1 << TIF_FREEZE)
+#define _TIF_FORCED_TF		(1 << TIF_FORCED_TF)
+#define _TIF_DEBUGCTLMSR	(1 << TIF_DEBUGCTLMSR)
+#define _TIF_DS_AREA_MSR	(1 << TIF_DS_AREA_MSR)
+#define _TIF_BTS_TRACE_TS	(1 << TIF_BTS_TRACE_TS)
+#define _TIF_NOTSC		(1 << TIF_NOTSC)
+
+/* work to do on interrupt/exception return */
+#define _TIF_WORK_MASK							\
+	(0x0000FFFF &							\
+	 ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP|_TIF_SECCOMP))
+/* work to do on any return to user space */
+#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP)
+
+#define _TIF_DO_NOTIFY_MASK						\
+	(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED)
+
+/* flags to check in __switch_to() */
+#define _TIF_WORK_CTXSW							\
+	(_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS|_TIF_NOTSC)
+#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
+#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
+
+#define PREEMPT_ACTIVE     0x10000000
+
+/*
+ * Thread-synchronous status.
+ *
+ * This is different from the flags in that nobody else
+ * ever touches our thread-synchronous status, so we don't
+ * have to worry about atomic accesses.
+ */
+#define TS_USEDFPU		0x0001	/* FPU was used by this task
+					   this quantum (SMP) */
+#define TS_COMPAT		0x0002	/* 32bit syscall active */
+#define TS_POLLING		0x0004	/* true if in idle loop
+					   and not sleeping */
+
+#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)
+
+#endif /* !X86_32 */
+
+
 #ifndef __ASSEMBLY__
 extern void arch_task_cache_init(void);
 extern void free_thread_info(struct thread_info *ti);
diff --git a/include/asm-x86/thread_info_32.h b/include/asm-x86/thread_info_32.h
deleted file mode 100644
index 53185996209..00000000000
--- a/include/asm-x86/thread_info_32.h
+++ /dev/null
@@ -1,196 +0,0 @@
-/* thread_info.h: i386 low-level thread information
- *
- * Copyright (C) 2002  David Howells (dhowells@redhat.com)
- * - Incorporating suggestions made by Linus Torvalds and Dave Miller
- */
-
-#ifndef _ASM_THREAD_INFO_H
-#define _ASM_THREAD_INFO_H
-
-#ifdef __KERNEL__
-
-#include <linux/compiler.h>
-#include <asm/page.h>
-
-#ifndef __ASSEMBLY__
-#include <asm/processor.h>
-#endif
-
-/*
- * low level task data that entry.S needs immediate access to
- * - this struct should fit entirely inside of one cache line
- * - this struct shares the supervisor stack pages
- * - if the contents of this structure are changed,
- *   the assembly constants must also be changed
- */
-#ifndef __ASSEMBLY__
-
-struct thread_info {
-	struct task_struct	*task;		/* main task structure */
-	struct exec_domain	*exec_domain;	/* execution domain */
-	unsigned long		flags;		/* low level flags */
-	unsigned long		status;		/* thread-synchronous flags */
-	__u32			cpu;		/* current CPU */
-	int			preempt_count;	/* 0 => preemptable,
-						   <0 => BUG */
-	mm_segment_t		addr_limit;	/* thread address space:
-						   0-0xBFFFFFFF user-thread
-						   0-0xFFFFFFFF kernel-thread
-						*/
-	void			*sysenter_return;
-	struct restart_block    restart_block;
-	unsigned long           previous_esp;   /* ESP of the previous stack in
-						   case of nested (IRQ) stacks
-						*/
-	__u8			supervisor_stack[0];
-};
-
-#else /* !__ASSEMBLY__ */
-
-#include <asm/asm-offsets.h>
-
-#endif
-
-#define PREEMPT_ACTIVE		0x10000000
-#ifdef CONFIG_4KSTACKS
-#define THREAD_SIZE            (4096)
-#else
-#define THREAD_SIZE		(8192)
-#endif
-
-#define STACK_WARN             (THREAD_SIZE/8)
-/*
- * macros/functions for gaining access to the thread information structure
- *
- * preempt_count needs to be 1 initially, until the scheduler is functional.
- */
-#ifndef __ASSEMBLY__
-
-#define INIT_THREAD_INFO(tsk)			\
-{						\
-	.task		= &tsk,			\
-	.exec_domain	= &default_exec_domain,	\
-	.flags		= 0,			\
-	.cpu		= 0,			\
-	.preempt_count	= 1,			\
-	.addr_limit	= KERNEL_DS,		\
-	.restart_block = {			\
-		.fn = do_no_restart_syscall,	\
-	},					\
-}
-
-#define init_thread_info	(init_thread_union.thread_info)
-#define init_stack		(init_thread_union.stack)
-
-
-/* how to get the current stack pointer from C */
-register unsigned long current_stack_pointer asm("esp") __used;
-
-/* how to get the thread information struct from C */
-static inline struct thread_info *current_thread_info(void)
-{
-	return (struct thread_info *)
-		(current_stack_pointer & ~(THREAD_SIZE - 1));
-}
-
-/* thread information allocation */
-#ifdef CONFIG_DEBUG_STACK_USAGE
-#define alloc_thread_info(tsk) ((struct thread_info *)			\
-	__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(THREAD_SIZE)))
-#else
-#define alloc_thread_info(tsk) ((struct thread_info *)			\
-	__get_free_pages(GFP_KERNEL, get_order(THREAD_SIZE)))
-#endif
-
-#else /* !__ASSEMBLY__ */
-
-/* how to get the thread information struct from ASM */
-#define GET_THREAD_INFO(reg)	 \
-	movl $-THREAD_SIZE, reg; \
-	andl %esp, reg
-
-/* use this one if reg already contains %esp */
-#define GET_THREAD_INFO_WITH_ESP(reg) \
-	andl $-THREAD_SIZE, reg
-
-#endif
-
-/*
- * thread information flags
- * - these are process state flags that various
- *   assembly files may need to access
- * - pending work-to-be-done flags are in LSW
- * - other flags in MSW
- */
-#define TIF_SYSCALL_TRACE	0	/* syscall trace active */
-#define TIF_SIGPENDING		1	/* signal pending */
-#define TIF_NEED_RESCHED	2	/* rescheduling necessary */
-#define TIF_SINGLESTEP		3	/* restore singlestep on return to
-					   user mode */
-#define TIF_IRET		4	/* return with iret */
-#define TIF_SYSCALL_EMU		5	/* syscall emulation active */
-#define TIF_SYSCALL_AUDIT	6	/* syscall auditing active */
-#define TIF_SECCOMP		7	/* secure computing */
-#define TIF_RESTORE_SIGMASK	8	/* restore signal mask in do_signal() */
-#define TIF_HRTICK_RESCHED	9	/* reprogram hrtick timer */
-#define TIF_MEMDIE		16
-#define TIF_DEBUG		17	/* uses debug registers */
-#define TIF_IO_BITMAP		18	/* uses I/O bitmap */
-#define TIF_FREEZE		19	/* is freezing for suspend */
-#define TIF_NOTSC		20	/* TSC is not accessible in userland */
-#define TIF_FORCED_TF		21	/* true if TF in eflags artificially */
-#define TIF_DEBUGCTLMSR		22	/* uses thread_struct.debugctlmsr */
-#define TIF_DS_AREA_MSR 	23      /* uses thread_struct.ds_area_msr */
-#define TIF_BTS_TRACE_TS        24      /* record scheduling event timestamps */
-
-#define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
-#define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
-#define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
-#define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
-#define _TIF_IRET		(1 << TIF_IRET)
-#define _TIF_SYSCALL_EMU	(1 << TIF_SYSCALL_EMU)
-#define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
-#define _TIF_SECCOMP		(1 << TIF_SECCOMP)
-#define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
-#define _TIF_HRTICK_RESCHED	(1 << TIF_HRTICK_RESCHED)
-#define _TIF_DEBUG		(1 << TIF_DEBUG)
-#define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
-#define _TIF_FREEZE		(1 << TIF_FREEZE)
-#define _TIF_NOTSC		(1 << TIF_NOTSC)
-#define _TIF_FORCED_TF		(1 << TIF_FORCED_TF)
-#define _TIF_DEBUGCTLMSR	(1 << TIF_DEBUGCTLMSR)
-#define _TIF_DS_AREA_MSR	(1 << TIF_DS_AREA_MSR)
-#define _TIF_BTS_TRACE_TS	(1 << TIF_BTS_TRACE_TS)
-
-/* work to do on interrupt/exception return */
-#define _TIF_WORK_MASK							\
-	(0x0000FFFF & ~(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT |	\
-			_TIF_SECCOMP | _TIF_SYSCALL_EMU))
-/* work to do on any return to u-space */
-#define _TIF_ALLWORK_MASK	(0x0000FFFF & ~_TIF_SECCOMP)
-
-/* flags to check in __switch_to() */
-#define _TIF_WORK_CTXSW						\
-	(_TIF_IO_BITMAP | _TIF_NOTSC | _TIF_DEBUGCTLMSR |	\
-	 _TIF_DS_AREA_MSR | _TIF_BTS_TRACE_TS)
-#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
-#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW | _TIF_DEBUG)
-
-
-/*
- * Thread-synchronous status.
- *
- * This is different from the flags in that nobody else
- * ever touches our thread-synchronous status, so we don't
- * have to worry about atomic accesses.
- */
-#define TS_USEDFPU		0x0001	/* FPU was used by this task
-					   this quantum (SMP) */
-#define TS_POLLING		0x0002	/* True if in idle loop
-					   and not sleeping */
-
-#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)
-
-#endif /* __KERNEL__ */
-
-#endif /* _ASM_THREAD_INFO_H */
diff --git a/include/asm-x86/thread_info_64.h b/include/asm-x86/thread_info_64.h
deleted file mode 100644
index ed664e874de..00000000000
--- a/include/asm-x86/thread_info_64.h
+++ /dev/null
@@ -1,186 +0,0 @@
-/* thread_info.h: x86_64 low-level thread information
- *
- * Copyright (C) 2002  David Howells (dhowells@redhat.com)
- * - Incorporating suggestions made by Linus Torvalds and Dave Miller
- */
-
-#ifndef _ASM_THREAD_INFO_H
-#define _ASM_THREAD_INFO_H
-
-#ifdef __KERNEL__
-
-#include <asm/page.h>
-#include <asm/types.h>
-#include <asm/pda.h>
-
-/*
- * low level task data that entry.S needs immediate access to
- * - this struct should fit entirely inside of one cache line
- * - this struct shares the supervisor stack pages
- */
-#ifndef __ASSEMBLY__
-struct task_struct;
-struct exec_domain;
-#include <asm/processor.h>
-
-struct thread_info {
-	struct task_struct	*task;		/* main task structure */
-	struct exec_domain	*exec_domain;	/* execution domain */
-	__u32			flags;		/* low level flags */
-	__u32			status;		/* thread synchronous flags */
-	__u32			cpu;		/* current CPU */
-	int 			preempt_count;	/* 0 => preemptable,
-						   <0 => BUG */
-	mm_segment_t		addr_limit;
-	struct restart_block    restart_block;
-#ifdef CONFIG_IA32_EMULATION
-	void __user		*sysenter_return;
-#endif
-};
-#endif
-
-/*
- * macros/functions for gaining access to the thread information structure
- * preempt_count needs to be 1 initially, until the scheduler is functional.
- */
-#ifndef __ASSEMBLY__
-#define INIT_THREAD_INFO(tsk)			\
-{						\
-	.task	       = &tsk,			\
-	.exec_domain   = &default_exec_domain,	\
-	.flags	       = 0,			\
-	.cpu	       = 0,			\
-	.preempt_count = 1,			\
-	.addr_limit     = KERNEL_DS,		\
-	.restart_block = {			\
-		.fn = do_no_restart_syscall,	\
-	},					\
-}
-
-#define init_thread_info	(init_thread_union.thread_info)
-#define init_stack		(init_thread_union.stack)
-
-static inline struct thread_info *current_thread_info(void)
-{
-	struct thread_info *ti;
-	ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE);
-	return ti;
-}
-
-/* do not use in interrupt context */
-static inline struct thread_info *stack_thread_info(void)
-{
-	struct thread_info *ti;
-	asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1)));
-	return ti;
-}
-
-/* thread information allocation */
-#ifdef CONFIG_DEBUG_STACK_USAGE
-#define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO)
-#else
-#define THREAD_FLAGS GFP_KERNEL
-#endif
-
-#define alloc_thread_info(tsk)						\
-	((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER))
-
-#else /* !__ASSEMBLY__ */
-
-/* how to get the thread information struct from ASM */
-#define GET_THREAD_INFO(reg) \
-	movq %gs:pda_kernelstack,reg ; \
-	subq $(THREAD_SIZE-PDA_STACKOFFSET),reg
-
-#endif
-
-/*
- * thread information flags
- * - these are process state flags that various assembly files
- *   may need to access
- * - pending work-to-be-done flags are in LSW
- * - other flags in MSW
- * Warning: layout of LSW is hardcoded in entry.S
- */
-#define TIF_SYSCALL_TRACE	0	/* syscall trace active */
-#define TIF_SIGPENDING		2	/* signal pending */
-#define TIF_NEED_RESCHED	3	/* rescheduling necessary */
-#define TIF_SINGLESTEP		4	/* reenable singlestep on user return*/
-#define TIF_IRET		5	/* force IRET */
-#define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
-#define TIF_SECCOMP		8	/* secure computing */
-#define TIF_RESTORE_SIGMASK	9	/* restore signal mask in do_signal */
-#define TIF_MCE_NOTIFY		10	/* notify userspace of an MCE */
-#define TIF_HRTICK_RESCHED	11	/* reprogram hrtick timer */
-/* 16 free */
-#define TIF_IA32		17	/* 32bit process */
-#define TIF_FORK		18	/* ret_from_fork */
-#define TIF_ABI_PENDING		19
-#define TIF_MEMDIE		20
-#define TIF_DEBUG		21	/* uses debug registers */
-#define TIF_IO_BITMAP		22	/* uses I/O bitmap */
-#define TIF_FREEZE		23	/* is freezing for suspend */
-#define TIF_FORCED_TF		24	/* true if TF in eflags artificially */
-#define TIF_DEBUGCTLMSR		25	/* uses thread_struct.debugctlmsr */
-#define TIF_DS_AREA_MSR		26      /* uses thread_struct.ds_area_msr */
-#define TIF_BTS_TRACE_TS	27      /* record scheduling event timestamps */
-#define TIF_NOTSC		28	/* TSC is not accessible in userland */
-
-#define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
-#define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
-#define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
-#define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
-#define _TIF_IRET		(1 << TIF_IRET)
-#define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
-#define _TIF_SECCOMP		(1 << TIF_SECCOMP)
-#define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
-#define _TIF_MCE_NOTIFY		(1 << TIF_MCE_NOTIFY)
-#define _TIF_HRTICK_RESCHED	(1 << TIF_HRTICK_RESCHED)
-#define _TIF_IA32		(1 << TIF_IA32)
-#define _TIF_FORK		(1 << TIF_FORK)
-#define _TIF_ABI_PENDING	(1 << TIF_ABI_PENDING)
-#define _TIF_DEBUG		(1 << TIF_DEBUG)
-#define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
-#define _TIF_FREEZE		(1 << TIF_FREEZE)
-#define _TIF_FORCED_TF		(1 << TIF_FORCED_TF)
-#define _TIF_DEBUGCTLMSR	(1 << TIF_DEBUGCTLMSR)
-#define _TIF_DS_AREA_MSR	(1 << TIF_DS_AREA_MSR)
-#define _TIF_BTS_TRACE_TS	(1 << TIF_BTS_TRACE_TS)
-#define _TIF_NOTSC		(1 << TIF_NOTSC)
-
-/* work to do on interrupt/exception return */
-#define _TIF_WORK_MASK							\
-	(0x0000FFFF &							\
-	 ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP|_TIF_SECCOMP))
-/* work to do on any return to user space */
-#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP)
-
-#define _TIF_DO_NOTIFY_MASK						\
-	(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED)
-
-/* flags to check in __switch_to() */
-#define _TIF_WORK_CTXSW							\
-	(_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS|_TIF_NOTSC)
-#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
-#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
-
-#define PREEMPT_ACTIVE     0x10000000
-
-/*
- * Thread-synchronous status.
- *
- * This is different from the flags in that nobody else
- * ever touches our thread-synchronous status, so we don't
- * have to worry about atomic accesses.
- */
-#define TS_USEDFPU		0x0001	/* FPU was used by this task
-					   this quantum (SMP) */
-#define TS_COMPAT		0x0002	/* 32bit syscall active */
-#define TS_POLLING		0x0004	/* true if in idle loop
-					   and not sleeping */
-
-#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)
-
-#endif /* __KERNEL__ */
-
-#endif /* _ASM_THREAD_INFO_H */
-- 
cgit v1.2.3-70-g09d2


From 12a638e13c68bbe187782518dab375f4fa800fc4 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 28 Apr 2008 18:52:33 -0700
Subject: x86: threadinfo: common include files

Move shared includes to a common area in thread_info.h

Adds asm/types.h for x86_64 and linux/compiler.h for x86_32. Not needed but
we can avoid some ifdeffing and it simplifies later joining.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/thread_info.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index d59384ac0ba..d2dc1a3b5d6 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -7,9 +7,11 @@
 #ifndef _ASM_X86_THREAD_INFO_H
 #define _ASM_X86_THREAD_INFO_H
 
-#ifdef CONFIG_X86_32
 #include <linux/compiler.h>
 #include <asm/page.h>
+#include <asm/types.h>
+
+#ifdef CONFIG_X86_32
 
 #ifndef __ASSEMBLY__
 #include <asm/processor.h>
@@ -192,8 +194,6 @@ static inline struct thread_info *current_thread_info(void)
 
 #else /* X86_32 */
 
-#include <asm/page.h>
-#include <asm/types.h>
 #include <asm/pda.h>
 
 /*
-- 
cgit v1.2.3-70-g09d2


From f2ea3b1d4d7ab66d86da57899993282f3deb1f74 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 28 Apr 2008 18:52:34 -0700
Subject: x86: threadinfo: merge thread sync state definitions

Merge both. x86_64 has an additional TS_COMPAT that is harmless
for 32 bit.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/thread_info.h | 22 +++-------------------
 1 file changed, 3 insertions(+), 19 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index d2dc1a3b5d6..4b91f59de8f 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -177,21 +177,6 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW | _TIF_DEBUG)
 
-
-/*
- * Thread-synchronous status.
- *
- * This is different from the flags in that nobody else
- * ever touches our thread-synchronous status, so we don't
- * have to worry about atomic accesses.
- */
-#define TS_USEDFPU		0x0001	/* FPU was used by this task
-					   this quantum (SMP) */
-#define TS_POLLING		0x0002	/* True if in idle loop
-					   and not sleeping */
-
-#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)
-
 #else /* X86_32 */
 
 #include <asm/pda.h>
@@ -349,6 +334,8 @@ static inline struct thread_info *stack_thread_info(void)
 
 #define PREEMPT_ACTIVE     0x10000000
 
+#endif /* !X86_32 */
+
 /*
  * Thread-synchronous status.
  *
@@ -358,15 +345,12 @@ static inline struct thread_info *stack_thread_info(void)
  */
 #define TS_USEDFPU		0x0001	/* FPU was used by this task
 					   this quantum (SMP) */
-#define TS_COMPAT		0x0002	/* 32bit syscall active */
+#define TS_COMPAT		0x0002	/* 32bit syscall active (64BIT)*/
 #define TS_POLLING		0x0004	/* true if in idle loop
 					   and not sleeping */
 
 #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)
 
-#endif /* !X86_32 */
-
-
 #ifndef __ASSEMBLY__
 extern void arch_task_cache_init(void);
 extern void free_thread_info(struct thread_info *ti);
-- 
cgit v1.2.3-70-g09d2


From 006c484bb3d9547e82a33a09668c9b54b912c8fb Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 28 Apr 2008 18:52:35 -0700
Subject: x86: common thread_info definitions

Merge the thread_info definition into one structure definition for both arches.

The __u32 is equal to unsigned long for 32 bit.

sysenter_return is used both for the IA32 emulation for 64 and x86_32.
Avoid complicated #ifdef by simply always including it.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/thread_info.h | 55 ++++++++++---------------------------------
 1 file changed, 12 insertions(+), 43 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index 4b91f59de8f..71b0880d80b 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -11,47 +11,42 @@
 #include <asm/page.h>
 #include <asm/types.h>
 
-#ifdef CONFIG_X86_32
-
-#ifndef __ASSEMBLY__
-#include <asm/processor.h>
-#endif
-
 /*
  * low level task data that entry.S needs immediate access to
  * - this struct should fit entirely inside of one cache line
  * - this struct shares the supervisor stack pages
- * - if the contents of this structure are changed,
- *   the assembly constants must also be changed
  */
 #ifndef __ASSEMBLY__
+struct task_struct;
+struct exec_domain;
+#include <asm/processor.h>
 
 struct thread_info {
 	struct task_struct	*task;		/* main task structure */
 	struct exec_domain	*exec_domain;	/* execution domain */
-	unsigned long		flags;		/* low level flags */
-	unsigned long		status;		/* thread-synchronous flags */
+	__u32			flags;		/* low level flags */
+	__u32			status;		/* thread synchronous flags */
 	__u32			cpu;		/* current CPU */
-	int			preempt_count;	/* 0 => preemptable,
+	int 			preempt_count;	/* 0 => preemptable,
 						   <0 => BUG */
-	mm_segment_t		addr_limit;	/* thread address space:
-						   0-0xBFFFFFFF user-thread
-						   0-0xFFFFFFFF kernel-thread
-						*/
-	void			*sysenter_return;
+	mm_segment_t		addr_limit;
 	struct restart_block    restart_block;
+	void __user		*sysenter_return;
+#ifdef CONFIG_X86_32
 	unsigned long           previous_esp;   /* ESP of the previous stack in
 						   case of nested (IRQ) stacks
 						*/
 	__u8			supervisor_stack[0];
+#endif
 };
-
 #else /* !__ASSEMBLY__ */
 
 #include <asm/asm-offsets.h>
 
 #endif
 
+#ifdef CONFIG_X86_32
+
 #define PREEMPT_ACTIVE		0x10000000
 #ifdef CONFIG_4KSTACKS
 #define THREAD_SIZE            (4096)
@@ -181,32 +176,6 @@ static inline struct thread_info *current_thread_info(void)
 
 #include <asm/pda.h>
 
-/*
- * low level task data that entry.S needs immediate access to
- * - this struct should fit entirely inside of one cache line
- * - this struct shares the supervisor stack pages
- */
-#ifndef __ASSEMBLY__
-struct task_struct;
-struct exec_domain;
-#include <asm/processor.h>
-
-struct thread_info {
-	struct task_struct	*task;		/* main task structure */
-	struct exec_domain	*exec_domain;	/* execution domain */
-	__u32			flags;		/* low level flags */
-	__u32			status;		/* thread synchronous flags */
-	__u32			cpu;		/* current CPU */
-	int 			preempt_count;	/* 0 => preemptable,
-						   <0 => BUG */
-	mm_segment_t		addr_limit;
-	struct restart_block    restart_block;
-#ifdef CONFIG_IA32_EMULATION
-	void __user		*sysenter_return;
-#endif
-};
-#endif
-
 /*
  * macros/functions for gaining access to the thread information structure
  * preempt_count needs to be 1 initially, until the scheduler is functional.
-- 
cgit v1.2.3-70-g09d2


From 3351cc03c0762353225a79507e38db4c1e656d52 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 28 Apr 2008 18:52:36 -0700
Subject: x86: threadinfo: merge INIT_THREAD_INFO

Both definitions are the same. So move to common x86 area.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/thread_info.h | 49 +++++++++++++++----------------------------
 1 file changed, 17 insertions(+), 32 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index 71b0880d80b..8cd52d4bfb0 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -39,6 +39,23 @@ struct thread_info {
 	__u8			supervisor_stack[0];
 #endif
 };
+
+#define INIT_THREAD_INFO(tsk)			\
+{						\
+	.task		= &tsk,			\
+	.exec_domain	= &default_exec_domain,	\
+	.flags		= 0,			\
+	.cpu		= 0,			\
+	.preempt_count	= 1,			\
+	.addr_limit	= KERNEL_DS,		\
+	.restart_block = {			\
+		.fn = do_no_restart_syscall,	\
+	},					\
+}
+
+#define init_thread_info	(init_thread_union.thread_info)
+#define init_stack		(init_thread_union.stack)
+
 #else /* !__ASSEMBLY__ */
 
 #include <asm/asm-offsets.h>
@@ -62,22 +79,6 @@ struct thread_info {
  */
 #ifndef __ASSEMBLY__
 
-#define INIT_THREAD_INFO(tsk)			\
-{						\
-	.task		= &tsk,			\
-	.exec_domain	= &default_exec_domain,	\
-	.flags		= 0,			\
-	.cpu		= 0,			\
-	.preempt_count	= 1,			\
-	.addr_limit	= KERNEL_DS,		\
-	.restart_block = {			\
-		.fn = do_no_restart_syscall,	\
-	},					\
-}
-
-#define init_thread_info	(init_thread_union.thread_info)
-#define init_stack		(init_thread_union.stack)
-
 
 /* how to get the current stack pointer from C */
 register unsigned long current_stack_pointer asm("esp") __used;
@@ -181,22 +182,6 @@ static inline struct thread_info *current_thread_info(void)
  * preempt_count needs to be 1 initially, until the scheduler is functional.
  */
 #ifndef __ASSEMBLY__
-#define INIT_THREAD_INFO(tsk)			\
-{						\
-	.task	       = &tsk,			\
-	.exec_domain   = &default_exec_domain,	\
-	.flags	       = 0,			\
-	.cpu	       = 0,			\
-	.preempt_count = 1,			\
-	.addr_limit     = KERNEL_DS,		\
-	.restart_block = {			\
-		.fn = do_no_restart_syscall,	\
-	},					\
-}
-
-#define init_thread_info	(init_thread_union.thread_info)
-#define init_stack		(init_thread_union.stack)
-
 static inline struct thread_info *current_thread_info(void)
 {
 	struct thread_info *ti;
-- 
cgit v1.2.3-70-g09d2


From 24e2de6e28a453cd114b06215df2f9931cd0c342 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 28 Apr 2008 18:52:37 -0700
Subject: x86: thread_info: PREEMPT_ACTIVE

Same for both 32 and 64 bit so simply put it in to the common area.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/thread_info.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index 8cd52d4bfb0..f8d5cf516ac 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -62,9 +62,10 @@ struct thread_info {
 
 #endif
 
+#define PREEMPT_ACTIVE		0x10000000
+
 #ifdef CONFIG_X86_32
 
-#define PREEMPT_ACTIVE		0x10000000
 #ifdef CONFIG_4KSTACKS
 #define THREAD_SIZE            (4096)
 #else
@@ -286,8 +287,6 @@ static inline struct thread_info *stack_thread_info(void)
 #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
 
-#define PREEMPT_ACTIVE     0x10000000
-
 #endif /* !X86_32 */
 
 /*
-- 
cgit v1.2.3-70-g09d2


From e57549b017552f7a493b366f5ccd4781801083e4 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 28 Apr 2008 18:52:38 -0700
Subject: x86: thread_info: merge TIF_ flags.

Both TIF lists are essentially the same. x86_32 also has TIF_SYSCALL_EMU which
must be undefined for the 64 bit case.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/thread_info.h | 162 ++++++++++++++++--------------------------
 1 file changed, 61 insertions(+), 101 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index f8d5cf516ac..983743a14b9 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -62,6 +62,67 @@ struct thread_info {
 
 #endif
 
+/*
+ * thread information flags
+ * - these are process state flags that various assembly files
+ *   may need to access
+ * - pending work-to-be-done flags are in LSW
+ * - other flags in MSW
+ * Warning: layout of LSW is hardcoded in entry.S
+ */
+#define TIF_SYSCALL_TRACE	0	/* syscall trace active */
+#define TIF_SIGPENDING		2	/* signal pending */
+#define TIF_NEED_RESCHED	3	/* rescheduling necessary */
+#define TIF_SINGLESTEP		4	/* reenable singlestep on user return*/
+#define TIF_IRET		5	/* force IRET */
+#ifdef CONFIG_X86_32
+#define TIF_SYSCALL_EMU		6	/* syscall emulation active */
+#endif
+#define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
+#define TIF_SECCOMP		8	/* secure computing */
+#define TIF_RESTORE_SIGMASK	9	/* restore signal mask in do_signal */
+#define TIF_MCE_NOTIFY		10	/* notify userspace of an MCE */
+#define TIF_HRTICK_RESCHED	11	/* reprogram hrtick timer */
+#define TIF_NOTSC		16	/* TSC is not accessible in userland */
+#define TIF_IA32		17	/* 32bit process */
+#define TIF_FORK		18	/* ret_from_fork */
+#define TIF_ABI_PENDING		19
+#define TIF_MEMDIE		20
+#define TIF_DEBUG		21	/* uses debug registers */
+#define TIF_IO_BITMAP		22	/* uses I/O bitmap */
+#define TIF_FREEZE		23	/* is freezing for suspend */
+#define TIF_FORCED_TF		24	/* true if TF in eflags artificially */
+#define TIF_DEBUGCTLMSR		25	/* uses thread_struct.debugctlmsr */
+#define TIF_DS_AREA_MSR		26      /* uses thread_struct.ds_area_msr */
+#define TIF_BTS_TRACE_TS	27      /* record scheduling event timestamps */
+
+#define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
+#define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
+#define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
+#define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
+#define _TIF_IRET		(1 << TIF_IRET)
+#ifdef CONFIG_X86_32
+#define _TIF_SYSCALL_EMU	(1 << TIF_SYSCALL_EMU)
+#else
+#define _TIF_SYSCALL_EMU	0
+#endif
+#define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
+#define _TIF_SECCOMP		(1 << TIF_SECCOMP)
+#define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
+#define _TIF_MCE_NOTIFY		(1 << TIF_MCE_NOTIFY)
+#define _TIF_HRTICK_RESCHED	(1 << TIF_HRTICK_RESCHED)
+#define _TIF_NOTSC		(1 << TIF_NOTSC)
+#define _TIF_IA32		(1 << TIF_IA32)
+#define _TIF_FORK		(1 << TIF_FORK)
+#define _TIF_ABI_PENDING	(1 << TIF_ABI_PENDING)
+#define _TIF_DEBUG		(1 << TIF_DEBUG)
+#define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
+#define _TIF_FREEZE		(1 << TIF_FREEZE)
+#define _TIF_FORCED_TF		(1 << TIF_FORCED_TF)
+#define _TIF_DEBUGCTLMSR	(1 << TIF_DEBUGCTLMSR)
+#define _TIF_DS_AREA_MSR	(1 << TIF_DS_AREA_MSR)
+#define _TIF_BTS_TRACE_TS	(1 << TIF_BTS_TRACE_TS)
+
 #define PREEMPT_ACTIVE		0x10000000
 
 #ifdef CONFIG_X86_32
@@ -113,53 +174,6 @@ static inline struct thread_info *current_thread_info(void)
 
 #endif
 
-/*
- * thread information flags
- * - these are process state flags that various
- *   assembly files may need to access
- * - pending work-to-be-done flags are in LSW
- * - other flags in MSW
- */
-#define TIF_SYSCALL_TRACE	0	/* syscall trace active */
-#define TIF_SIGPENDING		1	/* signal pending */
-#define TIF_NEED_RESCHED	2	/* rescheduling necessary */
-#define TIF_SINGLESTEP		3	/* restore singlestep on return to
-					   user mode */
-#define TIF_IRET		4	/* return with iret */
-#define TIF_SYSCALL_EMU		5	/* syscall emulation active */
-#define TIF_SYSCALL_AUDIT	6	/* syscall auditing active */
-#define TIF_SECCOMP		7	/* secure computing */
-#define TIF_RESTORE_SIGMASK	8	/* restore signal mask in do_signal() */
-#define TIF_HRTICK_RESCHED	9	/* reprogram hrtick timer */
-#define TIF_MEMDIE		16
-#define TIF_DEBUG		17	/* uses debug registers */
-#define TIF_IO_BITMAP		18	/* uses I/O bitmap */
-#define TIF_FREEZE		19	/* is freezing for suspend */
-#define TIF_NOTSC		20	/* TSC is not accessible in userland */
-#define TIF_FORCED_TF		21	/* true if TF in eflags artificially */
-#define TIF_DEBUGCTLMSR		22	/* uses thread_struct.debugctlmsr */
-#define TIF_DS_AREA_MSR 	23      /* uses thread_struct.ds_area_msr */
-#define TIF_BTS_TRACE_TS        24      /* record scheduling event timestamps */
-
-#define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
-#define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
-#define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
-#define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
-#define _TIF_IRET		(1 << TIF_IRET)
-#define _TIF_SYSCALL_EMU	(1 << TIF_SYSCALL_EMU)
-#define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
-#define _TIF_SECCOMP		(1 << TIF_SECCOMP)
-#define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
-#define _TIF_HRTICK_RESCHED	(1 << TIF_HRTICK_RESCHED)
-#define _TIF_DEBUG		(1 << TIF_DEBUG)
-#define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
-#define _TIF_FREEZE		(1 << TIF_FREEZE)
-#define _TIF_NOTSC		(1 << TIF_NOTSC)
-#define _TIF_FORCED_TF		(1 << TIF_FORCED_TF)
-#define _TIF_DEBUGCTLMSR	(1 << TIF_DEBUGCTLMSR)
-#define _TIF_DS_AREA_MSR	(1 << TIF_DS_AREA_MSR)
-#define _TIF_BTS_TRACE_TS	(1 << TIF_BTS_TRACE_TS)
-
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK							\
 	(0x0000FFFF & ~(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT |	\
@@ -217,60 +231,6 @@ static inline struct thread_info *stack_thread_info(void)
 
 #endif
 
-/*
- * thread information flags
- * - these are process state flags that various assembly files
- *   may need to access
- * - pending work-to-be-done flags are in LSW
- * - other flags in MSW
- * Warning: layout of LSW is hardcoded in entry.S
- */
-#define TIF_SYSCALL_TRACE	0	/* syscall trace active */
-#define TIF_SIGPENDING		2	/* signal pending */
-#define TIF_NEED_RESCHED	3	/* rescheduling necessary */
-#define TIF_SINGLESTEP		4	/* reenable singlestep on user return*/
-#define TIF_IRET		5	/* force IRET */
-#define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
-#define TIF_SECCOMP		8	/* secure computing */
-#define TIF_RESTORE_SIGMASK	9	/* restore signal mask in do_signal */
-#define TIF_MCE_NOTIFY		10	/* notify userspace of an MCE */
-#define TIF_HRTICK_RESCHED	11	/* reprogram hrtick timer */
-/* 16 free */
-#define TIF_IA32		17	/* 32bit process */
-#define TIF_FORK		18	/* ret_from_fork */
-#define TIF_ABI_PENDING		19
-#define TIF_MEMDIE		20
-#define TIF_DEBUG		21	/* uses debug registers */
-#define TIF_IO_BITMAP		22	/* uses I/O bitmap */
-#define TIF_FREEZE		23	/* is freezing for suspend */
-#define TIF_FORCED_TF		24	/* true if TF in eflags artificially */
-#define TIF_DEBUGCTLMSR		25	/* uses thread_struct.debugctlmsr */
-#define TIF_DS_AREA_MSR		26      /* uses thread_struct.ds_area_msr */
-#define TIF_BTS_TRACE_TS	27      /* record scheduling event timestamps */
-#define TIF_NOTSC		28	/* TSC is not accessible in userland */
-
-#define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
-#define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
-#define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
-#define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
-#define _TIF_IRET		(1 << TIF_IRET)
-#define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
-#define _TIF_SECCOMP		(1 << TIF_SECCOMP)
-#define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
-#define _TIF_MCE_NOTIFY		(1 << TIF_MCE_NOTIFY)
-#define _TIF_HRTICK_RESCHED	(1 << TIF_HRTICK_RESCHED)
-#define _TIF_IA32		(1 << TIF_IA32)
-#define _TIF_FORK		(1 << TIF_FORK)
-#define _TIF_ABI_PENDING	(1 << TIF_ABI_PENDING)
-#define _TIF_DEBUG		(1 << TIF_DEBUG)
-#define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
-#define _TIF_FREEZE		(1 << TIF_FREEZE)
-#define _TIF_FORCED_TF		(1 << TIF_FORCED_TF)
-#define _TIF_DEBUGCTLMSR	(1 << TIF_DEBUGCTLMSR)
-#define _TIF_DS_AREA_MSR	(1 << TIF_DS_AREA_MSR)
-#define _TIF_BTS_TRACE_TS	(1 << TIF_BTS_TRACE_TS)
-#define _TIF_NOTSC		(1 << TIF_NOTSC)
-
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK							\
 	(0x0000FFFF &							\
-- 
cgit v1.2.3-70-g09d2


From 00c1bb133cf351fa3904b00a48a9cf535d018de6 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 28 Apr 2008 18:52:39 -0700
Subject: x86: thread_info: merge tif masks

The TIF masks are basically the same. x86_32 also has _TIF_SYSCALL_EMU which is
zero for the 64 bit case. The tif masks become the same.

x86_64 has an additional _TIF_DONOTIFY_MASK. Does not hurt for the 32 bit case
since it is only used in x86_64 arch code.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/thread_info.h | 51 ++++++++++++++++++-------------------------
 1 file changed, 21 insertions(+), 30 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index 983743a14b9..b7cd41308e5 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -123,6 +123,27 @@ struct thread_info {
 #define _TIF_DS_AREA_MSR	(1 << TIF_DS_AREA_MSR)
 #define _TIF_BTS_TRACE_TS	(1 << TIF_BTS_TRACE_TS)
 
+/* work to do on interrupt/exception return */
+#define _TIF_WORK_MASK							\
+	(0x0000FFFF &							\
+	 ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP|	\
+	 _TIF_SECCOMP|_TIF_SYSCALL_EMU))
+
+/* work to do on any return to user space */
+#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP)
+
+#define _TIF_DO_NOTIFY_MASK						\
+	(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED)
+
+/* flags to check in __switch_to() */
+#define _TIF_WORK_CTXSW							\
+	(_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS| \
+								_TIF_NOTSC)
+
+#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
+#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
+
+
 #define PREEMPT_ACTIVE		0x10000000
 
 #ifdef CONFIG_X86_32
@@ -174,20 +195,6 @@ static inline struct thread_info *current_thread_info(void)
 
 #endif
 
-/* work to do on interrupt/exception return */
-#define _TIF_WORK_MASK							\
-	(0x0000FFFF & ~(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT |	\
-			_TIF_SECCOMP | _TIF_SYSCALL_EMU))
-/* work to do on any return to u-space */
-#define _TIF_ALLWORK_MASK	(0x0000FFFF & ~_TIF_SECCOMP)
-
-/* flags to check in __switch_to() */
-#define _TIF_WORK_CTXSW						\
-	(_TIF_IO_BITMAP | _TIF_NOTSC | _TIF_DEBUGCTLMSR |	\
-	 _TIF_DS_AREA_MSR | _TIF_BTS_TRACE_TS)
-#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
-#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW | _TIF_DEBUG)
-
 #else /* X86_32 */
 
 #include <asm/pda.h>
@@ -231,22 +238,6 @@ static inline struct thread_info *stack_thread_info(void)
 
 #endif
 
-/* work to do on interrupt/exception return */
-#define _TIF_WORK_MASK							\
-	(0x0000FFFF &							\
-	 ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP|_TIF_SECCOMP))
-/* work to do on any return to user space */
-#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP)
-
-#define _TIF_DO_NOTIFY_MASK						\
-	(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED)
-
-/* flags to check in __switch_to() */
-#define _TIF_WORK_CTXSW							\
-	(_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS|_TIF_NOTSC)
-#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
-#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
-
 #endif /* !X86_32 */
 
 /*
-- 
cgit v1.2.3-70-g09d2


From b84200b3a0fafa167185201319940d8df62a8c7b Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 28 Apr 2008 18:52:40 -0700
Subject: x86: thread_info: merge thread_info allocation

Make them similar so that both use THREAD_ORDER and THREAD_FLAGS and have a
THREAD_SIZE definition that is setup in asm/page_xx.h

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/page_32.h     |  8 ++++++++
 include/asm-x86/thread_info.h | 37 +++++++++++--------------------------
 2 files changed, 19 insertions(+), 26 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index 424e82f8ae2..50b33ebcf85 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h
@@ -13,6 +13,14 @@
  */
 #define __PAGE_OFFSET		_AC(CONFIG_PAGE_OFFSET, UL)
 
+#ifdef CONFIG_4KSTACKS
+#define THREAD_ORDER	0
+#else
+#define THREAD_ORDER	1
+#endif
+#define THREAD_SIZE 	(PAGE_SIZE << THREAD_ORDER)
+
+
 #ifdef CONFIG_X86_PAE
 #define __PHYSICAL_MASK_SHIFT	36
 #define __VIRTUAL_MASK_SHIFT	32
diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index b7cd41308e5..348f0e0faa3 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -132,6 +132,7 @@ struct thread_info {
 /* work to do on any return to user space */
 #define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP)
 
+/* Only used for 64 bit */
 #define _TIF_DO_NOTIFY_MASK						\
 	(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED)
 
@@ -143,18 +144,21 @@ struct thread_info {
 #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
 
-
 #define PREEMPT_ACTIVE		0x10000000
 
-#ifdef CONFIG_X86_32
-
-#ifdef CONFIG_4KSTACKS
-#define THREAD_SIZE            (4096)
+/* thread information allocation */
+#ifdef CONFIG_DEBUG_STACK_USAGE
+#define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO)
 #else
-#define THREAD_SIZE		(8192)
+#define THREAD_FLAGS GFP_KERNEL
 #endif
 
-#define STACK_WARN             (THREAD_SIZE/8)
+#define alloc_thread_info(tsk)						\
+	((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER))
+
+#ifdef CONFIG_X86_32
+
+#define STACK_WARN	(THREAD_SIZE/8)
 /*
  * macros/functions for gaining access to the thread information structure
  *
@@ -173,15 +177,6 @@ static inline struct thread_info *current_thread_info(void)
 		(current_stack_pointer & ~(THREAD_SIZE - 1));
 }
 
-/* thread information allocation */
-#ifdef CONFIG_DEBUG_STACK_USAGE
-#define alloc_thread_info(tsk) ((struct thread_info *)			\
-	__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(THREAD_SIZE)))
-#else
-#define alloc_thread_info(tsk) ((struct thread_info *)			\
-	__get_free_pages(GFP_KERNEL, get_order(THREAD_SIZE)))
-#endif
-
 #else /* !__ASSEMBLY__ */
 
 /* how to get the thread information struct from ASM */
@@ -219,16 +214,6 @@ static inline struct thread_info *stack_thread_info(void)
 	return ti;
 }
 
-/* thread information allocation */
-#ifdef CONFIG_DEBUG_STACK_USAGE
-#define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO)
-#else
-#define THREAD_FLAGS GFP_KERNEL
-#endif
-
-#define alloc_thread_info(tsk)						\
-	((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER))
-
 #else /* !__ASSEMBLY__ */
 
 /* how to get the thread information struct from ASM */
-- 
cgit v1.2.3-70-g09d2


From 8a6c160a2a13d82c75a50af7282b906cce948df5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Apr 2008 22:13:44 +0200
Subject: x86: redo thread_info.h change

redo Roland's "signals: x86 TS_RESTORE_SIGMASK" ontop of the unified
thread_info.h file.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/thread_info.h | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index 348f0e0faa3..74481b72ae0 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -80,7 +80,6 @@ struct thread_info {
 #endif
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SECCOMP		8	/* secure computing */
-#define TIF_RESTORE_SIGMASK	9	/* restore signal mask in do_signal */
 #define TIF_MCE_NOTIFY		10	/* notify userspace of an MCE */
 #define TIF_HRTICK_RESCHED	11	/* reprogram hrtick timer */
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
@@ -108,7 +107,6 @@ struct thread_info {
 #endif
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
-#define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
 #define _TIF_MCE_NOTIFY		(1 << TIF_MCE_NOTIFY)
 #define _TIF_HRTICK_RESCHED	(1 << TIF_HRTICK_RESCHED)
 #define _TIF_NOTSC		(1 << TIF_NOTSC)
@@ -237,9 +235,20 @@ static inline struct thread_info *stack_thread_info(void)
 #define TS_COMPAT		0x0002	/* 32bit syscall active (64BIT)*/
 #define TS_POLLING		0x0004	/* true if in idle loop
 					   and not sleeping */
+#define TS_RESTORE_SIGMASK	0x0008	/* restore signal mask in do_signal() */
 
 #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)
 
+#ifndef __ASSEMBLY__
+#define HAVE_SET_RESTORE_SIGMASK	1
+static inline void set_restore_sigmask(void)
+{
+	struct thread_info *ti = current_thread_info();
+	ti->status |= TS_RESTORE_SIGMASK;
+	set_bit(TIF_SIGPENDING, &ti->flags);
+}
+#endif	/* !__ASSEMBLY__ */
+
 #ifndef __ASSEMBLY__
 extern void arch_task_cache_init(void);
 extern void free_thread_info(struct thread_info *ti);
-- 
cgit v1.2.3-70-g09d2


From 6859a8402945cf1d74af75a2e1aa4e327a506ab4 Mon Sep 17 00:00:00 2001
From: Alan Mayer <ajm@sgi.com>
Date: Wed, 26 Mar 2008 16:11:31 -0500
Subject: x86: resize NR_IRQS for large machines

On machines with very large numbers of cpus, tables that are dimensioned
by NR_IRQS get very large, especially the irq_desc table.  They are also
very sparsely used.  When the cpu count is > MAX_IO_APICS, use MAX_IO_APICS
to set NR_IRQS, otherwise use NR_CPUS.

Signed-off-by: Alan Mayer <ajm@sgi.com>
Reviewed-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/irq_64.h    | 6 ++++++
 include/linux/kernel_stat.h | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/irq_64.h b/include/asm-x86/irq_64.h
index 083d35a62c9..7608176590b 100644
--- a/include/asm-x86/irq_64.h
+++ b/include/asm-x86/irq_64.h
@@ -10,6 +10,8 @@
  *	<tomsoft@informatik.tu-chemnitz.de>
  */
 
+#include <asm/apicdef.h>
+
 #define TIMER_IRQ 0
 
 /*
@@ -31,7 +33,11 @@
 
 #define FIRST_SYSTEM_VECTOR	0xef   /* duplicated in hw_irq.h */
 
+#if NR_CPUS < MAX_IO_APICS
 #define NR_IRQS (NR_VECTORS + (32 * NR_CPUS))
+#else
+#define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
+#endif
 #define NR_IRQ_VECTORS NR_IRQS
 
 static inline int irq_canonicalize(int irq)
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index e8ffce898bf..cf9f40a91c9 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -1,11 +1,11 @@
 #ifndef _LINUX_KERNEL_STAT_H
 #define _LINUX_KERNEL_STAT_H
 
-#include <asm/irq.h>
 #include <linux/smp.h>
 #include <linux/threads.h>
 #include <linux/percpu.h>
 #include <linux/cpumask.h>
+#include <asm/irq.h>
 #include <asm/cputime.h>
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 2e0884362d1fe36ef2d673d763d6ce35e2044e66 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 2 May 2008 19:00:30 +0200
Subject: x86: move common declarations to hw_irq.h

Move the common declarations from hw_irq_32/64 into hw_irq.h

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/hw_irq.h    | 81 +++++++++++++++++++++++++++++++++++++++++++++
 include/asm-x86/hw_irq_32.h | 61 ----------------------------------
 include/asm-x86/hw_irq_64.h | 68 -------------------------------------
 3 files changed, 81 insertions(+), 129 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h
index bf025399d93..38af08ed43c 100644
--- a/include/asm-x86/hw_irq.h
+++ b/include/asm-x86/hw_irq.h
@@ -1,5 +1,86 @@
+#ifndef _ASM_HW_IRQ_H
+#define _ASM_HW_IRQ_H
+
+/*
+ * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
+ *
+ * moved some of the old arch/i386/kernel/irq.h to here. VY
+ *
+ * IRQ/IPI changes taken from work by Thomas Radke
+ * <tomsoft@informatik.tu-chemnitz.de>
+ *
+ * hacked by Andi Kleen for x86-64.
+ * unified by tglx
+ */
+
+#define NMI_VECTOR		0x02
+
+#ifndef __ASSEMBLY__
+
+#include <linux/percpu.h>
+#include <linux/profile.h>
+#include <linux/smp.h>
+
+#include <asm/atomic.h>
+#include <asm/irq.h>
+#include <asm/sections.h>
+
+#define platform_legacy_irq(irq)	((irq) < 16)
+
+/* Interrupt handlers registered during init_IRQ */
+extern void apic_timer_interrupt(void);
+extern void error_interrupt(void);
+extern void spurious_interrupt(void);
+extern void thermal_interrupt(void);
+extern void reschedule_interrupt(void);
+
+extern void invalidate_interrupt(void);
+extern void invalidate_interrupt0(void);
+extern void invalidate_interrupt1(void);
+extern void invalidate_interrupt2(void);
+extern void invalidate_interrupt3(void);
+extern void invalidate_interrupt4(void);
+extern void invalidate_interrupt5(void);
+extern void invalidate_interrupt6(void);
+extern void invalidate_interrupt7(void);
+
+extern void irq_move_cleanup_interrupt(void);
+extern void threshold_interrupt(void);
+
+extern void call_function_interrupt(void);
+
+/* PIC specific functions */
+extern void disable_8259A_irq(unsigned int irq);
+extern void enable_8259A_irq(unsigned int irq);
+extern int i8259A_irq_pending(unsigned int irq);
+extern void make_8259A_irq(unsigned int irq);
+extern void init_8259A(int aeoi);
+
+/* IOAPIC */
+#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
+extern unsigned long io_apic_irqs;
+
+extern void init_VISWS_APIC_irqs(void);
+extern void setup_IO_APIC(void);
+extern void disable_IO_APIC(void);
+extern void print_IO_APIC(void);
+extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
+extern void setup_ioapic_dest(void);
+
+/* IPI functions */
+extern void send_IPI_self(int vector);
+extern void send_IPI(int dest, int vector);
+
+/* Statistics */
+extern atomic_t irq_err_count;
+extern atomic_t irq_mis_count;
+
+#endif /* !ASSEMBLY_ */
+
 #ifdef CONFIG_X86_32
 # include "hw_irq_32.h"
 #else
 # include "hw_irq_64.h"
 #endif
+
+#endif
diff --git a/include/asm-x86/hw_irq_32.h b/include/asm-x86/hw_irq_32.h
index ea88054e03f..89fca5af595 100644
--- a/include/asm-x86/hw_irq_32.h
+++ b/include/asm-x86/hw_irq_32.h
@@ -1,66 +1,5 @@
-#ifndef _ASM_HW_IRQ_H
-#define _ASM_HW_IRQ_H
-
-/*
- *	linux/include/asm/hw_irq.h
- *
- *	(C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
- *
- *	moved some of the old arch/i386/kernel/irq.h to here. VY
- *
- *	IRQ/IPI changes taken from work by Thomas Radke
- *	<tomsoft@informatik.tu-chemnitz.de>
- */
-
-#include <linux/profile.h>
-#include <asm/atomic.h>
-#include <asm/irq.h>
-#include <asm/sections.h>
-
-#define NMI_VECTOR		0x02
-
-/*
- * Various low-level irq details needed by irq.c, process.c,
- * time.c, io_apic.c and smp.c
- *
- * Interrupt entry/exit code at both C and assembly level
- */
 
 extern void (*const interrupt[NR_IRQS])(void);
 
-#ifdef CONFIG_SMP
-void reschedule_interrupt(void);
-void invalidate_interrupt(void);
-void call_function_interrupt(void);
-#endif
-
-#ifdef CONFIG_X86_LOCAL_APIC
-void apic_timer_interrupt(void);
-void error_interrupt(void);
-void spurious_interrupt(void);
-void thermal_interrupt(void);
-#define platform_legacy_irq(irq)	((irq) < 16)
-#endif
-
-void disable_8259A_irq(unsigned int irq);
-void enable_8259A_irq(unsigned int irq);
-int i8259A_irq_pending(unsigned int irq);
-void make_8259A_irq(unsigned int irq);
-void init_8259A(int aeoi);
-void send_IPI_self(int vector);
-void init_VISWS_APIC_irqs(void);
-void setup_IO_APIC(void);
-void disable_IO_APIC(void);
-void print_IO_APIC(void);
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
-void send_IPI(int dest, int vector);
-void setup_ioapic_dest(void);
-
-extern unsigned long io_apic_irqs;
-
-extern atomic_t irq_err_count;
-extern atomic_t irq_mis_count;
 
-#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
 
-#endif /* _ASM_HW_IRQ_H */
diff --git a/include/asm-x86/hw_irq_64.h b/include/asm-x86/hw_irq_64.h
index 0062ef390f6..28674576e9f 100644
--- a/include/asm-x86/hw_irq_64.h
+++ b/include/asm-x86/hw_irq_64.h
@@ -1,28 +1,3 @@
-#ifndef _ASM_HW_IRQ_H
-#define _ASM_HW_IRQ_H
-
-/*
- *	linux/include/asm/hw_irq.h
- *
- *	(C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
- *
- *	moved some of the old arch/i386/kernel/irq.h to here. VY
- *
- *	IRQ/IPI changes taken from work by Thomas Radke
- *	<tomsoft@informatik.tu-chemnitz.de>
- *
- *	hacked by Andi Kleen for x86-64.
- */
-
-#ifndef __ASSEMBLY__
-#include <asm/atomic.h>
-#include <asm/irq.h>
-#include <linux/profile.h>
-#include <linux/smp.h>
-#include <linux/percpu.h>
-#endif
-
-#define NMI_VECTOR		0x02
 /*
  * IDT vectors usable for external interrupt sources start
  * at 0x20:
@@ -96,25 +71,6 @@
 
 #ifndef __ASSEMBLY__
 
-/* Interrupt handlers registered during init_IRQ */
-void apic_timer_interrupt(void);
-void spurious_interrupt(void);
-void error_interrupt(void);
-void reschedule_interrupt(void);
-void call_function_interrupt(void);
-void irq_move_cleanup_interrupt(void);
-void invalidate_interrupt0(void);
-void invalidate_interrupt1(void);
-void invalidate_interrupt2(void);
-void invalidate_interrupt3(void);
-void invalidate_interrupt4(void);
-void invalidate_interrupt5(void);
-void invalidate_interrupt6(void);
-void invalidate_interrupt7(void);
-void thermal_interrupt(void);
-void threshold_interrupt(void);
-void i8254_timer_resume(void);
-
 typedef int vector_irq_t[NR_VECTORS];
 DECLARE_PER_CPU(vector_irq_t, vector_irq);
 extern void __setup_vector_irq(int cpu);
@@ -127,29 +83,9 @@ extern spinlock_t vector_lock;
  * Interrupt entry/exit code at both C and assembly level
  */
 
-extern void disable_8259A_irq(unsigned int irq);
-extern void enable_8259A_irq(unsigned int irq);
-extern int i8259A_irq_pending(unsigned int irq);
-extern void make_8259A_irq(unsigned int irq);
-extern void init_8259A(int aeoi);
-extern void send_IPI_self(int vector);
-extern void init_VISWS_APIC_irqs(void);
-extern void setup_IO_APIC(void);
 extern void enable_IO_APIC(void);
-extern void disable_IO_APIC(void);
-extern void print_IO_APIC(void);
-extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
-extern void send_IPI(int dest, int vector);
-extern void setup_ioapic_dest(void);
 extern void native_init_IRQ(void);
 
-extern unsigned long io_apic_irqs;
-
-extern atomic_t irq_err_count;
-extern atomic_t irq_mis_count;
-
-#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
-
 #include <asm/ptrace.h>
 
 #define IRQ_NAME2(nr) nr##_interrupt(void)
@@ -166,8 +102,4 @@ extern atomic_t irq_mis_count;
 	    "push $~(" #nr ") ; "		\
 	    "jmp common_interrupt");
 
-#define platform_legacy_irq(irq)	((irq) < 16)
-
 #endif
-
-#endif /* _ASM_HW_IRQ_H */
-- 
cgit v1.2.3-70-g09d2


From 9b7dc567d03d74a1fbae84e88949b6a60d922d82 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 2 May 2008 20:10:09 +0200
Subject: x86: unify interrupt vector defines

The interrupt vector defines are copied 4 times around with minimal
differences. Move them all into asm-x86/irq_vectors.h

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_32.S                        |   2 +-
 arch/x86/kernel/vmiclock_32.c                     |   3 +-
 arch/x86/mach-visws/visws_apic.c                  |   3 +-
 include/asm-x86/hw_irq.h                          |  12 +-
 include/asm-x86/hw_irq_64.h                       |  71 ---------
 include/asm-x86/irq_32.h                          |   2 +-
 include/asm-x86/irq_64.h                          |  29 +---
 include/asm-x86/irq_vectors.h                     | 168 ++++++++++++++++++++++
 include/asm-x86/mach-default/irq_vectors.h        |  96 -------------
 include/asm-x86/mach-default/irq_vectors_limits.h |  16 ---
 include/asm-x86/mach-visws/irq_vectors.h          |  62 --------
 include/asm-x86/mach-voyager/irq_vectors.h        |  79 ----------
 12 files changed, 184 insertions(+), 359 deletions(-)
 create mode 100644 include/asm-x86/irq_vectors.h
 delete mode 100644 include/asm-x86/mach-default/irq_vectors.h
 delete mode 100644 include/asm-x86/mach-default/irq_vectors_limits.h
 delete mode 100644 include/asm-x86/mach-visws/irq_vectors.h
 delete mode 100644 include/asm-x86/mach-voyager/irq_vectors.h

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 2a609dc3271..0c7f64b99e1 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -51,7 +51,7 @@
 #include <asm/percpu.h>
 #include <asm/dwarf2.h>
 #include <asm/processor-flags.h>
-#include "irq_vectors.h"
+#include <asm/irq_vectors.h>
 
 /*
  * We use macros for low-level operations which need to be overridden
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index a2b030780aa..ba7d19e102b 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -33,8 +33,7 @@
 #include <asm/apic.h>
 #include <asm/timer.h>
 #include <asm/i8253.h>
-
-#include <irq_vectors.h>
+#include <asm/irq_vectors.h>
 
 #define VMI_ONESHOT  (VMI_ALARM_IS_ONESHOT  | VMI_CYCLES_REAL | vmi_get_alarm_wiring())
 #define VMI_PERIODIC (VMI_ALARM_IS_PERIODIC | VMI_CYCLES_REAL | vmi_get_alarm_wiring())
diff --git a/arch/x86/mach-visws/visws_apic.c b/arch/x86/mach-visws/visws_apic.c
index cef9cb1d15a..d8b2cfd85d9 100644
--- a/arch/x86/mach-visws/visws_apic.c
+++ b/arch/x86/mach-visws/visws_apic.c
@@ -21,10 +21,9 @@
 #include <asm/io.h>
 #include <asm/apic.h>
 #include <asm/i8259.h>
+#include <asm/irq_vectors.h>
 
 #include "cobalt.h"
-#include "irq_vectors.h"
-
 
 static DEFINE_SPINLOCK(cobalt_lock);
 
diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h
index 38af08ed43c..a8c5e8bdaa4 100644
--- a/include/asm-x86/hw_irq.h
+++ b/include/asm-x86/hw_irq.h
@@ -13,7 +13,7 @@
  * unified by tglx
  */
 
-#define NMI_VECTOR		0x02
+#include <asm/irq_vectors.h>
 
 #ifndef __ASSEMBLY__
 
@@ -75,6 +75,16 @@ extern void send_IPI(int dest, int vector);
 extern atomic_t irq_err_count;
 extern atomic_t irq_mis_count;
 
+/* Voyager functions */
+extern asmlinkage void vic_cpi_interrupt(void);
+extern asmlinkage void vic_sys_interrupt(void);
+extern asmlinkage void vic_cmn_interrupt(void);
+extern asmlinkage void qic_timer_interrupt(void);
+extern asmlinkage void qic_invalidate_interrupt(void);
+extern asmlinkage void qic_reschedule_interrupt(void);
+extern asmlinkage void qic_enable_irq_interrupt(void);
+extern asmlinkage void qic_call_function_interrupt(void);
+
 #endif /* !ASSEMBLY_ */
 
 #ifdef CONFIG_X86_32
diff --git a/include/asm-x86/hw_irq_64.h b/include/asm-x86/hw_irq_64.h
index 28674576e9f..98c9d494a71 100644
--- a/include/asm-x86/hw_irq_64.h
+++ b/include/asm-x86/hw_irq_64.h
@@ -1,74 +1,3 @@
-/*
- * IDT vectors usable for external interrupt sources start
- * at 0x20:
- */
-#define FIRST_EXTERNAL_VECTOR	0x20
-
-#define IA32_SYSCALL_VECTOR	0x80
-
-
-/* Reserve the lowest usable priority level 0x20 - 0x2f for triggering
- * cleanup after irq migration.
- */
-#define IRQ_MOVE_CLEANUP_VECTOR	FIRST_EXTERNAL_VECTOR
-
-/*
- * Vectors 0x30-0x3f are used for ISA interrupts.
- */
-#define IRQ0_VECTOR		(FIRST_EXTERNAL_VECTOR + 0x10)
-#define IRQ1_VECTOR		(IRQ0_VECTOR + 1)
-#define IRQ2_VECTOR		(IRQ0_VECTOR + 2)
-#define IRQ3_VECTOR		(IRQ0_VECTOR + 3)
-#define IRQ4_VECTOR		(IRQ0_VECTOR + 4)
-#define IRQ5_VECTOR		(IRQ0_VECTOR + 5)
-#define IRQ6_VECTOR		(IRQ0_VECTOR + 6)
-#define IRQ7_VECTOR		(IRQ0_VECTOR + 7)
-#define IRQ8_VECTOR		(IRQ0_VECTOR + 8)
-#define IRQ9_VECTOR		(IRQ0_VECTOR + 9)
-#define IRQ10_VECTOR		(IRQ0_VECTOR + 10)
-#define IRQ11_VECTOR		(IRQ0_VECTOR + 11)
-#define IRQ12_VECTOR		(IRQ0_VECTOR + 12)
-#define IRQ13_VECTOR		(IRQ0_VECTOR + 13)
-#define IRQ14_VECTOR		(IRQ0_VECTOR + 14)
-#define IRQ15_VECTOR		(IRQ0_VECTOR + 15)
-
-/*
- * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
- *
- *  some of the following vectors are 'rare', they are merged
- *  into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
- *  TLB, reschedule and local APIC vectors are performance-critical.
- */
-#define SPURIOUS_APIC_VECTOR	0xff
-#define ERROR_APIC_VECTOR	0xfe
-#define RESCHEDULE_VECTOR	0xfd
-#define CALL_FUNCTION_VECTOR	0xfc
-/* fb free - please don't readd KDB here because it's useless
-   (hint - think what a NMI bit does to a vector) */
-#define THERMAL_APIC_VECTOR	0xfa
-#define THRESHOLD_APIC_VECTOR   0xf9
-/* f8 free */
-#define INVALIDATE_TLB_VECTOR_END	0xf7
-#define INVALIDATE_TLB_VECTOR_START	0xf0	/* f0-f7 used for TLB flush */
-
-#define NUM_INVALIDATE_TLB_VECTORS	8
-
-/*
- * Local APIC timer IRQ vector is on a different priority level,
- * to work around the 'lost local interrupt if more than 2 IRQ
- * sources per level' errata.
- */
-#define LOCAL_TIMER_VECTOR	0xef
-
-/*
- * First APIC vector available to drivers: (vectors 0x30-0xee)
- * we start at 0x41 to spread out vectors evenly between priority
- * levels. (0x80 is the syscall vector)
- */
-#define FIRST_DEVICE_VECTOR	(IRQ15_VECTOR + 2)
-#define FIRST_SYSTEM_VECTOR	0xef   /* duplicated in irq.h */
-
-
 #ifndef __ASSEMBLY__
 
 typedef int vector_irq_t[NR_VECTORS];
diff --git a/include/asm-x86/irq_32.h b/include/asm-x86/irq_32.h
index 0b79f318524..c5c7542f79a 100644
--- a/include/asm-x86/irq_32.h
+++ b/include/asm-x86/irq_32.h
@@ -12,7 +12,7 @@
 
 #include <linux/sched.h>
 /* include comes from machine specific directory */
-#include "irq_vectors.h"
+#include <asm/irq_vectors.h>
 #include <asm/thread_info.h>
 
 static inline int irq_canonicalize(int irq)
diff --git a/include/asm-x86/irq_64.h b/include/asm-x86/irq_64.h
index 7608176590b..3037ec667bf 100644
--- a/include/asm-x86/irq_64.h
+++ b/include/asm-x86/irq_64.h
@@ -11,34 +11,7 @@
  */
 
 #include <asm/apicdef.h>
-
-#define TIMER_IRQ 0
-
-/*
- * 16 8259A IRQ's, 208 potential APIC interrupt sources.
- * Right now the APIC is mostly only used for SMP.
- * 256 vectors is an architectural limit. (we can have
- * more than 256 devices theoretically, but they will
- * have to use shared interrupts)
- * Since vectors 0x00-0x1f are used/reserved for the CPU,
- * the usable vector space is 0x20-0xff (224 vectors)
- */
-
-/*
- * The maximum number of vectors supported by x86_64 processors
- * is limited to 256. For processors other than x86_64, NR_VECTORS
- * should be changed accordingly.
- */
-#define NR_VECTORS 256
-
-#define FIRST_SYSTEM_VECTOR	0xef   /* duplicated in hw_irq.h */
-
-#if NR_CPUS < MAX_IO_APICS
-#define NR_IRQS (NR_VECTORS + (32 * NR_CPUS))
-#else
-#define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
-#endif
-#define NR_IRQ_VECTORS NR_IRQS
+#include <asm/irq_vectors.h>
 
 static inline int irq_canonicalize(int irq)
 {
diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h
new file mode 100644
index 00000000000..daceaaf0a3a
--- /dev/null
+++ b/include/asm-x86/irq_vectors.h
@@ -0,0 +1,168 @@
+#ifndef _ASM_IRQ_VECTORS_H
+#define _ASM_IRQ_VECTORS_H
+
+#include <linux/threads.h>
+
+#define NMI_VECTOR		0x02
+
+/*
+ * IDT vectors usable for external interrupt sources start
+ * at 0x20:
+ */
+#define FIRST_EXTERNAL_VECTOR	0x20
+
+#ifdef CONFIG_X86_32
+# define SYSCALL_VECTOR		0x80
+#else
+# define IA32_SYSCALL_VECTOR	0x80
+#endif
+
+/*
+ * Vectors 0x20-0x2f are used for ISA interrupts on 32 bit.
+ *
+ * Reserve the lowest usable priority level 0x20 - 0x2f for triggering
+ * cleanup after irq migration on 64 bit.
+ */
+#define IRQ_MOVE_CLEANUP_VECTOR	FIRST_EXTERNAL_VECTOR
+
+/*
+ * Vectors 0x30-0x3f are used for ISA interrupts on 64 bit
+ */
+#define IRQ0_VECTOR		(FIRST_EXTERNAL_VECTOR + 0x10)
+#define IRQ1_VECTOR		(IRQ0_VECTOR + 1)
+#define IRQ2_VECTOR		(IRQ0_VECTOR + 2)
+#define IRQ3_VECTOR		(IRQ0_VECTOR + 3)
+#define IRQ4_VECTOR		(IRQ0_VECTOR + 4)
+#define IRQ5_VECTOR		(IRQ0_VECTOR + 5)
+#define IRQ6_VECTOR		(IRQ0_VECTOR + 6)
+#define IRQ7_VECTOR		(IRQ0_VECTOR + 7)
+#define IRQ8_VECTOR		(IRQ0_VECTOR + 8)
+#define IRQ9_VECTOR		(IRQ0_VECTOR + 9)
+#define IRQ10_VECTOR		(IRQ0_VECTOR + 10)
+#define IRQ11_VECTOR		(IRQ0_VECTOR + 11)
+#define IRQ12_VECTOR		(IRQ0_VECTOR + 12)
+#define IRQ13_VECTOR		(IRQ0_VECTOR + 13)
+#define IRQ14_VECTOR		(IRQ0_VECTOR + 14)
+#define IRQ15_VECTOR		(IRQ0_VECTOR + 15)
+
+/*
+ * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
+ *
+ *  some of the following vectors are 'rare', they are merged
+ *  into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
+ *  TLB, reschedule and local APIC vectors are performance-critical.
+ *
+ *  Vectors 0xf0-0xfa are free (reserved for future Linux use).
+ */
+#ifdef CONFIG_X86_32
+
+# define SPURIOUS_APIC_VECTOR		0xff
+# define ERROR_APIC_VECTOR		0xfe
+# define INVALIDATE_TLB_VECTOR		0xfd
+# define RESCHEDULE_VECTOR		0xfc
+# define CALL_FUNCTION_VECTOR		0xfb
+# define THERMAL_APIC_VECTOR		0xf0
+
+#else
+
+#define SPURIOUS_APIC_VECTOR		0xff
+#define ERROR_APIC_VECTOR		0xfe
+#define RESCHEDULE_VECTOR		0xfd
+#define CALL_FUNCTION_VECTOR		0xfc
+#define THERMAL_APIC_VECTOR		0xfa
+#define THRESHOLD_APIC_VECTOR		0xf9
+#define INVALIDATE_TLB_VECTOR_END	0xf7
+#define INVALIDATE_TLB_VECTOR_START	0xf0	/* f0-f7 used for TLB flush */
+
+#define NUM_INVALIDATE_TLB_VECTORS	8
+
+#endif
+
+/*
+ * Local APIC timer IRQ vector is on a different priority level,
+ * to work around the 'lost local interrupt if more than 2 IRQ
+ * sources per level' errata.
+ */
+#define LOCAL_TIMER_VECTOR	0xef
+
+/*
+ * First APIC vector available to drivers: (vectors 0x30-0xee) we
+ * start at 0x31(0x41) to spread out vectors evenly between priority
+ * levels. (0x80 is the syscall vector)
+ */
+#ifdef CONFIG_X86_32
+# define FIRST_DEVICE_VECTOR	0x31
+#else
+# define FIRST_DEVICE_VECTOR	(IRQ15_VECTOR + 2)
+#endif
+
+#define FIRST_SYSTEM_VECTOR	0xef
+
+#define NR_VECTORS		256
+
+#define FPU_IRQ			13
+
+#define	FIRST_VM86_IRQ		3
+#define LAST_VM86_IRQ		15
+#define invalid_vm86_irq(irq)	((irq) < 3 || (irq) > 15)
+
+#if !defined(CONFIG_X86_VISWS) && !defined(CONFIG_X86_VOYAGER)
+
+# if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_PARAVIRT)
+
+#  define NR_IRQS		224
+
+#  if (224 >= 32 * NR_CPUS)
+#   define NR_IRQ_VECTORS	NR_IRQS
+#  else
+#   define NR_IRQ_VECTORS	(32 * NR_CPUS)
+#  endif
+
+# else /* IO_APIC || PARAVIRT */
+
+#  define NR_IRQS		16
+#  define NR_IRQ_VECTORS	NR_IRQS
+
+# endif
+
+#else /* !VISWS && !VOYAGER */
+
+# define NR_IRQS		224
+# define NR_IRQ_VECTORS		NR_IRQS
+
+#endif /* VISWS */
+
+/* Voyager specific defines */
+/* These define the CPIs we use in linux */
+#define VIC_CPI_LEVEL0			0
+#define VIC_CPI_LEVEL1			1
+/* now the fake CPIs */
+#define VIC_TIMER_CPI			2
+#define VIC_INVALIDATE_CPI		3
+#define VIC_RESCHEDULE_CPI		4
+#define VIC_ENABLE_IRQ_CPI		5
+#define VIC_CALL_FUNCTION_CPI		6
+
+/* Now the QIC CPIs:  Since we don't need the two initial levels,
+ * these are 2 less than the VIC CPIs */
+#define QIC_CPI_OFFSET			1
+#define QIC_TIMER_CPI			(VIC_TIMER_CPI - QIC_CPI_OFFSET)
+#define QIC_INVALIDATE_CPI		(VIC_INVALIDATE_CPI - QIC_CPI_OFFSET)
+#define QIC_RESCHEDULE_CPI		(VIC_RESCHEDULE_CPI - QIC_CPI_OFFSET)
+#define QIC_ENABLE_IRQ_CPI		(VIC_ENABLE_IRQ_CPI - QIC_CPI_OFFSET)
+#define QIC_CALL_FUNCTION_CPI		(VIC_CALL_FUNCTION_CPI - QIC_CPI_OFFSET)
+
+#define VIC_START_FAKE_CPI		VIC_TIMER_CPI
+#define VIC_END_FAKE_CPI		VIC_CALL_FUNCTION_CPI
+
+/* this is the SYS_INT CPI. */
+#define VIC_SYS_INT			8
+#define VIC_CMN_INT			15
+
+/* This is the boot CPI for alternate processors.  It gets overwritten
+ * by the above once the system has activated all available processors */
+#define VIC_CPU_BOOT_CPI		VIC_CPI_LEVEL0
+#define VIC_CPU_BOOT_ERRATA_CPI		(VIC_CPI_LEVEL0 + 8)
+
+
+#endif /* _ASM_IRQ_VECTORS_H */
diff --git a/include/asm-x86/mach-default/irq_vectors.h b/include/asm-x86/mach-default/irq_vectors.h
deleted file mode 100644
index 881c63ca61a..00000000000
--- a/include/asm-x86/mach-default/irq_vectors.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * This file should contain #defines for all of the interrupt vector
- * numbers used by this architecture.
- *
- * In addition, there are some standard defines:
- *
- *	FIRST_EXTERNAL_VECTOR:
- *		The first free place for external interrupts
- *
- *	SYSCALL_VECTOR:
- *		The IRQ vector a syscall makes the user to kernel transition
- *		under.
- *
- *	TIMER_IRQ:
- *		The IRQ number the timer interrupt comes in at.
- *
- *	NR_IRQS:
- *		The total number of interrupt vectors (including all the
- *		architecture specific interrupts) needed.
- *
- */			
-#ifndef _ASM_IRQ_VECTORS_H
-#define _ASM_IRQ_VECTORS_H
-
-/*
- * IDT vectors usable for external interrupt sources start
- * at 0x20:
- */
-#define FIRST_EXTERNAL_VECTOR	0x20
-
-#define SYSCALL_VECTOR		0x80
-
-/*
- * Vectors 0x20-0x2f are used for ISA interrupts.
- */
-
-/*
- * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
- *
- *  some of the following vectors are 'rare', they are merged
- *  into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
- *  TLB, reschedule and local APIC vectors are performance-critical.
- *
- *  Vectors 0xf0-0xfa are free (reserved for future Linux use).
- */
-#define SPURIOUS_APIC_VECTOR	0xff
-#define ERROR_APIC_VECTOR	0xfe
-#define INVALIDATE_TLB_VECTOR	0xfd
-#define RESCHEDULE_VECTOR	0xfc
-#define CALL_FUNCTION_VECTOR	0xfb
-
-#define THERMAL_APIC_VECTOR	0xf0
-/*
- * Local APIC timer IRQ vector is on a different priority level,
- * to work around the 'lost local interrupt if more than 2 IRQ
- * sources per level' errata.
- */
-#define LOCAL_TIMER_VECTOR	0xef
-
-/*
- * First APIC vector available to drivers: (vectors 0x30-0xee)
- * we start at 0x31 to spread out vectors evenly between priority
- * levels. (0x80 is the syscall vector)
- */
-#define FIRST_DEVICE_VECTOR	0x31
-#define FIRST_SYSTEM_VECTOR	0xef
-
-#define TIMER_IRQ 0
-
-/*
- * 16 8259A IRQ's, 208 potential APIC interrupt sources.
- * Right now the APIC is mostly only used for SMP.
- * 256 vectors is an architectural limit. (we can have
- * more than 256 devices theoretically, but they will
- * have to use shared interrupts)
- * Since vectors 0x00-0x1f are used/reserved for the CPU,
- * the usable vector space is 0x20-0xff (224 vectors)
- */
-
-/*
- * The maximum number of vectors supported by i386 processors
- * is limited to 256. For processors other than i386, NR_VECTORS
- * should be changed accordingly.
- */
-#define NR_VECTORS 256
-
-#include "irq_vectors_limits.h"
-
-#define FPU_IRQ			13
-
-#define	FIRST_VM86_IRQ		3
-#define LAST_VM86_IRQ		15
-#define invalid_vm86_irq(irq)	((irq) < 3 || (irq) > 15)
-
-
-#endif /* _ASM_IRQ_VECTORS_H */
diff --git a/include/asm-x86/mach-default/irq_vectors_limits.h b/include/asm-x86/mach-default/irq_vectors_limits.h
deleted file mode 100644
index a90c7a60109..00000000000
--- a/include/asm-x86/mach-default/irq_vectors_limits.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef _ASM_IRQ_VECTORS_LIMITS_H
-#define _ASM_IRQ_VECTORS_LIMITS_H
-
-#if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_PARAVIRT)
-#define NR_IRQS 224
-# if (224 >= 32 * NR_CPUS)
-# define NR_IRQ_VECTORS NR_IRQS
-# else
-# define NR_IRQ_VECTORS (32 * NR_CPUS)
-# endif
-#else
-#define NR_IRQS 16
-#define NR_IRQ_VECTORS NR_IRQS
-#endif
-
-#endif /* _ASM_IRQ_VECTORS_LIMITS_H */
diff --git a/include/asm-x86/mach-visws/irq_vectors.h b/include/asm-x86/mach-visws/irq_vectors.h
deleted file mode 100644
index cb572d8db50..00000000000
--- a/include/asm-x86/mach-visws/irq_vectors.h
+++ /dev/null
@@ -1,62 +0,0 @@
-#ifndef _ASM_IRQ_VECTORS_H
-#define _ASM_IRQ_VECTORS_H
-
-/*
- * IDT vectors usable for external interrupt sources start
- * at 0x20:
- */
-#define FIRST_EXTERNAL_VECTOR	0x20
-
-#define SYSCALL_VECTOR		0x80
-
-/*
- * Vectors 0x20-0x2f are used for ISA interrupts.
- */
-
-/*
- * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
- *
- *  some of the following vectors are 'rare', they are merged
- *  into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
- *  TLB, reschedule and local APIC vectors are performance-critical.
- *
- *  Vectors 0xf0-0xfa are free (reserved for future Linux use).
- */
-#define SPURIOUS_APIC_VECTOR	0xff
-#define ERROR_APIC_VECTOR	0xfe
-#define INVALIDATE_TLB_VECTOR	0xfd
-#define RESCHEDULE_VECTOR	0xfc
-#define CALL_FUNCTION_VECTOR	0xfb
-
-#define THERMAL_APIC_VECTOR	0xf0
-/*
- * Local APIC timer IRQ vector is on a different priority level,
- * to work around the 'lost local interrupt if more than 2 IRQ
- * sources per level' errata.
- */
-#define LOCAL_TIMER_VECTOR	0xef
-
-/*
- * First APIC vector available to drivers: (vectors 0x30-0xee)
- * we start at 0x31 to spread out vectors evenly between priority
- * levels. (0x80 is the syscall vector)
- */
-#define FIRST_DEVICE_VECTOR	0x31
-#define FIRST_SYSTEM_VECTOR	0xef
-
-#define TIMER_IRQ 0
-
-/*
- * IRQ definitions
- */
-#define NR_VECTORS 256
-#define NR_IRQS 224
-#define NR_IRQ_VECTORS NR_IRQS
-
-#define FPU_IRQ			13
-
-#define	FIRST_VM86_IRQ		3
-#define LAST_VM86_IRQ		15
-#define invalid_vm86_irq(irq)	((irq) < 3 || (irq) > 15)
-
-#endif /* _ASM_IRQ_VECTORS_H */
diff --git a/include/asm-x86/mach-voyager/irq_vectors.h b/include/asm-x86/mach-voyager/irq_vectors.h
deleted file mode 100644
index 165421f5821..00000000000
--- a/include/asm-x86/mach-voyager/irq_vectors.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8 -*- */
-
-/* Copyright (C) 2002
- *
- * Author: James.Bottomley@HansenPartnership.com
- *
- * linux/arch/i386/voyager/irq_vectors.h
- *
- * This file provides definitions for the VIC and QIC CPIs
- */
-
-#ifndef _ASM_IRQ_VECTORS_H
-#define _ASM_IRQ_VECTORS_H
-
-/*
- * IDT vectors usable for external interrupt sources start
- * at 0x20:
- */
-#define FIRST_EXTERNAL_VECTOR	0x20
-
-#define SYSCALL_VECTOR		0x80
-
-/*
- * Vectors 0x20-0x2f are used for ISA interrupts.
- */
-
-/* These define the CPIs we use in linux */
-#define VIC_CPI_LEVEL0			0
-#define VIC_CPI_LEVEL1			1
-/* now the fake CPIs */
-#define VIC_TIMER_CPI			2
-#define VIC_INVALIDATE_CPI		3
-#define VIC_RESCHEDULE_CPI		4
-#define VIC_ENABLE_IRQ_CPI		5
-#define VIC_CALL_FUNCTION_CPI		6
-
-/* Now the QIC CPIs:  Since we don't need the two initial levels,
- * these are 2 less than the VIC CPIs */
-#define QIC_CPI_OFFSET			1
-#define QIC_TIMER_CPI			(VIC_TIMER_CPI - QIC_CPI_OFFSET)
-#define QIC_INVALIDATE_CPI		(VIC_INVALIDATE_CPI - QIC_CPI_OFFSET)
-#define QIC_RESCHEDULE_CPI		(VIC_RESCHEDULE_CPI - QIC_CPI_OFFSET)
-#define QIC_ENABLE_IRQ_CPI		(VIC_ENABLE_IRQ_CPI - QIC_CPI_OFFSET)
-#define QIC_CALL_FUNCTION_CPI		(VIC_CALL_FUNCTION_CPI - QIC_CPI_OFFSET)
-
-#define VIC_START_FAKE_CPI		VIC_TIMER_CPI
-#define VIC_END_FAKE_CPI		VIC_CALL_FUNCTION_CPI
-
-/* this is the SYS_INT CPI. */
-#define VIC_SYS_INT			8
-#define VIC_CMN_INT			15
-
-/* This is the boot CPI for alternate processors.  It gets overwritten
- * by the above once the system has activated all available processors */
-#define VIC_CPU_BOOT_CPI		VIC_CPI_LEVEL0
-#define VIC_CPU_BOOT_ERRATA_CPI		(VIC_CPI_LEVEL0 + 8)
-
-#define NR_VECTORS 256
-#define NR_IRQS 224
-#define NR_IRQ_VECTORS NR_IRQS
-
-#define FPU_IRQ				13
-
-#define	FIRST_VM86_IRQ		3
-#define LAST_VM86_IRQ		15
-#define invalid_vm86_irq(irq)	((irq) < 3 || (irq) > 15)
-
-#ifndef __ASSEMBLY__
-extern asmlinkage void vic_cpi_interrupt(void);
-extern asmlinkage void vic_sys_interrupt(void);
-extern asmlinkage void vic_cmn_interrupt(void);
-extern asmlinkage void qic_timer_interrupt(void);
-extern asmlinkage void qic_invalidate_interrupt(void);
-extern asmlinkage void qic_reschedule_interrupt(void);
-extern asmlinkage void qic_enable_irq_interrupt(void);
-extern asmlinkage void qic_call_function_interrupt(void);
-#endif /* !__ASSEMBLY__ */
-
-#endif /* _ASM_IRQ_VECTORS_H */
-- 
cgit v1.2.3-70-g09d2


From 0bc471d93051a19545257909bc2ed2ad3b389b54 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 2 May 2008 21:55:12 +0200
Subject: x86: move BUILD_IRQ macro magic to i8259_64.c

i8259_64.c is the only place which uses those macros.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/i8259_64.c  | 14 ++++++++++++++
 include/asm-x86/hw_irq_64.h | 14 --------------
 2 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c
index fa57a156850..c4ae4769ce6 100644
--- a/arch/x86/kernel/i8259_64.c
+++ b/arch/x86/kernel/i8259_64.c
@@ -34,6 +34,20 @@
  * interrupt-controller happy.
  */
 
+#define IRQ_NAME2(nr) nr##_interrupt(void)
+#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
+
+/*
+ *	SMP has a few special interrupts for IPI messages
+ */
+
+#define BUILD_IRQ(nr)				\
+	asmlinkage void IRQ_NAME(nr);		\
+	asm("\n.p2align\n"			\
+	    "IRQ" #nr "_interrupt:\n\t"		\
+	    "push $~(" #nr ") ; "		\
+	    "jmp common_interrupt");
+
 #define BI(x,y) \
 	BUILD_IRQ(x##y)
 
diff --git a/include/asm-x86/hw_irq_64.h b/include/asm-x86/hw_irq_64.h
index 98c9d494a71..9305f7456a7 100644
--- a/include/asm-x86/hw_irq_64.h
+++ b/include/asm-x86/hw_irq_64.h
@@ -17,18 +17,4 @@ extern void native_init_IRQ(void);
 
 #include <asm/ptrace.h>
 
-#define IRQ_NAME2(nr) nr##_interrupt(void)
-#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
-
-/*
- *	SMP has a few special interrupts for IPI messages
- */
-
-#define BUILD_IRQ(nr)				\
-	asmlinkage void IRQ_NAME(nr);		\
-	asm("\n.p2align\n"			\
-	    "IRQ" #nr "_interrupt:\n\t"		\
-	    "push $~(" #nr ") ; "		\
-	    "jmp common_interrupt");
-
 #endif
-- 
cgit v1.2.3-70-g09d2


From 97e7b6f54c0d66586a658e985630cd63040311fb Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 2 May 2008 22:02:25 +0200
Subject: x86: unify apic interrupt function declarations

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/hw_irq.h    | 4 ++++
 include/asm-x86/hw_irq_64.h | 3 ---
 include/asm-x86/irq_64.h    | 2 ++
 3 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h
index a8c5e8bdaa4..cdb09d77af0 100644
--- a/include/asm-x86/hw_irq.h
+++ b/include/asm-x86/hw_irq.h
@@ -67,6 +67,10 @@ extern void print_IO_APIC(void);
 extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
 extern void setup_ioapic_dest(void);
 
+#ifdef CONFIG_X86_64
+extern void enable_IO_APIC(void);
+#endif
+
 /* IPI functions */
 extern void send_IPI_self(int vector);
 extern void send_IPI(int dest, int vector);
diff --git a/include/asm-x86/hw_irq_64.h b/include/asm-x86/hw_irq_64.h
index 9305f7456a7..428785b3634 100644
--- a/include/asm-x86/hw_irq_64.h
+++ b/include/asm-x86/hw_irq_64.h
@@ -12,9 +12,6 @@ extern spinlock_t vector_lock;
  * Interrupt entry/exit code at both C and assembly level
  */
 
-extern void enable_IO_APIC(void);
-extern void native_init_IRQ(void);
-
 #include <asm/ptrace.h>
 
 #endif
diff --git a/include/asm-x86/irq_64.h b/include/asm-x86/irq_64.h
index 3037ec667bf..1ef233d15dc 100644
--- a/include/asm-x86/irq_64.h
+++ b/include/asm-x86/irq_64.h
@@ -27,4 +27,6 @@ extern void fixup_irqs(cpumask_t map);
 
 #define __ARCH_HAS_DO_SOFTIRQ 1
 
+extern void native_init_IRQ(void);
+
 #endif /* _ASM_IRQ_H */
-- 
cgit v1.2.3-70-g09d2


From 22dc12d1f694b9af88e616ab758ff90c69d0fc83 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 2 May 2008 22:10:39 +0200
Subject: x86: unify hwirq.h

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/hw_irq.h    | 11 +++++++----
 include/asm-x86/hw_irq_32.h |  5 -----
 include/asm-x86/hw_irq_64.h | 17 -----------------
 3 files changed, 7 insertions(+), 26 deletions(-)
 delete mode 100644 include/asm-x86/hw_irq_32.h
 delete mode 100644 include/asm-x86/hw_irq_64.h

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h
index cdb09d77af0..1db2dff1ef4 100644
--- a/include/asm-x86/hw_irq.h
+++ b/include/asm-x86/hw_irq.h
@@ -89,12 +89,15 @@ extern asmlinkage void qic_reschedule_interrupt(void);
 extern asmlinkage void qic_enable_irq_interrupt(void);
 extern asmlinkage void qic_call_function_interrupt(void);
 
-#endif /* !ASSEMBLY_ */
-
 #ifdef CONFIG_X86_32
-# include "hw_irq_32.h"
+extern void (*const interrupt[NR_IRQS])(void);
 #else
-# include "hw_irq_64.h"
+typedef int vector_irq_t[NR_VECTORS];
+DECLARE_PER_CPU(vector_irq_t, vector_irq);
+extern void __setup_vector_irq(int cpu);
+extern spinlock_t vector_lock;
 #endif
 
+#endif /* !ASSEMBLY_ */
+
 #endif
diff --git a/include/asm-x86/hw_irq_32.h b/include/asm-x86/hw_irq_32.h
deleted file mode 100644
index 89fca5af595..00000000000
--- a/include/asm-x86/hw_irq_32.h
+++ /dev/null
@@ -1,5 +0,0 @@
-
-extern void (*const interrupt[NR_IRQS])(void);
-
-
-
diff --git a/include/asm-x86/hw_irq_64.h b/include/asm-x86/hw_irq_64.h
deleted file mode 100644
index 428785b3634..00000000000
--- a/include/asm-x86/hw_irq_64.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef __ASSEMBLY__
-
-typedef int vector_irq_t[NR_VECTORS];
-DECLARE_PER_CPU(vector_irq_t, vector_irq);
-extern void __setup_vector_irq(int cpu);
-extern spinlock_t vector_lock;
-
-/*
- * Various low-level irq details needed by irq.c, process.c,
- * time.c, io_apic.c and smp.c
- *
- * Interrupt entry/exit code at both C and assembly level
- */
-
-#include <asm/ptrace.h>
-
-#endif
-- 
cgit v1.2.3-70-g09d2


From 22067d4501bfb47c8ca34144f68fad734178914d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 2 May 2008 22:14:44 +0200
Subject: x86: unify irq.h

Not much difference in those files.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/irq.h    | 51 +++++++++++++++++++++++++++++++++++++++++++++---
 include/asm-x86/irq_32.h | 51 ------------------------------------------------
 include/asm-x86/irq_64.h | 32 ------------------------------
 3 files changed, 48 insertions(+), 86 deletions(-)
 delete mode 100644 include/asm-x86/irq_32.h
 delete mode 100644 include/asm-x86/irq_64.h

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/irq.h b/include/asm-x86/irq.h
index 7ba905465a5..1a292575731 100644
--- a/include/asm-x86/irq.h
+++ b/include/asm-x86/irq.h
@@ -1,5 +1,50 @@
-#ifdef CONFIG_X86_32
-# include "irq_32.h"
+#ifndef _ASM_IRQ_H
+#define _ASM_IRQ_H
+/*
+ *	(C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
+ *
+ *	IRQ/IPI changes taken from work by Thomas Radke
+ *	<tomsoft@informatik.tu-chemnitz.de>
+ */
+
+#include <asm/apicdef.h>
+#include <asm/irq_vectors.h>
+
+static inline int irq_canonicalize(int irq)
+{
+	return ((irq == 2) ? 9 : irq);
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+# define ARCH_HAS_NMI_WATCHDOG
+#endif
+
+#ifdef CONFIG_4KSTACKS
+  extern void irq_ctx_init(int cpu);
+  extern void irq_ctx_exit(int cpu);
+# define __ARCH_HAS_DO_SOFTIRQ
 #else
-# include "irq_64.h"
+# define irq_ctx_init(cpu) do { } while (0)
+# define irq_ctx_exit(cpu) do { } while (0)
+# ifdef CONFIG_X86_64
+#  define __ARCH_HAS_DO_SOFTIRQ
+# endif
+#endif
+
+#ifdef CONFIG_IRQBALANCE
+extern int irqbalance_disable(char *str);
+#endif
+
+#ifdef CONFIG_HOTPLUG_CPU
+#include <linux/cpumask.h>
+extern void fixup_irqs(cpumask_t map);
 #endif
+
+extern unsigned int do_IRQ(struct pt_regs *regs);
+extern void init_IRQ(void);
+extern void native_init_IRQ(void);
+
+/* Interrupt vector management */
+extern DECLARE_BITMAP(used_vectors, NR_VECTORS);
+
+#endif /* _ASM_IRQ_H */
diff --git a/include/asm-x86/irq_32.h b/include/asm-x86/irq_32.h
deleted file mode 100644
index c5c7542f79a..00000000000
--- a/include/asm-x86/irq_32.h
+++ /dev/null
@@ -1,51 +0,0 @@
-#ifndef _ASM_IRQ_H
-#define _ASM_IRQ_H
-
-/*
- *	linux/include/asm/irq.h
- *
- *	(C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
- *
- *	IRQ/IPI changes taken from work by Thomas Radke
- *	<tomsoft@informatik.tu-chemnitz.de>
- */
-
-#include <linux/sched.h>
-/* include comes from machine specific directory */
-#include <asm/irq_vectors.h>
-#include <asm/thread_info.h>
-
-static inline int irq_canonicalize(int irq)
-{
-	return ((irq == 2) ? 9 : irq);
-}
-
-#ifdef CONFIG_X86_LOCAL_APIC
-# define ARCH_HAS_NMI_WATCHDOG		/* See include/linux/nmi.h */
-#endif
-
-#ifdef CONFIG_4KSTACKS
-  extern void irq_ctx_init(int cpu);
-  extern void irq_ctx_exit(int cpu);
-# define __ARCH_HAS_DO_SOFTIRQ
-#else
-# define irq_ctx_init(cpu) do { } while (0)
-# define irq_ctx_exit(cpu) do { } while (0)
-#endif
-
-#ifdef CONFIG_IRQBALANCE
-extern int irqbalance_disable(char *str);
-#endif
-
-#ifdef CONFIG_HOTPLUG_CPU
-extern void fixup_irqs(cpumask_t map);
-#endif
-
-unsigned int do_IRQ(struct pt_regs *regs);
-void init_IRQ(void);
-void __init native_init_IRQ(void);
-
-/* Interrupt vector management */
-extern DECLARE_BITMAP(used_vectors, NR_VECTORS);
-
-#endif /* _ASM_IRQ_H */
diff --git a/include/asm-x86/irq_64.h b/include/asm-x86/irq_64.h
deleted file mode 100644
index 1ef233d15dc..00000000000
--- a/include/asm-x86/irq_64.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef _ASM_IRQ_H
-#define _ASM_IRQ_H
-
-/*
- *	linux/include/asm/irq.h
- *
- *	(C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
- *
- *	IRQ/IPI changes taken from work by Thomas Radke
- *	<tomsoft@informatik.tu-chemnitz.de>
- */
-
-#include <asm/apicdef.h>
-#include <asm/irq_vectors.h>
-
-static inline int irq_canonicalize(int irq)
-{
-	return ((irq == 2) ? 9 : irq);
-}
-
-#define ARCH_HAS_NMI_WATCHDOG		/* See include/linux/nmi.h */
-
-#ifdef CONFIG_HOTPLUG_CPU
-#include <linux/cpumask.h>
-extern void fixup_irqs(cpumask_t map);
-#endif
-
-#define __ARCH_HAS_DO_SOFTIRQ 1
-
-extern void native_init_IRQ(void);
-
-#endif /* _ASM_IRQ_H */
-- 
cgit v1.2.3-70-g09d2


From 88a83350bc1955fa9d8fca059e19bc360fcef2ba Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 2 May 2008 23:19:26 +0200
Subject: x86: declare setup_apic_routing

Global functions need a prototype.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/genapic_64.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h
index 1de931b263c..0f8504627c4 100644
--- a/include/asm-x86/genapic_64.h
+++ b/include/asm-x86/genapic_64.h
@@ -44,4 +44,6 @@ DECLARE_PER_CPU(int, x2apic_extra_bits);
 extern void uv_cpu_init(void);
 extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
 
+extern void setup_apic_routing(void);
+
 #endif
-- 
cgit v1.2.3-70-g09d2


From 1a331957efd214fc3a84f70956dfaec65e70c031 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 3 May 2008 00:30:50 +0200
Subject: x86: move eisa_set_level_irq declaration to header

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/acpi/boot.c | 2 --
 include/asm-x86/hw_irq.h    | 3 +++
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index c49ebcc6c41..671591fcc61 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -507,8 +507,6 @@ int acpi_register_gsi(u32 gsi, int triggering, int polarity)
 	 * Make sure all (legacy) PCI IRQs are set as level-triggered.
 	 */
 	if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
-		extern void eisa_set_level_irq(unsigned int irq);
-
 		if (triggering == ACPI_LEVEL_SENSITIVE)
 			eisa_set_level_irq(gsi);
 	}
diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h
index 1db2dff1ef4..1428b41dcbb 100644
--- a/include/asm-x86/hw_irq.h
+++ b/include/asm-x86/hw_irq.h
@@ -79,6 +79,9 @@ extern void send_IPI(int dest, int vector);
 extern atomic_t irq_err_count;
 extern atomic_t irq_mis_count;
 
+/* EISA */
+extern void eisa_set_level_irq(unsigned int irq);
+
 /* Voyager functions */
 extern asmlinkage void vic_cpi_interrupt(void);
 extern asmlinkage void vic_sys_interrupt(void);
-- 
cgit v1.2.3-70-g09d2


From 305b92a2323eeaa4b481f409d54f778dd7e21a46 Mon Sep 17 00:00:00 2001
From: Alan Mayer <ajm@sgi.com>
Date: Tue, 15 Apr 2008 15:36:56 -0500
Subject: x86: change FIRST_SYSTEM_VECTOR to a variable

The SGI UV system needs several more system vectors than a vanilla
x86_64 system.  Rather than burden the other archs with extra system
vectors that they don't use, change FIRST_SYSTEM_VECTOR to a variable,
so that it can be dynamic.

Signed-off-by: Alan Mayer <ajm@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic_32.c     | 14 +++++++-------
 arch/x86/kernel/i8259_64.c    | 30 +++++++++++++++---------------
 arch/x86/kernel/io_apic_32.c  |  8 ++++++--
 arch/x86/kernel/io_apic_64.c  |  6 +++++-
 include/asm-x86/desc.h        | 22 ++++++++++++++++++++++
 include/asm-x86/irq_vectors.h |  2 --
 6 files changed, 55 insertions(+), 27 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 4b99b1bdeb6..807158e4b5d 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1351,13 +1351,13 @@ void __init smp_intr_init(void)
 	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
 	 * IPI, driven by wakeup.
 	 */
-	set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
+	alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
 
 	/* IPI for invalidation */
-	set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
 
 	/* IPI for generic function call */
-	set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
 }
 #endif
 
@@ -1370,15 +1370,15 @@ void __init apic_intr_init(void)
 	smp_intr_init();
 #endif
 	/* self generated IPI for local APIC timer */
-	set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
+	alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
 
 	/* IPI vectors for APIC spurious and error interrupts */
-	set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
-	set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+	alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
+	alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
 
 	/* thermal monitor LVT interrupt */
 #ifdef CONFIG_X86_MCE_P4THERMAL
-	set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
+	alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
 #endif
 }
 
diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c
index c4ae4769ce6..1870e0e8655 100644
--- a/arch/x86/kernel/i8259_64.c
+++ b/arch/x86/kernel/i8259_64.c
@@ -493,33 +493,33 @@ void __init native_init_IRQ(void)
 	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
 	 * IPI, driven by wakeup.
 	 */
-	set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
+	alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
 
 	/* IPIs for invalidation */
-	set_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
-	set_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
-	set_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
-	set_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
-	set_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
-	set_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
-	set_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
-	set_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
 
 	/* IPI for generic function call */
-	set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
 
 	/* Low priority IPI to cleanup after moving an irq */
 	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
 #endif
-	set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
-	set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
+	alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
+	alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
 
 	/* self generated IPI for local APIC timer */
-	set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
+	alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
 
 	/* IPI vectors for APIC spurious and error interrupts */
-	set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
-	set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+	alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
+	alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
 
 	if (!acpi_ioapic)
 		setup_irq(2, &irq2);
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index a40d54fc1fd..0ae4a9d00ce 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -83,6 +83,10 @@ int mp_irq_entries;
 
 static int disable_timer_pin_1 __initdata;
 
+int first_system_vector = 0xfe;
+
+char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
+
 /*
  * Rough estimation of how many shared IRQs there are, can
  * be changed anytime.
@@ -1176,7 +1180,7 @@ static int __assign_irq_vector(int irq)
 	offset = current_offset;
 next:
 	vector += 8;
-	if (vector >= FIRST_SYSTEM_VECTOR) {
+	if (vector >= first_system_vector) {
 		offset = (offset + 1) % 8;
 		vector = FIRST_DEVICE_VECTOR + offset;
 	}
@@ -2269,7 +2273,7 @@ void __init setup_IO_APIC(void)
 	int i;
 
 	/* Reserve all the system vectors. */
-	for (i = FIRST_SYSTEM_VECTOR; i < NR_VECTORS; i++)
+	for (i = first_system_vector; i < NR_VECTORS; i++)
 		set_bit(i, used_vectors);
 
 	enable_IO_APIC();
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index ef1a8dfcc52..f1e1ae3e5c7 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -82,6 +82,10 @@ struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
 
 static int assign_irq_vector(int irq, cpumask_t mask);
 
+int first_system_vector = 0xfe;
+
+char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
+
 #define __apicdebuginit  __init
 
 int sis_apic_bug; /* not actually supported, dummy for compile */
@@ -730,7 +734,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
 		offset = current_offset;
 next:
 		vector += 8;
-		if (vector >= FIRST_SYSTEM_VECTOR) {
+		if (vector >= first_system_vector) {
 			/* If we run out of vectors on large boxen, must share them. */
 			offset = (offset + 1) % 8;
 			vector = FIRST_DEVICE_VECTOR + offset;
diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h
index 268a012bcd7..b3875d4b4fa 100644
--- a/include/asm-x86/desc.h
+++ b/include/asm-x86/desc.h
@@ -311,6 +311,28 @@ static inline void set_intr_gate(unsigned int n, void *addr)
 	_set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS);
 }
 
+#define SYS_VECTOR_FREE		0
+#define SYS_VECTOR_ALLOCED	1
+
+extern int first_system_vector;
+extern char system_vectors[];
+
+static inline void alloc_system_vector(int vector)
+{
+	if (system_vectors[vector] == SYS_VECTOR_FREE) {
+		system_vectors[vector] = SYS_VECTOR_ALLOCED;
+		if (first_system_vector > vector)
+			first_system_vector = vector;
+	} else
+		BUG();
+}
+
+static inline void alloc_intr_gate(unsigned int n, void *addr)
+{
+	alloc_system_vector(n);
+	set_intr_gate(n, addr);
+}
+
 /*
  * This routine sets up an interrupt gate at directory privilege level 3.
  */
diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h
index daceaaf0a3a..3cb6d8c77b3 100644
--- a/include/asm-x86/irq_vectors.h
+++ b/include/asm-x86/irq_vectors.h
@@ -96,8 +96,6 @@
 # define FIRST_DEVICE_VECTOR	(IRQ15_VECTOR + 2)
 #endif
 
-#define FIRST_SYSTEM_VECTOR	0xef
-
 #define NR_VECTORS		256
 
 #define FPU_IRQ			13
-- 
cgit v1.2.3-70-g09d2


From f8096f92b87d81c55ed63964d27baa9ce5ffe508 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Tue, 22 Apr 2008 16:27:29 +0100
Subject: x86: separate cmpxchg8b checking from PAE checking

.. allowing the former to be use in non-PAE kernels, too.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig.cpu                | 4 ++++
 include/asm-x86/required-features.h | 6 +++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 2ad6301849a..3d22bb8175b 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -399,6 +399,10 @@ config X86_TSC
 	def_bool y
 	depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64
 
+config X86_CMPXCHG64
+	def_bool y
+	depends on X86_PAE || X86_64
+
 # this should be set for all -march=.. options where the compiler
 # generates cmov.
 config X86_CMOV
diff --git a/include/asm-x86/required-features.h b/include/asm-x86/required-features.h
index 7400d3ad75c..8c387198ca8 100644
--- a/include/asm-x86/required-features.h
+++ b/include/asm-x86/required-features.h
@@ -19,9 +19,13 @@
 
 #if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
 # define NEED_PAE	(1<<(X86_FEATURE_PAE & 31))
-# define NEED_CX8	(1<<(X86_FEATURE_CX8 & 31))
 #else
 # define NEED_PAE	0
+#endif
+
+#ifdef CONFIG_X86_CMPXCHG64
+# define NEED_CX8	(1<<(X86_FEATURE_CX8 & 31))
+#else
 # define NEED_CX8	0
 #endif
 
-- 
cgit v1.2.3-70-g09d2


From aa134f1b09df6beaa4d031a50d5fda1f3cebce6c Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Tue, 8 Apr 2008 10:49:03 +0200
Subject: x86: iommu: use symbolic constants, not hardcoded numbers

Move symbolic constants into gart.h, and use them instead of hardcoded
constant.

Signed-off-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/pci-gart_64.c | 10 +++++-----
 drivers/char/agp/amd64-agp.c  | 25 +++----------------------
 include/asm-x86/gart.h        | 21 +++++++++++++++++++++
 3 files changed, 29 insertions(+), 27 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index c07455d1695..bffcf455c85 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -598,13 +598,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 		dev = k8_northbridges[i];
 		gatt_reg = __pa(gatt) >> 12;
 		gatt_reg <<= 4;
-		pci_write_config_dword(dev, 0x98, gatt_reg);
-		pci_read_config_dword(dev, 0x90, &ctl);
+		pci_write_config_dword(dev, AMD64_GARTTABLEBASE, gatt_reg);
+		pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
 
-		ctl |= 1;
-		ctl &= ~((1<<4) | (1<<5));
+		ctl |= GARTEN;
+		ctl &= ~(DISGARTCPU | DISGARTIO);
 
-		pci_write_config_dword(dev, 0x90, ctl);
+		pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
 	}
 	flush_gart();
 
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index d8200ac8f8c..25d64224cdb 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c
@@ -16,28 +16,9 @@
 #include <asm/page.h>		/* PAGE_SIZE */
 #include <asm/e820.h>
 #include <asm/k8.h>
+#include <asm/gart.h>
 #include "agp.h"
 
-/* PTE bits. */
-#define GPTE_VALID	1
-#define GPTE_COHERENT	2
-
-/* Aperture control register bits. */
-#define GARTEN		(1<<0)
-#define DISGARTCPU	(1<<4)
-#define DISGARTIO	(1<<5)
-
-/* GART cache control register bits. */
-#define INVGART		(1<<0)
-#define GARTPTEERR	(1<<1)
-
-/* K8 On-cpu GART registers */
-#define AMD64_GARTAPERTURECTL	0x90
-#define AMD64_GARTAPERTUREBASE	0x94
-#define AMD64_GARTTABLEBASE	0x98
-#define AMD64_GARTCACHECTL	0x9c
-#define AMD64_GARTEN		(1<<0)
-
 /* NVIDIA K8 registers */
 #define NVIDIA_X86_64_0_APBASE		0x10
 #define NVIDIA_X86_64_1_APBASE1		0x50
@@ -165,7 +146,7 @@ static int amd64_fetch_size(void)
  * In a multiprocessor x86-64 system, this function gets
  * called once for each CPU.
  */
-static u64 amd64_configure (struct pci_dev *hammer, u64 gatt_table)
+static u64 amd64_configure(struct pci_dev *hammer, u64 gatt_table)
 {
 	u64 aperturebase;
 	u32 tmp;
@@ -181,7 +162,7 @@ static u64 amd64_configure (struct pci_dev *hammer, u64 gatt_table)
 	addr >>= 12;
 	tmp = (u32) addr<<4;
 	tmp &= ~0xf;
-	pci_write_config_dword (hammer, AMD64_GARTTABLEBASE, tmp);
+	pci_write_config_dword(hammer, AMD64_GARTTABLEBASE, tmp);
 
 	/* Enable GART translation for this hammer. */
 	pci_read_config_dword(hammer, AMD64_GARTAPERTURECTL, &tmp);
diff --git a/include/asm-x86/gart.h b/include/asm-x86/gart.h
index 90958ed993f..248e5778e92 100644
--- a/include/asm-x86/gart.h
+++ b/include/asm-x86/gart.h
@@ -5,6 +5,7 @@ extern void pci_iommu_shutdown(void);
 extern void no_iommu_init(void);
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
+extern int agp_amd64_init(void);
 #ifdef CONFIG_GART_IOMMU
 extern void gart_iommu_init(void);
 extern void gart_iommu_shutdown(void);
@@ -31,4 +32,24 @@ static inline void gart_iommu_shutdown(void)
 
 #endif
 
+/* PTE bits. */
+#define GPTE_VALID	1
+#define GPTE_COHERENT	2
+
+/* Aperture control register bits. */
+#define GARTEN		(1<<0)
+#define DISGARTCPU	(1<<4)
+#define DISGARTIO	(1<<5)
+
+/* GART cache control register bits. */
+#define INVGART		(1<<0)
+#define GARTPTEERR	(1<<1)
+
+/* K8 On-cpu GART registers */
+#define AMD64_GARTAPERTURECTL	0x90
+#define AMD64_GARTAPERTUREBASE	0x94
+#define AMD64_GARTTABLEBASE	0x98
+#define AMD64_GARTCACHECTL	0x9c
+#define AMD64_GARTEN		(1<<0)
+
 #endif
-- 
cgit v1.2.3-70-g09d2


From 3bb6fbf9969a8bbe4892968659239273d092e78a Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Tue, 15 Apr 2008 12:43:57 +0200
Subject: x86 gart: factor out common code

Cleanup gart handling on amd64 a bit: move common code into
enable_gart_translation , and use symbolic register names where
appropriate.

Signed-off-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/pci-gart_64.c | 23 ++++++-----------------
 drivers/char/agp/amd64-agp.c  | 29 +++++++++--------------------
 include/asm-x86/gart.h        | 17 +++++++++++++++++
 3 files changed, 32 insertions(+), 37 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index bffcf455c85..1f99b62ff61 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -533,8 +533,8 @@ static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
 	unsigned aper_size = 0, aper_base_32, aper_order;
 	u64 aper_base;
 
-	pci_read_config_dword(dev, 0x94, &aper_base_32);
-	pci_read_config_dword(dev, 0x90, &aper_order);
+	pci_read_config_dword(dev, AMD64_GARTAPERTUREBASE, &aper_base_32);
+	pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &aper_order);
 	aper_order = (aper_order >> 1) & 7;
 
 	aper_base = aper_base_32 & 0x7fff;
@@ -592,19 +592,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 	agp_gatt_table = gatt;
 
 	for (i = 0; i < num_k8_northbridges; i++) {
-		u32 gatt_reg;
-		u32 ctl;
-
 		dev = k8_northbridges[i];
-		gatt_reg = __pa(gatt) >> 12;
-		gatt_reg <<= 4;
-		pci_write_config_dword(dev, AMD64_GARTTABLEBASE, gatt_reg);
-		pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
-
-		ctl |= GARTEN;
-		ctl &= ~(DISGARTCPU | DISGARTIO);
-
-		pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
+		enable_gart_translation(dev, __pa(gatt));
 	}
 	flush_gart();
 
@@ -648,11 +637,11 @@ void gart_iommu_shutdown(void)
 		u32 ctl;
 
 		dev = k8_northbridges[i];
-		pci_read_config_dword(dev, 0x90, &ctl);
+		pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
 
-		ctl &= ~1;
+		ctl &= ~GARTEN;
 
-		pci_write_config_dword(dev, 0x90, ctl);
+		pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
 	}
 }
 
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index 9c24470a825..e3c7ea07f57 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c
@@ -150,25 +150,14 @@ static u64 amd64_configure(struct pci_dev *hammer, u64 gatt_table)
 {
 	u64 aperturebase;
 	u32 tmp;
-	u64 addr, aper_base;
+	u64 aper_base;
 
 	/* Address to map to */
-	pci_read_config_dword (hammer, AMD64_GARTAPERTUREBASE, &tmp);
+	pci_read_config_dword(hammer, AMD64_GARTAPERTUREBASE, &tmp);
 	aperturebase = tmp << 25;
 	aper_base = (aperturebase & PCI_BASE_ADDRESS_MEM_MASK);
 
-	/* address of the mappings table */
-	addr = (u64) gatt_table;
-	addr >>= 12;
-	tmp = (u32) addr<<4;
-	tmp &= ~0xf;
-	pci_write_config_dword(hammer, AMD64_GARTTABLEBASE, tmp);
-
-	/* Enable GART translation for this hammer. */
-	pci_read_config_dword(hammer, AMD64_GARTAPERTURECTL, &tmp);
-	tmp |= GARTEN;
-	tmp &= ~(DISGARTCPU | DISGARTIO);
-	pci_write_config_dword(hammer, AMD64_GARTAPERTURECTL, tmp);
+	enable_gart_translation(hammer, gatt_table);
 
 	return aper_base;
 }
@@ -207,9 +196,9 @@ static void amd64_cleanup(void)
         for (i = 0; i < num_k8_northbridges; i++) {
 		struct pci_dev *dev = k8_northbridges[i];
 		/* disable gart translation */
-		pci_read_config_dword (dev, AMD64_GARTAPERTURECTL, &tmp);
+		pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &tmp);
 		tmp &= ~AMD64_GARTEN;
-		pci_write_config_dword (dev, AMD64_GARTAPERTURECTL, tmp);
+		pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, tmp);
 	}
 }
 
@@ -289,9 +278,9 @@ static __devinit int fix_northbridge(struct pci_dev *nb, struct pci_dev *agp,
 	u32 nb_order, nb_base;
 	u16 apsize;
 
-	pci_read_config_dword(nb, 0x90, &nb_order);
+	pci_read_config_dword(nb, AMD64_GARTAPERTURECTL, &nb_order);
 	nb_order = (nb_order >> 1) & 7;
-	pci_read_config_dword(nb, 0x94, &nb_base);
+	pci_read_config_dword(nb, AMD64_GARTAPERTUREBASE, &nb_base);
 	nb_aper = nb_base << 25;
 	if (aperture_valid(nb_aper, (32*1024*1024)<<nb_order)) {
 		return 0;
@@ -327,8 +316,8 @@ static __devinit int fix_northbridge(struct pci_dev *nb, struct pci_dev *agp,
 	if (order < 0 || !aperture_valid(aper, (32*1024*1024)<<order))
 		return -1;
 
-	pci_write_config_dword(nb, 0x90, order << 1);
-	pci_write_config_dword(nb, 0x94, aper >> 25);
+	pci_write_config_dword(nb, AMD64_GARTAPERTURECTL, order << 1);
+	pci_write_config_dword(nb, AMD64_GARTAPERTUREBASE, aper >> 25);
 
 	return 0;
 }
diff --git a/include/asm-x86/gart.h b/include/asm-x86/gart.h
index 248e5778e92..6f22786d2f0 100644
--- a/include/asm-x86/gart.h
+++ b/include/asm-x86/gart.h
@@ -52,4 +52,21 @@ static inline void gart_iommu_shutdown(void)
 #define AMD64_GARTCACHECTL	0x9c
 #define AMD64_GARTEN		(1<<0)
 
+static inline void enable_gart_translation(struct pci_dev *dev, u64 addr)
+{
+	u32 tmp, ctl;
+
+        /* address of the mappings table */
+        addr >>= 12;
+        tmp = (u32) addr<<4;
+        tmp &= ~0xf;
+        pci_write_config_dword(dev, AMD64_GARTTABLEBASE, tmp);
+
+        /* Enable GART translation for this hammer. */
+        pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
+        ctl |= GARTEN;
+        ctl &= ~(DISGARTCPU | DISGARTIO);
+        pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
+}
+
 #endif
-- 
cgit v1.2.3-70-g09d2


From dedd4915af40cff6614707c50dcae43d17beadec Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 17 May 2008 08:28:33 +0200
Subject: bitops: fix build in struct thread_info

we can move flags from u32 to natural size - assembly code uses
offsetof.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/thread_info.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index 74481b72ae0..25d71053256 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -24,10 +24,10 @@ struct exec_domain;
 struct thread_info {
 	struct task_struct	*task;		/* main task structure */
 	struct exec_domain	*exec_domain;	/* execution domain */
-	__u32			flags;		/* low level flags */
+	unsigned long		flags;		/* low level flags */
 	__u32			status;		/* thread synchronous flags */
 	__u32			cpu;		/* current CPU */
-	int 			preempt_count;	/* 0 => preemptable,
+	int			preempt_count;	/* 0 => preemptable,
 						   <0 => BUG */
 	mm_segment_t		addr_limit;
 	struct restart_block    restart_block;
-- 
cgit v1.2.3-70-g09d2


From 0abbc78a0137fee60ef092f0b20a3d3d7e7e0cc2 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Tue, 20 May 2008 16:27:17 +0200
Subject: x86, aperture_64: use symbolic constants

Factor-out common aperture_valid code.

Signed-off-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/aperture_64.c | 23 +----------------------
 drivers/char/agp/amd64-agp.c  | 22 ++++------------------
 include/asm-x86/gart.h        | 24 ++++++++++++++++++++++++
 3 files changed, 29 insertions(+), 40 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 02f4dbaa4df..5373f7834d8 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -109,27 +109,6 @@ static u32 __init allocate_aperture(void)
 	return (u32)__pa(p);
 }
 
-static int __init aperture_valid(u64 aper_base, u32 aper_size, u32 min_size)
-{
-	if (!aper_base)
-		return 0;
-
-	if (aper_base + aper_size > 0x100000000UL) {
-		printk(KERN_ERR "Aperture beyond 4GB. Ignoring.\n");
-		return 0;
-	}
-	if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
-		printk(KERN_ERR "Aperture pointing to e820 RAM. Ignoring.\n");
-		return 0;
-	}
-	if (aper_size < min_size) {
-		printk(KERN_ERR "Aperture too small (%d MB) than (%d MB)\n",
-				 aper_size>>20, min_size>>20);
-		return 0;
-	}
-
-	return 1;
-}
 
 /* Find a PCI capability */
 static __u32 __init find_cap(int bus, int slot, int func, int cap)
@@ -344,7 +323,7 @@ out:
 	if (gart_fix_e820 && !fix && aper_enabled) {
 		if (!e820_all_mapped(aper_base, aper_base + aper_size,
 				    E820_RESERVED)) {
-			/* reserved it, so we can resuse it in second kernel */
+			/* reserve it, so we can reuse it in second kernel */
 			printk(KERN_INFO "update e820 for GART\n");
 			add_memory_region(aper_base, aper_size, E820_RESERVED);
 			update_e820();
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index e3c7ea07f57..f5af65ac8c7 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c
@@ -228,24 +228,10 @@ static const struct agp_bridge_driver amd_8151_driver = {
 };
 
 /* Some basic sanity checks for the aperture. */
-static int __devinit aperture_valid(u64 aper, u32 size)
+static int __devinit agp_aperture_valid(u64 aper, u32 size)
 {
-	if (aper == 0) {
-		printk(KERN_ERR PFX "No aperture\n");
+	if (!aperture_valid(aper, size, 32*1024*1024))
 		return 0;
-	}
-	if ((u64)aper + size > 0x100000000ULL) {
-		printk(KERN_ERR PFX "Aperture out of bounds\n");
-		return 0;
-	}
-	if (e820_any_mapped(aper, aper + size, E820_RAM)) {
-		printk(KERN_ERR PFX "Aperture pointing to RAM\n");
-		return 0;
-	}
-	if (size < 32*1024*1024) {
-		printk(KERN_ERR PFX "Aperture too small (%d MB)\n", size>>20);
-		return 0;
-	}
 
 	/* Request the Aperture. This catches cases when someone else
 	   already put a mapping in there - happens with some very broken BIOS
@@ -282,7 +268,7 @@ static __devinit int fix_northbridge(struct pci_dev *nb, struct pci_dev *agp,
 	nb_order = (nb_order >> 1) & 7;
 	pci_read_config_dword(nb, AMD64_GARTAPERTUREBASE, &nb_base);
 	nb_aper = nb_base << 25;
-	if (aperture_valid(nb_aper, (32*1024*1024)<<nb_order)) {
+	if (agp_aperture_valid(nb_aper, (32*1024*1024)<<nb_order)) {
 		return 0;
 	}
 
@@ -313,7 +299,7 @@ static __devinit int fix_northbridge(struct pci_dev *nb, struct pci_dev *agp,
 	}
 
 	printk(KERN_INFO PFX "Aperture from AGP @ %Lx size %u MB\n", aper, 32 << order);
-	if (order < 0 || !aperture_valid(aper, (32*1024*1024)<<order))
+	if (order < 0 || !agp_aperture_valid(aper, (32*1024*1024)<<order))
 		return -1;
 
 	pci_write_config_dword(nb, AMD64_GARTAPERTURECTL, order << 1);
diff --git a/include/asm-x86/gart.h b/include/asm-x86/gart.h
index 6f22786d2f0..c818b96f936 100644
--- a/include/asm-x86/gart.h
+++ b/include/asm-x86/gart.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_X8664_IOMMU_H
 #define _ASM_X8664_IOMMU_H 1
 
+#include <asm/e820.h>
+
 extern void pci_iommu_shutdown(void);
 extern void no_iommu_init(void);
 extern int force_iommu, no_iommu;
@@ -69,4 +71,26 @@ static inline void enable_gart_translation(struct pci_dev *dev, u64 addr)
         pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
 }
 
+static inline int aperture_valid(u64 aper_base, u32 aper_size, u32 min_size)
+{
+	if (!aper_base)
+		return 0;
+
+	if (aper_base + aper_size > 0x100000000ULL) {
+		printk(KERN_ERR "Aperture beyond 4GB. Ignoring.\n");
+		return 0;
+	}
+	if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
+		printk(KERN_ERR "Aperture pointing to e820 RAM. Ignoring.\n");
+		return 0;
+	}
+	if (aper_size < min_size) {
+		printk(KERN_ERR "Aperture too small (%d MB) than (%d MB)\n",
+				 aper_size>>20, min_size>>20);
+		return 0;
+	}
+
+	return 1;
+}
+
 #endif
-- 
cgit v1.2.3-70-g09d2


From 3843fc2575e3389f4f0ad0420a720240a5746a5d Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Fri, 9 May 2008 12:05:57 +0100
Subject: xen: remove support for non-PAE 32-bit

Non-PAE operation has been deprecated in Xen for a while, and is
rarely tested or used.  xen-unstable has now officially dropped
non-PAE support.  Since Xen/pvops' non-PAE support has also been
broken for a while, we may as well completely drop it altogether.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/xen/Kconfig       |  2 +-
 arch/x86/xen/enlighten.c   | 51 ++++++++++++++++------------------------------
 arch/x86/xen/mmu.c         | 19 ++---------------
 arch/x86/xen/mmu.h         | 24 ++++++----------------
 arch/x86/xen/xen-head.S    |  4 ----
 include/asm-x86/xen/page.h |  4 ----
 6 files changed, 27 insertions(+), 77 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 2e641be2737..525b108411b 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -6,7 +6,7 @@ config XEN
 	bool "Xen guest support"
 	select PARAVIRT
 	depends on X86_32
-	depends on X86_CMPXCHG && X86_TSC && !(X86_VISWS || X86_VOYAGER)
+	depends on X86_CMPXCHG && X86_TSC && X86_PAE && !(X86_VISWS || X86_VOYAGER)
 	help
 	  This is the Linux Xen port.  Enabling this will allow the
 	  kernel to boot in a paravirtualized environment under the
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c8a56e457d6..a05b7721bc8 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -785,38 +785,35 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
 static __init void xen_pagetable_setup_start(pgd_t *base)
 {
 	pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
+	int i;
 
 	/* special set_pte for pagetable initialization */
 	pv_mmu_ops.set_pte = xen_set_pte_init;
 
 	init_mm.pgd = base;
 	/*
-	 * copy top-level of Xen-supplied pagetable into place.	 For
-	 * !PAE we can use this as-is, but for PAE it is a stand-in
-	 * while we copy the pmd pages.
+	 * copy top-level of Xen-supplied pagetable into place.  This
+	 * is a stand-in while we copy the pmd pages.
 	 */
 	memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t));
 
-	if (PTRS_PER_PMD > 1) {
-		int i;
-		/*
-		 * For PAE, need to allocate new pmds, rather than
-		 * share Xen's, since Xen doesn't like pmd's being
-		 * shared between address spaces.
-		 */
-		for (i = 0; i < PTRS_PER_PGD; i++) {
-			if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) {
-				pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+	/*
+	 * For PAE, need to allocate new pmds, rather than
+	 * share Xen's, since Xen doesn't like pmd's being
+	 * shared between address spaces.
+	 */
+	for (i = 0; i < PTRS_PER_PGD; i++) {
+		if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) {
+			pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
 
-				memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]),
-				       PAGE_SIZE);
+			memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]),
+			       PAGE_SIZE);
 
-				make_lowmem_page_readonly(pmd);
+			make_lowmem_page_readonly(pmd);
 
-				set_pgd(&base[i], __pgd(1 + __pa(pmd)));
-			} else
-				pgd_clear(&base[i]);
-		}
+			set_pgd(&base[i], __pgd(1 + __pa(pmd)));
+		} else
+			pgd_clear(&base[i]);
 	}
 
 	/* make sure zero_page is mapped RO so we can use it in pagetables */
@@ -873,17 +870,7 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
 
 	/* Actually pin the pagetable down, but we can't set PG_pinned
 	   yet because the page structures don't exist yet. */
-	{
-		unsigned level;
-
-#ifdef CONFIG_X86_PAE
-		level = MMUEXT_PIN_L3_TABLE;
-#else
-		level = MMUEXT_PIN_L2_TABLE;
-#endif
-
-		pin_pagetable_pfn(level, PFN_DOWN(__pa(base)));
-	}
+	pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base)));
 }
 
 /* This is called once we have the cpu_possible_map */
@@ -1093,7 +1080,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.make_pte = xen_make_pte,
 	.make_pgd = xen_make_pgd,
 
-#ifdef CONFIG_X86_PAE
 	.set_pte_atomic = xen_set_pte_atomic,
 	.set_pte_present = xen_set_pte_at,
 	.set_pud = xen_set_pud,
@@ -1102,7 +1088,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 
 	.make_pmd = xen_make_pmd,
 	.pmd_val = xen_pmd_val,
-#endif	/* PAE */
 
 	.activate_mm = xen_activate_mm,
 	.dup_mmap = xen_dup_mmap,
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 126766d43ae..07c2653ec33 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -222,7 +222,7 @@ pmdval_t xen_pmd_val(pmd_t pmd)
 		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
 	return ret;
 }
-#ifdef CONFIG_X86_PAE
+
 void xen_set_pud(pud_t *ptr, pud_t val)
 {
 	struct multicall_space mcs;
@@ -272,12 +272,6 @@ pmd_t xen_make_pmd(pmdval_t pmd)
 
 	return native_make_pmd(pmd);
 }
-#else  /* !PAE */
-void xen_set_pte(pte_t *ptep, pte_t pte)
-{
-	*ptep = pte;
-}
-#endif	/* CONFIG_X86_PAE */
 
 /*
   (Yet another) pagetable walker.  This one is intended for pinning a
@@ -430,8 +424,6 @@ static int pin_page(struct page *page, enum pt_level level)
    read-only, and can be pinned. */
 void xen_pgd_pin(pgd_t *pgd)
 {
-	unsigned level;
-
 	xen_mc_batch();
 
 	if (pgd_walk(pgd, pin_page, TASK_SIZE)) {
@@ -441,14 +433,7 @@ void xen_pgd_pin(pgd_t *pgd)
 		xen_mc_batch();
 	}
 
-#ifdef CONFIG_X86_PAE
-	level = MMUEXT_PIN_L3_TABLE;
-#else
-	level = MMUEXT_PIN_L2_TABLE;
-#endif
-
-	xen_do_pin(level, PFN_DOWN(__pa(pgd)));
-
+	xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
 	xen_mc_issue(0);
 }
 
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index b5e189b1519..5fe961caffd 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -37,14 +37,13 @@ void xen_exit_mmap(struct mm_struct *mm);
 void xen_pgd_pin(pgd_t *pgd);
 //void xen_pgd_unpin(pgd_t *pgd);
 
-#ifdef CONFIG_X86_PAE
-unsigned long long xen_pte_val(pte_t);
-unsigned long long xen_pmd_val(pmd_t);
-unsigned long long xen_pgd_val(pgd_t);
+pteval_t xen_pte_val(pte_t);
+pmdval_t xen_pmd_val(pmd_t);
+pgdval_t xen_pgd_val(pgd_t);
 
-pte_t xen_make_pte(unsigned long long);
-pmd_t xen_make_pmd(unsigned long long);
-pgd_t xen_make_pgd(unsigned long long);
+pte_t xen_make_pte(pteval_t);
+pmd_t xen_make_pmd(pmdval_t);
+pgd_t xen_make_pgd(pgdval_t);
 
 void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 		    pte_t *ptep, pte_t pteval);
@@ -53,15 +52,4 @@ void xen_set_pud(pud_t *ptr, pud_t val);
 void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
 void xen_pmd_clear(pmd_t *pmdp);
 
-
-#else
-unsigned long xen_pte_val(pte_t);
-unsigned long xen_pmd_val(pmd_t);
-unsigned long xen_pgd_val(pgd_t);
-
-pte_t xen_make_pte(unsigned long);
-pmd_t xen_make_pmd(unsigned long);
-pgd_t xen_make_pgd(unsigned long);
-#endif
-
 #endif	/* _XEN_MMU_H */
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 288d587ce73..2ab5f42f34d 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -30,11 +30,7 @@ ENTRY(hypercall_page)
 	ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,          .long  startup_xen)
 	ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long  hypercall_page)
 	ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,       .asciz "!writable_page_tables|pae_pgdir_above_4gb")
-#ifdef CONFIG_X86_PAE
 	ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz "yes")
-#else
-	ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz "no")
-#endif
 	ELFNOTE(Xen, XEN_ELFNOTE_LOADER,         .asciz "generic")
 
 #endif /*CONFIG_XEN */
diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h
index baf3a4dce28..e11f24038b1 100644
--- a/include/asm-x86/xen/page.h
+++ b/include/asm-x86/xen/page.h
@@ -150,13 +150,9 @@ static inline pte_t __pte_ma(pteval_t x)
 	return (pte_t) { .pte = x };
 }
 
-#ifdef CONFIG_X86_PAE
 #define pmd_val_ma(v) ((v).pmd)
 #define pud_val_ma(v) ((v).pgd.pgd)
 #define __pmd_ma(x)	((pmd_t) { (x) } )
-#else  /* !X86_PAE */
-#define pmd_val_ma(v)	((v).pud.pgd.pgd)
-#endif	/* CONFIG_X86_PAE */
 
 #define pgd_val_ma(x)	((x).pgd)
 
-- 
cgit v1.2.3-70-g09d2


From 23adec554a7648f99c8acc0caf49c66320cd2b84 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 12 May 2008 21:20:41 +0200
Subject: x86: add notrace annotations to vsyscall.

Add the notrace annotations to the vsyscall functions - there we are
not in kernel context yet, so the tracer function cannot (and must not)
be called.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/vsyscall_64.c  |  3 ++-
 arch/x86/vdso/vclock_gettime.c | 15 ++++++++-------
 arch/x86/vdso/vgetcpu.c        |  3 ++-
 include/asm-x86/vsyscall.h     |  3 ++-
 4 files changed, 14 insertions(+), 10 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 61efa2f7d56..4063dfa2a02 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -42,7 +42,8 @@
 #include <asm/topology.h>
 #include <asm/vgtod.h>
 
-#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
+#define __vsyscall(nr) \
+		__attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
 #define __syscall_clobber "r11","cx","memory"
 
 /*
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 23476c2ebfc..5cb8f754c52 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -23,7 +23,7 @@
 
 #define gtod vdso_vsyscall_gtod_data
 
-static long vdso_fallback_gettime(long clock, struct timespec *ts)
+notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
 {
 	long ret;
 	asm("syscall" : "=a" (ret) :
@@ -31,7 +31,7 @@ static long vdso_fallback_gettime(long clock, struct timespec *ts)
 	return ret;
 }
 
-static inline long vgetns(void)
+notrace static inline long vgetns(void)
 {
 	long v;
 	cycles_t (*vread)(void);
@@ -40,7 +40,7 @@ static inline long vgetns(void)
 	return (v * gtod->clock.mult) >> gtod->clock.shift;
 }
 
-static noinline int do_realtime(struct timespec *ts)
+notrace static noinline int do_realtime(struct timespec *ts)
 {
 	unsigned long seq, ns;
 	do {
@@ -54,7 +54,8 @@ static noinline int do_realtime(struct timespec *ts)
 }
 
 /* Copy of the version in kernel/time.c which we cannot directly access */
-static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
+notrace static void
+vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
 {
 	while (nsec >= NSEC_PER_SEC) {
 		nsec -= NSEC_PER_SEC;
@@ -68,7 +69,7 @@ static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
 	ts->tv_nsec = nsec;
 }
 
-static noinline int do_monotonic(struct timespec *ts)
+notrace static noinline int do_monotonic(struct timespec *ts)
 {
 	unsigned long seq, ns, secs;
 	do {
@@ -82,7 +83,7 @@ static noinline int do_monotonic(struct timespec *ts)
 	return 0;
 }
 
-int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
+notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
 {
 	if (likely(gtod->sysctl_enabled && gtod->clock.vread))
 		switch (clock) {
@@ -96,7 +97,7 @@ int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
 int clock_gettime(clockid_t, struct timespec *)
 	__attribute__((weak, alias("__vdso_clock_gettime")));
 
-int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
 {
 	long ret;
 	if (likely(gtod->sysctl_enabled && gtod->clock.vread)) {
diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c
index c8097f17f8a..9fbc6b20026 100644
--- a/arch/x86/vdso/vgetcpu.c
+++ b/arch/x86/vdso/vgetcpu.c
@@ -13,7 +13,8 @@
 #include <asm/vgtod.h>
 #include "vextern.h"
 
-long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
+notrace long
+__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
 {
 	unsigned int p;
 
diff --git a/include/asm-x86/vsyscall.h b/include/asm-x86/vsyscall.h
index 17b3700949b..6b66ff905af 100644
--- a/include/asm-x86/vsyscall.h
+++ b/include/asm-x86/vsyscall.h
@@ -24,7 +24,8 @@ enum vsyscall_num {
 	((unused, __section__ (".vsyscall_gtod_data"),aligned(16)))
 #define __section_vsyscall_clock __attribute__ \
 	((unused, __section__ (".vsyscall_clock"),aligned(16)))
-#define __vsyscall_fn __attribute__ ((unused,__section__(".vsyscall_fn")))
+#define __vsyscall_fn \
+	__attribute__ ((unused, __section__(".vsyscall_fn"))) notrace
 
 #define VGETCPU_RDTSCP	1
 #define VGETCPU_LSL	2
-- 
cgit v1.2.3-70-g09d2


From 81d68a96a39844853b37f20cc8282d9b65b78ef3 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 12 May 2008 21:20:42 +0200
Subject: ftrace: trace irq disabled critical timings

This patch adds latency tracing for critical timings
(how long interrupts are disabled for).

 "irqsoff" is added to /debugfs/tracing/available_tracers

Note:
  tracing_max_latency
    also holds the max latency for irqsoff (in usecs).
   (default to large number so one must start latency tracing)

  tracing_thresh
    threshold (in usecs) to always print out if irqs off
    is detected to be longer than stated here.
    If irq_thresh is non-zero, then max_irq_latency
    is ignored.

Here's an example of a trace with ftrace_enabled = 0

=======
preemption latency trace v1.1.5 on 2.6.24-rc7
Signed-off-by: Ingo Molnar <mingo@elte.hu>
--------------------------------------------------------------------
 latency: 100 us, #3/3, CPU#1 | (M:rt VP:0, KP:0, SP:0 HP:0 #P:2)
    -----------------
    | task: swapper-0 (uid:0 nice:0 policy:0 rt_prio:0)
    -----------------
 => started at: _spin_lock_irqsave+0x2a/0xb7
 => ended at:   _spin_unlock_irqrestore+0x32/0x5f

                 _------=> CPU#
                / _-----=> irqs-off
               | / _----=> need-resched
               || / _---=> hardirq/softirq
               ||| / _--=> preempt-depth
               |||| /
               |||||     delay
   cmd     pid ||||| time  |   caller
      \   /    |||||   \   |   /
 swapper-0     1d.s3    0us+: _spin_lock_irqsave+0x2a/0xb7 (e1000_update_stats+0x47/0x64c [e1000])
 swapper-0     1d.s3  100us : _spin_unlock_irqrestore+0x32/0x5f (e1000_update_stats+0x641/0x64c [e1000])
 swapper-0     1d.s3  100us : trace_hardirqs_on_caller+0x75/0x89 (_spin_unlock_irqrestore+0x32/0x5f)

vim:ft=help
=======

And this is a trace with ftrace_enabled == 1

=======
preemption latency trace v1.1.5 on 2.6.24-rc7
--------------------------------------------------------------------
 latency: 102 us, #12/12, CPU#1 | (M:rt VP:0, KP:0, SP:0 HP:0 #P:2)
    -----------------
    | task: swapper-0 (uid:0 nice:0 policy:0 rt_prio:0)
    -----------------
 => started at: _spin_lock_irqsave+0x2a/0xb7
 => ended at:   _spin_unlock_irqrestore+0x32/0x5f

                 _------=> CPU#
                / _-----=> irqs-off
               | / _----=> need-resched
               || / _---=> hardirq/softirq
               ||| / _--=> preempt-depth
               |||| /
               |||||     delay
   cmd     pid ||||| time  |   caller
      \   /    |||||   \   |   /
 swapper-0     1dNs3    0us+: _spin_lock_irqsave+0x2a/0xb7 (e1000_update_stats+0x47/0x64c [e1000])
 swapper-0     1dNs3   46us : e1000_read_phy_reg+0x16/0x225 [e1000] (e1000_update_stats+0x5e2/0x64c [e1000])
 swapper-0     1dNs3   46us : e1000_swfw_sync_acquire+0x10/0x99 [e1000] (e1000_read_phy_reg+0x49/0x225 [e1000])
 swapper-0     1dNs3   46us : e1000_get_hw_eeprom_semaphore+0x12/0xa6 [e1000] (e1000_swfw_sync_acquire+0x36/0x99 [e1000])
 swapper-0     1dNs3   47us : __const_udelay+0x9/0x47 (e1000_read_phy_reg+0x116/0x225 [e1000])
 swapper-0     1dNs3   47us+: __delay+0x9/0x50 (__const_udelay+0x45/0x47)
 swapper-0     1dNs3   97us : preempt_schedule+0xc/0x84 (__delay+0x4e/0x50)
 swapper-0     1dNs3   98us : e1000_swfw_sync_release+0xc/0x55 [e1000] (e1000_read_phy_reg+0x211/0x225 [e1000])
 swapper-0     1dNs3   99us+: e1000_put_hw_eeprom_semaphore+0x9/0x35 [e1000] (e1000_swfw_sync_release+0x50/0x55 [e1000])
 swapper-0     1dNs3  101us : _spin_unlock_irqrestore+0xe/0x5f (e1000_update_stats+0x641/0x64c [e1000])
 swapper-0     1dNs3  102us : _spin_unlock_irqrestore+0x32/0x5f (e1000_update_stats+0x641/0x64c [e1000])
 swapper-0     1dNs3  102us : trace_hardirqs_on_caller+0x75/0x89 (_spin_unlock_irqrestore+0x32/0x5f)

vim:ft=help
=======

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/process_64.c |   3 +
 arch/x86/lib/Makefile        |   1 +
 arch/x86/lib/thunk_32.S      |  47 +++++
 arch/x86/lib/thunk_64.S      |  19 +-
 include/asm-x86/irqflags.h   |  24 +--
 include/linux/ftrace.h       |   8 +
 include/linux/irqflags.h     |  12 +-
 kernel/fork.c                |   2 +-
 kernel/lockdep.c             |  23 ++-
 kernel/printk.c              |   2 +
 kernel/trace/Kconfig         |  18 ++
 kernel/trace/Makefile        |   1 +
 kernel/trace/trace_irqsoff.c | 402 +++++++++++++++++++++++++++++++++++++++++++
 13 files changed, 531 insertions(+), 31 deletions(-)
 create mode 100644 arch/x86/lib/thunk_32.S
 create mode 100644 kernel/trace/trace_irqsoff.c

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index e2319f39988..dd349c92f05 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -165,7 +165,10 @@ void cpu_idle(void)
 			 */
 			local_irq_disable();
 			enter_idle();
+			/* Don't trace irqs off for idle */
+			stop_critical_timings();
 			idle();
+			start_critical_timings();
 			/* In many cases the interrupt that ended idle
 			   has already called exit_idle. But some idle
 			   loops can be woken up without interrupt. */
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 76f60f52a88..84aa2883fe1 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -5,6 +5,7 @@
 obj-$(CONFIG_SMP) := msr-on-cpu.o
 
 lib-y := delay_$(BITS).o
+lib-y += thunk_$(BITS).o
 lib-y += usercopy_$(BITS).o getuser_$(BITS).o putuser_$(BITS).o
 lib-y += memcpy_$(BITS).o
 
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S
new file mode 100644
index 00000000000..650b11e00ec
--- /dev/null
+++ b/arch/x86/lib/thunk_32.S
@@ -0,0 +1,47 @@
+/*
+ * Trampoline to trace irqs off. (otherwise CALLER_ADDR1 might crash)
+ * Copyright 2008 by Steven Rostedt, Red Hat, Inc
+ *  (inspired by Andi Kleen's thunk_64.S)
+ * Subject to the GNU public license, v.2. No warranty of any kind.
+ */
+
+	#include <linux/linkage.h>
+
+#define ARCH_TRACE_IRQS_ON			\
+	pushl %eax;				\
+	pushl %ecx;				\
+	pushl %edx;				\
+	call trace_hardirqs_on;			\
+	popl %edx;				\
+	popl %ecx;				\
+	popl %eax;
+
+#define ARCH_TRACE_IRQS_OFF			\
+	pushl %eax;				\
+	pushl %ecx;				\
+	pushl %edx;				\
+	call trace_hardirqs_off;		\
+	popl %edx;				\
+	popl %ecx;				\
+	popl %eax;
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+	/* put return address in eax (arg1) */
+	.macro thunk_ra name,func
+	.globl \name
+\name:
+	pushl %eax
+	pushl %ecx
+	pushl %edx
+	/* Place EIP in the arg1 */
+	movl 3*4(%esp), %eax
+	call \func
+	popl %edx
+	popl %ecx
+	popl %eax
+	ret
+	.endm
+
+	thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller
+	thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller
+#endif
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index e009251d4e9..bf9a7d5a542 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -2,6 +2,7 @@
  * Save registers before calling assembly functions. This avoids
  * disturbance of register allocation in some inline assembly constructs.
  * Copyright 2001,2002 by Andi Kleen, SuSE Labs.
+ * Added trace_hardirqs callers - Copyright 2007 Steven Rostedt, Red Hat, Inc.
  * Subject to the GNU public license, v.2. No warranty of any kind.
  */
 
@@ -42,8 +43,22 @@
 #endif	
 	
 #ifdef CONFIG_TRACE_IRQFLAGS
-	thunk trace_hardirqs_on_thunk,trace_hardirqs_on
-	thunk trace_hardirqs_off_thunk,trace_hardirqs_off
+	/* put return address in rdi (arg1) */
+	.macro thunk_ra name,func
+	.globl \name
+\name:
+	CFI_STARTPROC
+	SAVE_ARGS
+	/* SAVE_ARGS pushs 9 elements */
+	/* the next element would be the rip */
+	movq 9*8(%rsp), %rdi
+	call \func
+	jmp  restore
+	CFI_ENDPROC
+	.endm
+
+	thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller
+	thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller
 #endif
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
diff --git a/include/asm-x86/irqflags.h b/include/asm-x86/irqflags.h
index c242527f970..24d71b1eb18 100644
--- a/include/asm-x86/irqflags.h
+++ b/include/asm-x86/irqflags.h
@@ -179,8 +179,6 @@ static inline void trace_hardirqs_fixup(void)
  * have a reliable stack. x86_64 only.
  */
 #define SWAPGS_UNSAFE_STACK	swapgs
-#define ARCH_TRACE_IRQS_ON		call trace_hardirqs_on_thunk
-#define ARCH_TRACE_IRQS_OFF		call trace_hardirqs_off_thunk
 #define ARCH_LOCKDEP_SYS_EXIT		call lockdep_sys_exit_thunk
 #define ARCH_LOCKDEP_SYS_EXIT_IRQ	\
 	TRACE_IRQS_ON; \
@@ -192,24 +190,6 @@ static inline void trace_hardirqs_fixup(void)
 	TRACE_IRQS_OFF;
 
 #else
-#define ARCH_TRACE_IRQS_ON			\
-	pushl %eax;				\
-	pushl %ecx;				\
-	pushl %edx;				\
-	call trace_hardirqs_on;			\
-	popl %edx;				\
-	popl %ecx;				\
-	popl %eax;
-
-#define ARCH_TRACE_IRQS_OFF			\
-	pushl %eax;				\
-	pushl %ecx;				\
-	pushl %edx;				\
-	call trace_hardirqs_off;		\
-	popl %edx;				\
-	popl %ecx;				\
-	popl %eax;
-
 #define ARCH_LOCKDEP_SYS_EXIT			\
 	pushl %eax;				\
 	pushl %ecx;				\
@@ -223,8 +203,8 @@ static inline void trace_hardirqs_fixup(void)
 #endif
 
 #ifdef CONFIG_TRACE_IRQFLAGS
-#  define TRACE_IRQS_ON		ARCH_TRACE_IRQS_ON
-#  define TRACE_IRQS_OFF	ARCH_TRACE_IRQS_OFF
+#  define TRACE_IRQS_ON		call trace_hardirqs_on_thunk;
+#  define TRACE_IRQS_OFF	call trace_hardirqs_off_thunk;
 #else
 #  define TRACE_IRQS_ON
 #  define TRACE_IRQS_OFF
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index db8a5e7abe4..0a20445dcbc 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -50,4 +50,12 @@ extern void mcount(void);
 # define CALLER_ADDR5 0UL
 #endif
 
+#ifdef CONFIG_IRQSOFF_TRACER
+  extern void notrace time_hardirqs_on(unsigned long a0, unsigned long a1);
+  extern void notrace time_hardirqs_off(unsigned long a0, unsigned long a1);
+#else
+# define time_hardirqs_on(a0, a1)		do { } while (0)
+# define time_hardirqs_off(a0, a1)		do { } while (0)
+#endif
+
 #endif /* _LINUX_FTRACE_H */
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index e600c4e9b8c..5b711d4e9fd 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -12,10 +12,10 @@
 #define _LINUX_TRACE_IRQFLAGS_H
 
 #ifdef CONFIG_TRACE_IRQFLAGS
-  extern void trace_hardirqs_on(void);
-  extern void trace_hardirqs_off(void);
   extern void trace_softirqs_on(unsigned long ip);
   extern void trace_softirqs_off(unsigned long ip);
+  extern void trace_hardirqs_on(void);
+  extern void trace_hardirqs_off(void);
 # define trace_hardirq_context(p)	((p)->hardirq_context)
 # define trace_softirq_context(p)	((p)->softirq_context)
 # define trace_hardirqs_enabled(p)	((p)->hardirqs_enabled)
@@ -41,6 +41,14 @@
 # define INIT_TRACE_IRQFLAGS
 #endif
 
+#ifdef CONFIG_IRQSOFF_TRACER
+ extern void stop_critical_timings(void);
+ extern void start_critical_timings(void);
+#else
+# define stop_critical_timings() do { } while (0)
+# define start_critical_timings() do { } while (0)
+#endif
+
 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
 
 #include <asm/irqflags.h>
diff --git a/kernel/fork.c b/kernel/fork.c
index 19908b26cf8..d66d676dc36 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -909,7 +909,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	rt_mutex_init_task(p);
 
-#ifdef CONFIG_TRACE_IRQFLAGS
+#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_LOCKDEP)
 	DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
 	DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
 #endif
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 81a4e4a3f08..e21924365ea 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -39,6 +39,7 @@
 #include <linux/irqflags.h>
 #include <linux/utsname.h>
 #include <linux/hash.h>
+#include <linux/ftrace.h>
 
 #include <asm/sections.h>
 
@@ -982,7 +983,7 @@ check_noncircular(struct lock_class *source, unsigned int depth)
 	return 1;
 }
 
-#ifdef CONFIG_TRACE_IRQFLAGS
+#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
 /*
  * Forwards and backwards subgraph searching, for the purposes of
  * proving that two subgraphs can be connected by a new dependency
@@ -1680,7 +1681,7 @@ valid_state(struct task_struct *curr, struct held_lock *this,
 static int mark_lock(struct task_struct *curr, struct held_lock *this,
 		     enum lock_usage_bit new_bit);
 
-#ifdef CONFIG_TRACE_IRQFLAGS
+#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
 
 /*
  * print irq inversion bug:
@@ -2013,11 +2014,13 @@ void early_boot_irqs_on(void)
 /*
  * Hardirqs will be enabled:
  */
-void trace_hardirqs_on(void)
+void notrace trace_hardirqs_on_caller(unsigned long a0)
 {
 	struct task_struct *curr = current;
 	unsigned long ip;
 
+	time_hardirqs_on(CALLER_ADDR0, a0);
+
 	if (unlikely(!debug_locks || current->lockdep_recursion))
 		return;
 
@@ -2055,16 +2058,23 @@ void trace_hardirqs_on(void)
 	curr->hardirq_enable_event = ++curr->irq_events;
 	debug_atomic_inc(&hardirqs_on_events);
 }
+EXPORT_SYMBOL(trace_hardirqs_on_caller);
 
+void notrace trace_hardirqs_on(void)
+{
+	trace_hardirqs_on_caller(CALLER_ADDR0);
+}
 EXPORT_SYMBOL(trace_hardirqs_on);
 
 /*
  * Hardirqs were disabled:
  */
-void trace_hardirqs_off(void)
+void notrace trace_hardirqs_off_caller(unsigned long a0)
 {
 	struct task_struct *curr = current;
 
+	time_hardirqs_off(CALLER_ADDR0, a0);
+
 	if (unlikely(!debug_locks || current->lockdep_recursion))
 		return;
 
@@ -2082,7 +2092,12 @@ void trace_hardirqs_off(void)
 	} else
 		debug_atomic_inc(&redundant_hardirqs_off);
 }
+EXPORT_SYMBOL(trace_hardirqs_off_caller);
 
+void notrace trace_hardirqs_off(void)
+{
+	trace_hardirqs_off_caller(CALLER_ADDR0);
+}
 EXPORT_SYMBOL(trace_hardirqs_off);
 
 /*
diff --git a/kernel/printk.c b/kernel/printk.c
index 8fb01c32aa3..ae7d5b9e535 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1041,7 +1041,9 @@ void release_console_sem(void)
 		_log_end = log_end;
 		con_start = log_end;		/* Flush */
 		spin_unlock(&logbuf_lock);
+		stop_critical_timings();	/* don't trace print latency */
 		call_console_drivers(_con_start, _log_end);
+		start_critical_timings();
 		local_irq_restore(flags);
 	}
 	console_locked = 0;
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 892ecc94a82..896df1cf6ad 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -26,6 +26,24 @@ config FTRACE
 	  (the bootup default), then the overhead of the instructions is very
 	  small and not measurable even in micro-benchmarks.
 
+config IRQSOFF_TRACER
+	bool "Interrupts-off Latency Tracer"
+	default n
+	depends on TRACE_IRQFLAGS_SUPPORT
+	depends on GENERIC_TIME
+	select TRACE_IRQFLAGS
+	select TRACING
+	select TRACER_MAX_TRACE
+	help
+	  This option measures the time spent in irqs-off critical
+	  sections, with microsecond accuracy.
+
+	  The default measurement method is a maximum search, which is
+	  disabled by default and can be runtime (re-)started
+	  via:
+
+	      echo 0 > /debugfs/tracing/tracing_max_latency
+
 config SCHED_TRACER
 	bool "Scheduling Latency Tracer"
 	depends on DEBUG_KERNEL
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 5508cdb19ae..46be8647fb6 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -3,6 +3,7 @@ obj-$(CONFIG_FTRACE) += libftrace.o
 obj-$(CONFIG_TRACING) += trace.o
 obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
 obj-$(CONFIG_FTRACE) += trace_functions.o
+obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
 obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
 
 libftrace-y := ftrace.o
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
new file mode 100644
index 00000000000..a9131b0cf1a
--- /dev/null
+++ b/kernel/trace/trace_irqsoff.c
@@ -0,0 +1,402 @@
+/*
+ * trace irqs off criticall timings
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
+ *
+ * From code in the latency_tracer, that is:
+ *
+ *  Copyright (C) 2004-2006 Ingo Molnar
+ *  Copyright (C) 2004 William Lee Irwin III
+ */
+#include <linux/kallsyms.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/ftrace.h>
+#include <linux/fs.h>
+
+#include "trace.h"
+
+static struct trace_array		*irqsoff_trace __read_mostly;
+static int				tracer_enabled __read_mostly;
+
+/*
+ * Sequence count - we record it when starting a measurement and
+ * skip the latency if the sequence has changed - some other section
+ * did a maximum and could disturb our measurement with serial console
+ * printouts, etc. Truly coinciding maximum latencies should be rare
+ * and what happens together happens separately as well, so this doesnt
+ * decrease the validity of the maximum found:
+ */
+static __cacheline_aligned_in_smp	unsigned long max_sequence;
+
+#ifdef CONFIG_FTRACE
+/*
+ * irqsoff uses its own tracer function to keep the overhead down:
+ */
+static void notrace
+irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
+{
+	struct trace_array *tr = irqsoff_trace;
+	struct trace_array_cpu *data;
+	unsigned long flags;
+	long disabled;
+	int cpu;
+
+	if (likely(!tracer_enabled))
+		return;
+
+	local_save_flags(flags);
+
+	if (!irqs_disabled_flags(flags))
+		return;
+
+	cpu = raw_smp_processor_id();
+	data = tr->data[cpu];
+	disabled = atomic_inc_return(&data->disabled);
+
+	if (likely(disabled == 1))
+		ftrace(tr, data, ip, parent_ip, flags);
+
+	atomic_dec(&data->disabled);
+}
+
+static struct ftrace_ops trace_ops __read_mostly =
+{
+	.func = irqsoff_tracer_call,
+};
+#endif /* CONFIG_FTRACE */
+
+/*
+ * Should this new latency be reported/recorded?
+ */
+static int notrace report_latency(cycle_t delta)
+{
+	if (tracing_thresh) {
+		if (delta < tracing_thresh)
+			return 0;
+	} else {
+		if (delta <= tracing_max_latency)
+			return 0;
+	}
+	return 1;
+}
+
+static void notrace
+check_critical_timing(struct trace_array *tr,
+		      struct trace_array_cpu *data,
+		      unsigned long parent_ip,
+		      int cpu)
+{
+	unsigned long latency, t0, t1;
+	cycle_t T0, T1, T2, delta;
+	unsigned long flags;
+
+	/*
+	 * usecs conversion is slow so we try to delay the conversion
+	 * as long as possible:
+	 */
+	T0 = data->preempt_timestamp;
+	T1 = now(cpu);
+	delta = T1-T0;
+
+	local_save_flags(flags);
+
+	if (!report_latency(delta))
+		goto out;
+
+	ftrace(tr, data, CALLER_ADDR0, parent_ip, flags);
+	/*
+	 * Update the timestamp, because the trace entry above
+	 * might change it (it can only get larger so the latency
+	 * is fair to be reported):
+	 */
+	T2 = now(cpu);
+
+	delta = T2-T0;
+
+	latency = nsecs_to_usecs(delta);
+
+	if (data->critical_sequence != max_sequence)
+		goto out;
+
+	tracing_max_latency = delta;
+	t0 = nsecs_to_usecs(T0);
+	t1 = nsecs_to_usecs(T1);
+
+	data->critical_end = parent_ip;
+
+	update_max_tr_single(tr, current, cpu);
+
+	if (tracing_thresh)
+		printk(KERN_INFO "(%16s-%-5d|#%d): %lu us critical section "
+		       "violates %lu us threshold.\n"
+		       " => started at timestamp %lu: ",
+				current->comm, current->pid,
+				raw_smp_processor_id(),
+				latency, nsecs_to_usecs(tracing_thresh), t0);
+	else
+		printk(KERN_INFO "(%16s-%-5d|#%d):"
+		       " new %lu us maximum-latency "
+		       "critical section.\n => started at timestamp %lu: ",
+				current->comm, current->pid,
+				raw_smp_processor_id(),
+				latency, t0);
+
+	print_symbol(KERN_CONT "<%s>\n", data->critical_start);
+	printk(KERN_CONT " =>   ended at timestamp %lu: ", t1);
+	print_symbol(KERN_CONT "<%s>\n", data->critical_end);
+	dump_stack();
+	t1 = nsecs_to_usecs(now(cpu));
+	printk(KERN_CONT " =>   dump-end timestamp %lu\n\n", t1);
+
+	max_sequence++;
+
+out:
+	data->critical_sequence = max_sequence;
+	data->preempt_timestamp = now(cpu);
+	tracing_reset(data);
+	ftrace(tr, data, CALLER_ADDR0, parent_ip, flags);
+}
+
+static inline void notrace
+start_critical_timing(unsigned long ip, unsigned long parent_ip)
+{
+	int cpu;
+	struct trace_array *tr = irqsoff_trace;
+	struct trace_array_cpu *data;
+	unsigned long flags;
+
+	if (likely(!tracer_enabled))
+		return;
+
+	cpu = raw_smp_processor_id();
+	data = tr->data[cpu];
+
+	if (unlikely(!data) || unlikely(!data->trace) ||
+	    data->critical_start || atomic_read(&data->disabled))
+		return;
+
+	atomic_inc(&data->disabled);
+
+	data->critical_sequence = max_sequence;
+	data->preempt_timestamp = now(cpu);
+	data->critical_start = parent_ip;
+	tracing_reset(data);
+
+	local_save_flags(flags);
+	ftrace(tr, data, ip, parent_ip, flags);
+
+	atomic_dec(&data->disabled);
+}
+
+static inline void notrace
+stop_critical_timing(unsigned long ip, unsigned long parent_ip)
+{
+	int cpu;
+	struct trace_array *tr = irqsoff_trace;
+	struct trace_array_cpu *data;
+	unsigned long flags;
+
+	if (likely(!tracer_enabled))
+		return;
+
+	cpu = raw_smp_processor_id();
+	data = tr->data[cpu];
+
+	if (unlikely(!data) || unlikely(!data->trace) ||
+	    !data->critical_start || atomic_read(&data->disabled))
+		return;
+
+	atomic_inc(&data->disabled);
+	local_save_flags(flags);
+	ftrace(tr, data, ip, parent_ip, flags);
+	check_critical_timing(tr, data, parent_ip, cpu);
+	data->critical_start = 0;
+	atomic_dec(&data->disabled);
+}
+
+void notrace start_critical_timings(void)
+{
+	unsigned long flags;
+
+	local_save_flags(flags);
+
+	if (irqs_disabled_flags(flags))
+		start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
+}
+
+void notrace stop_critical_timings(void)
+{
+	unsigned long flags;
+
+	local_save_flags(flags);
+
+	if (irqs_disabled_flags(flags))
+		stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
+}
+
+#ifdef CONFIG_PROVE_LOCKING
+void notrace time_hardirqs_on(unsigned long a0, unsigned long a1)
+{
+	unsigned long flags;
+
+	local_save_flags(flags);
+
+	if (irqs_disabled_flags(flags))
+		stop_critical_timing(a0, a1);
+}
+
+void notrace time_hardirqs_off(unsigned long a0, unsigned long a1)
+{
+	unsigned long flags;
+
+	local_save_flags(flags);
+
+	if (irqs_disabled_flags(flags))
+		start_critical_timing(a0, a1);
+}
+
+#else /* !CONFIG_PROVE_LOCKING */
+
+/*
+ * Stubs:
+ */
+
+void early_boot_irqs_off(void)
+{
+}
+
+void early_boot_irqs_on(void)
+{
+}
+
+void trace_softirqs_on(unsigned long ip)
+{
+}
+
+void trace_softirqs_off(unsigned long ip)
+{
+}
+
+inline void print_irqtrace_events(struct task_struct *curr)
+{
+}
+
+/*
+ * We are only interested in hardirq on/off events:
+ */
+void notrace trace_hardirqs_on(void)
+{
+	unsigned long flags;
+
+	local_save_flags(flags);
+
+	if (irqs_disabled_flags(flags))
+		stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
+}
+EXPORT_SYMBOL(trace_hardirqs_on);
+
+void notrace trace_hardirqs_off(void)
+{
+	unsigned long flags;
+
+	local_save_flags(flags);
+
+	if (irqs_disabled_flags(flags))
+		start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
+}
+EXPORT_SYMBOL(trace_hardirqs_off);
+
+void notrace trace_hardirqs_on_caller(unsigned long caller_addr)
+{
+	unsigned long flags;
+
+	local_save_flags(flags);
+
+	if (irqs_disabled_flags(flags))
+		stop_critical_timing(CALLER_ADDR0, caller_addr);
+}
+EXPORT_SYMBOL(trace_hardirqs_on_caller);
+
+void notrace trace_hardirqs_off_caller(unsigned long caller_addr)
+{
+	unsigned long flags;
+
+	local_save_flags(flags);
+
+	if (irqs_disabled_flags(flags))
+		start_critical_timing(CALLER_ADDR0, caller_addr);
+}
+EXPORT_SYMBOL(trace_hardirqs_off_caller);
+
+#endif /* CONFIG_PROVE_LOCKING */
+
+static void start_irqsoff_tracer(struct trace_array *tr)
+{
+	tracer_enabled = 1;
+	register_ftrace_function(&trace_ops);
+}
+
+static void stop_irqsoff_tracer(struct trace_array *tr)
+{
+	unregister_ftrace_function(&trace_ops);
+	tracer_enabled = 0;
+}
+
+static void irqsoff_tracer_init(struct trace_array *tr)
+{
+	irqsoff_trace = tr;
+	/* make sure that the tracer is visibel */
+	smp_wmb();
+
+	if (tr->ctrl)
+		start_irqsoff_tracer(tr);
+}
+
+static void irqsoff_tracer_reset(struct trace_array *tr)
+{
+	if (tr->ctrl)
+		stop_irqsoff_tracer(tr);
+}
+
+static void irqsoff_tracer_ctrl_update(struct trace_array *tr)
+{
+	if (tr->ctrl)
+		start_irqsoff_tracer(tr);
+	else
+		stop_irqsoff_tracer(tr);
+}
+
+static void notrace irqsoff_tracer_open(struct trace_iterator *iter)
+{
+	/* stop the trace while dumping */
+	if (iter->tr->ctrl)
+		stop_irqsoff_tracer(iter->tr);
+}
+
+static void notrace irqsoff_tracer_close(struct trace_iterator *iter)
+{
+	if (iter->tr->ctrl)
+		start_irqsoff_tracer(iter->tr);
+}
+
+static struct tracer irqsoff_tracer __read_mostly =
+{
+	.name		= "irqsoff",
+	.init		= irqsoff_tracer_init,
+	.reset		= irqsoff_tracer_reset,
+	.open		= irqsoff_tracer_open,
+	.close		= irqsoff_tracer_close,
+	.ctrl_update	= irqsoff_tracer_ctrl_update,
+	.print_max	= 1,
+};
+
+__init static int init_irqsoff_tracer(void)
+{
+	register_tracer(&irqsoff_tracer);
+
+	return 0;
+}
+device_initcall(init_irqsoff_tracer);
-- 
cgit v1.2.3-70-g09d2


From dfa60aba04dae7833d75b2e2be124bb7cfb8239f Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 12 May 2008 21:20:43 +0200
Subject: ftrace: use nops instead of jmp

This patch patches the call to mcount with nops instead
of a jmp over the mcount call.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/alternative.c |  4 ++--
 arch/x86/kernel/ftrace.c      | 40 ++++++++++++++++++++++++----------------
 include/asm-x86/alternative.h |  2 ++
 3 files changed, 28 insertions(+), 18 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 65c7857a90d..de240ba2e28 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -143,7 +143,7 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
 #ifdef CONFIG_X86_64
 
 extern char __vsyscall_0;
-static inline const unsigned char*const * find_nop_table(void)
+const unsigned char *const *find_nop_table(void)
 {
 	return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
 	       boot_cpu_data.x86 < 6 ? k8_nops : p6_nops;
@@ -162,7 +162,7 @@ static const struct nop {
 	{ -1, NULL }
 };
 
-static const unsigned char*const * find_nop_table(void)
+const unsigned char *const *find_nop_table(void)
 {
 	const unsigned char *const *noptable = intel_nops;
 	int i;
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 5dd58136ef0..2e060c58b86 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -16,11 +16,12 @@
 #include <linux/init.h>
 #include <linux/list.h>
 
-#define CALL_BACK		5
+#include <asm/alternative.h>
 
-#define JMPFWD			0x03eb
+#define CALL_BACK		5
 
-static unsigned short ftrace_jmp = JMPFWD;
+/* Long is fine, even if it is only 4 bytes ;-) */
+static long *ftrace_nop;
 
 struct ftrace_record {
 	struct dyn_ftrace	rec;
@@ -55,13 +56,13 @@ static struct ftrace_page	*ftrace_pages;
 notrace struct dyn_ftrace *ftrace_alloc_shutdown_node(unsigned long ip)
 {
 	struct ftrace_record *rec;
-	unsigned short save;
+	unsigned long save;
 
 	ip -= CALL_BACK;
-	save = *(short *)ip;
+	save = *(long *)ip;
 
 	/* If this was already converted, skip it */
-	if (save == JMPFWD)
+	if (save == *ftrace_nop)
 		return NULL;
 
 	if (ftrace_pages->index == ENTRIES_PER_PAGE) {
@@ -79,9 +80,10 @@ static int notrace
 ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 		   unsigned char *new_code)
 {
-	unsigned short old = *(unsigned short *)old_code;
-	unsigned short new = *(unsigned short *)new_code;
-	unsigned short replaced;
+	unsigned replaced;
+	unsigned old = *(unsigned *)old_code; /* 4 bytes */
+	unsigned new = *(unsigned *)new_code; /* 4 bytes */
+	unsigned char newch = new_code[4];
 	int faulted = 0;
 
 	/*
@@ -94,7 +96,9 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 	 */
 	asm volatile (
 		"1: lock\n"
-		"   cmpxchg %w3, (%2)\n"
+		"   cmpxchg %3, (%2)\n"
+		"   jnz 2f\n"
+		"   movb %b4, 4(%2)\n"
 		"2:\n"
 		".section .fixup, \"ax\"\n"
 		"	movl $1, %0\n"
@@ -102,11 +106,12 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 		".previous\n"
 		_ASM_EXTABLE(1b, 3b)
 		: "=r"(faulted), "=a"(replaced)
-		: "r"(ip), "r"(new), "0"(faulted), "a"(old)
+		: "r"(ip), "r"(new), "r"(newch),
+		  "0"(faulted), "a"(old)
 		: "memory");
 	sync_core();
 
-	if (replaced != old)
+	if (replaced != old && replaced != new)
 		faulted = 2;
 
 	return faulted;
@@ -132,7 +137,7 @@ notrace void ftrace_code_disable(struct dyn_ftrace *rec)
 	/* move the IP back to the start of the call */
 	ip -= CALL_BACK;
 
-	r->failed = ftrace_modify_code(ip, save.code, (char *)&ftrace_jmp);
+	r->failed = ftrace_modify_code(ip, save.code, (char *)ftrace_nop);
 }
 
 static void notrace ftrace_replace_code(int saved)
@@ -144,9 +149,9 @@ static void notrace ftrace_replace_code(int saved)
 	int i;
 
 	if (saved)
-		old = (char *)&ftrace_jmp;
+		old = (char *)ftrace_nop;
 	else
-		new = (char *)&ftrace_jmp;
+		new = (char *)ftrace_nop;
 
 	for (pg = ftrace_pages_start; pg; pg = pg->next) {
 		for (i = 0; i < pg->index; i++) {
@@ -194,12 +199,15 @@ notrace void ftrace_shutdown_replenish(void)
 	ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL);
 }
 
-notrace int ftrace_shutdown_arch_init(void)
+notrace int __init ftrace_shutdown_arch_init(void)
 {
+	const unsigned char *const *noptable = find_nop_table();
 	struct ftrace_page *pg;
 	int cnt;
 	int i;
 
+	ftrace_nop = (unsigned long *)noptable[CALL_BACK];
+
 	/* allocate a few pages */
 	ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL);
 	if (!ftrace_pages_start)
diff --git a/include/asm-x86/alternative.h b/include/asm-x86/alternative.h
index 1f6a9ca1012..f6aa18eadf7 100644
--- a/include/asm-x86/alternative.h
+++ b/include/asm-x86/alternative.h
@@ -72,6 +72,8 @@ static inline void alternatives_smp_module_del(struct module *mod) {}
 static inline void alternatives_smp_switch(int smp) {}
 #endif	/* CONFIG_SMP */
 
+const unsigned char *const *find_nop_table(void);
+
 /*
  * Alternative instructions for different CPU types or capabilities.
  *
-- 
cgit v1.2.3-70-g09d2


From 86069782d62e731b4835a0cf8eb7d1d0e17cf306 Mon Sep 17 00:00:00 2001
From: Pekka Paalanen <pq@iki.fi>
Date: Mon, 12 May 2008 21:20:56 +0200
Subject: x86: add a list for custom page fault handlers.

Provides kernel modules a way to register custom page fault handlers.
On every page fault this will call a list of registered functions. The
functions may handle the fault and force do_page_fault() to return
immediately.

This functionality is similar to the now removed page fault notifiers.
Custom page fault handlers are used by debugging and reverse engineering
tools. Mmiotrace is one such tool and a patch to add it into the tree
will follow.

The custom page fault handlers are called earlier in do_page_fault()
than the page fault notifiers were.

Signed-off-by: Pekka Paalanen <pq@iki.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig.debug   |  8 +++++++
 arch/x86/mm/fault.c      | 56 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/asm-x86/kdebug.h |  9 ++++++++
 3 files changed, 73 insertions(+)

(limited to 'include/asm-x86')

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index ac1e31ba479..9431a839984 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -168,6 +168,14 @@ config IOMMU_LEAK
 	  Add a simple leak tracer to the IOMMU code. This is useful when you
 	  are debugging a buggy device driver that leaks IOMMU mappings.
 
+config PAGE_FAULT_HANDLERS
+	bool "Custom page fault handlers"
+	depends on DEBUG_KERNEL
+	help
+	  Allow the use of custom page fault handlers. A kernel module may
+	  register a function that is called on every page fault. Custom
+	  handlers are used by some debugging and reverse engineering tools.
+
 #
 # IO delay types:
 #
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index fd7e1798c75..343f5c1aacc 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -49,6 +49,60 @@
 #define PF_RSVD		(1<<3)
 #define PF_INSTR	(1<<4)
 
+#ifdef CONFIG_PAGE_FAULT_HANDLERS
+static HLIST_HEAD(pf_handlers); /* protected by RCU */
+static DEFINE_SPINLOCK(pf_handlers_writer);
+
+void register_page_fault_handler(struct pf_handler *new_pfh)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&pf_handlers_writer, flags);
+	hlist_add_head_rcu(&new_pfh->hlist, &pf_handlers);
+	spin_unlock_irqrestore(&pf_handlers_writer, flags);
+}
+EXPORT_SYMBOL_GPL(register_page_fault_handler);
+
+/**
+ * unregister_page_fault_handler:
+ * The caller must ensure @old_pfh is not in use anymore before freeing it.
+ * This function does not guarantee it. The list of handlers is protected by
+ * RCU, so you can do this by e.g. calling synchronize_rcu().
+ */
+void unregister_page_fault_handler(struct pf_handler *old_pfh)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&pf_handlers_writer, flags);
+	hlist_del_rcu(&old_pfh->hlist);
+	spin_unlock_irqrestore(&pf_handlers_writer, flags);
+}
+EXPORT_SYMBOL_GPL(unregister_page_fault_handler);
+#endif
+
+/* returns non-zero if do_page_fault() should return */
+static int handle_custom_pf(struct pt_regs *regs, unsigned long error_code,
+							unsigned long address)
+{
+#ifdef CONFIG_PAGE_FAULT_HANDLERS
+	int ret = 0;
+	struct pf_handler *cur;
+	struct hlist_node *ncur;
+
+	if (hlist_empty(&pf_handlers))
+		return 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(cur, ncur, &pf_handlers, hlist) {
+		ret = cur->handler(regs, error_code, address);
+		if (ret)
+			break;
+	}
+	rcu_read_unlock();
+	return ret;
+#else
+	return 0;
+#endif
+}
+
 static inline int notify_page_fault(struct pt_regs *regs)
 {
 #ifdef CONFIG_KPROBES
@@ -601,6 +655,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 
 	if (notify_page_fault(regs))
 		return;
+	if (handle_custom_pf(regs, error_code, address))
+		return;
 
 	/*
 	 * We fault-in kernel-space virtual memory on-demand. The
diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h
index 96651bb59ba..a80f2d6cc73 100644
--- a/include/asm-x86/kdebug.h
+++ b/include/asm-x86/kdebug.h
@@ -35,4 +35,13 @@ extern void show_regs(struct pt_regs *regs);
 extern unsigned long oops_begin(void);
 extern void oops_end(unsigned long, struct pt_regs *, int signr);
 
+struct pf_handler {
+	struct hlist_node hlist;
+	int (*handler)(struct pt_regs *regs, unsigned long error_code,
+						unsigned long address);
+};
+
+extern void register_page_fault_handler(struct pf_handler *new_pfh);
+extern void unregister_page_fault_handler(struct pf_handler *old_pfh);
+
 #endif
-- 
cgit v1.2.3-70-g09d2


From 10c43d2eb50c9a5ad60388b9d3c41c31150049e6 Mon Sep 17 00:00:00 2001
From: Pekka Paalanen <pq@iki.fi>
Date: Mon, 12 May 2008 21:20:57 +0200
Subject: x86: explicit call to mmiotrace in do_page_fault()

The custom page fault handler list is replaced with a single function
pointer. All related functions and variables are renamed for
mmiotrace.

Signed-off-by: Pekka Paalanen <pq@iki.fi>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: pq@iki.fi
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig.debug            | 14 ++++-----
 arch/x86/kernel/mmiotrace/kmmio.c | 14 ++++-----
 arch/x86/mm/fault.c               | 66 ++++++++++++++++++++-------------------
 include/asm-x86/kdebug.h          | 12 +++----
 4 files changed, 52 insertions(+), 54 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 7c6496e2225..9491c0ae03a 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -168,20 +168,18 @@ config IOMMU_LEAK
 	  Add a simple leak tracer to the IOMMU code. This is useful when you
 	  are debugging a buggy device driver that leaks IOMMU mappings.
 
-config PAGE_FAULT_HANDLERS
-	bool "Custom page fault handlers"
-	depends on DEBUG_KERNEL
-	help
-	  Allow the use of custom page fault handlers. A kernel module may
-	  register a function that is called on every page fault. Custom
-	  handlers are used by some debugging and reverse engineering tools.
+config MMIOTRACE_HOOKS
+	bool
+	default n
 
 config MMIOTRACE
 	tristate "Memory mapped IO tracing"
-	depends on DEBUG_KERNEL && PAGE_FAULT_HANDLERS && RELAY && DEBUG_FS
+	depends on DEBUG_KERNEL && RELAY && DEBUG_FS
+	select MMIOTRACE_HOOKS
 	default n
 	help
 	  This will build a kernel module called mmiotrace.
+	  Making this a built-in is heavily discouraged.
 
 	  Mmiotrace traces Memory Mapped I/O access and is meant for debugging
 	  and reverse engineering. The kernel module offers wrapped
diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c
index 28411dadb8b..e759f7c3878 100644
--- a/arch/x86/kernel/mmiotrace/kmmio.c
+++ b/arch/x86/kernel/mmiotrace/kmmio.c
@@ -51,10 +51,6 @@ static LIST_HEAD(kmmio_probes);
 
 static struct kmmio_context kmmio_ctx[NR_CPUS];
 
-static struct pf_handler kmmio_pf_hook = {
-	.handler = kmmio_page_fault
-};
-
 static struct notifier_block nb_die = {
 	.notifier_call = kmmio_die_notifier
 };
@@ -77,7 +73,8 @@ void cleanup_kmmio(void)
 	 * kmmio_page_table, kmmio_probes
 	 */
 	if (handler_registered) {
-		unregister_page_fault_handler(&kmmio_pf_hook);
+		if (mmiotrace_unregister_pf(&kmmio_page_fault))
+			BUG();
 		synchronize_rcu();
 	}
 	unregister_die_notifier(&nb_die);
@@ -343,8 +340,11 @@ int register_kmmio_probe(struct kmmio_probe *p)
 	}
 
 	if (!handler_registered) {
-		register_page_fault_handler(&kmmio_pf_hook);
-		handler_registered++;
+		if (mmiotrace_register_pf(&kmmio_page_fault))
+			printk(KERN_ERR "mmiotrace: Cannot register page "
+					"fault handler.\n");
+		else
+			handler_registered++;
 	}
 
 out:
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 343f5c1aacc..e9a086a1a9f 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -49,53 +49,55 @@
 #define PF_RSVD		(1<<3)
 #define PF_INSTR	(1<<4)
 
-#ifdef CONFIG_PAGE_FAULT_HANDLERS
-static HLIST_HEAD(pf_handlers); /* protected by RCU */
-static DEFINE_SPINLOCK(pf_handlers_writer);
+#ifdef CONFIG_MMIOTRACE_HOOKS
+static pf_handler_func mmiotrace_pf_handler; /* protected by RCU */
+static DEFINE_SPINLOCK(mmiotrace_handler_lock);
 
-void register_page_fault_handler(struct pf_handler *new_pfh)
+int mmiotrace_register_pf(pf_handler_func new_pfh)
 {
+	int ret = 0;
 	unsigned long flags;
-	spin_lock_irqsave(&pf_handlers_writer, flags);
-	hlist_add_head_rcu(&new_pfh->hlist, &pf_handlers);
-	spin_unlock_irqrestore(&pf_handlers_writer, flags);
+	spin_lock_irqsave(&mmiotrace_handler_lock, flags);
+	if (mmiotrace_pf_handler)
+		ret = -EBUSY;
+	else
+		mmiotrace_pf_handler = new_pfh;
+	spin_unlock_irqrestore(&mmiotrace_handler_lock, flags);
+	return ret;
 }
-EXPORT_SYMBOL_GPL(register_page_fault_handler);
+EXPORT_SYMBOL_GPL(mmiotrace_register_pf);
 
 /**
- * unregister_page_fault_handler:
+ * mmiotrace_unregister_pf:
  * The caller must ensure @old_pfh is not in use anymore before freeing it.
- * This function does not guarantee it. The list of handlers is protected by
- * RCU, so you can do this by e.g. calling synchronize_rcu().
+ * This function does not guarantee it. The handler function pointer is
+ * protected by RCU, so you can do this by e.g. calling synchronize_rcu().
  */
-void unregister_page_fault_handler(struct pf_handler *old_pfh)
+int mmiotrace_unregister_pf(pf_handler_func old_pfh)
 {
+	int ret = 0;
 	unsigned long flags;
-	spin_lock_irqsave(&pf_handlers_writer, flags);
-	hlist_del_rcu(&old_pfh->hlist);
-	spin_unlock_irqrestore(&pf_handlers_writer, flags);
+	spin_lock_irqsave(&mmiotrace_handler_lock, flags);
+	if (mmiotrace_pf_handler != old_pfh)
+		ret = -EPERM;
+	else
+		mmiotrace_pf_handler = NULL;
+	spin_unlock_irqrestore(&mmiotrace_handler_lock, flags);
+	return ret;
 }
-EXPORT_SYMBOL_GPL(unregister_page_fault_handler);
-#endif
+EXPORT_SYMBOL_GPL(mmiotrace_unregister_pf);
+#endif /* CONFIG_MMIOTRACE_HOOKS */
 
 /* returns non-zero if do_page_fault() should return */
-static int handle_custom_pf(struct pt_regs *regs, unsigned long error_code,
-							unsigned long address)
+static inline int call_mmiotrace(struct pt_regs *regs,
+					unsigned long error_code,
+					unsigned long address)
 {
-#ifdef CONFIG_PAGE_FAULT_HANDLERS
+#ifdef CONFIG_MMIOTRACE_HOOKS
 	int ret = 0;
-	struct pf_handler *cur;
-	struct hlist_node *ncur;
-
-	if (hlist_empty(&pf_handlers))
-		return 0;
-
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(cur, ncur, &pf_handlers, hlist) {
-		ret = cur->handler(regs, error_code, address);
-		if (ret)
-			break;
-	}
+	if (mmiotrace_pf_handler)
+		ret = mmiotrace_pf_handler(regs, error_code, address);
 	rcu_read_unlock();
 	return ret;
 #else
@@ -655,7 +657,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 
 	if (notify_page_fault(regs))
 		return;
-	if (handle_custom_pf(regs, error_code, address))
+	if (call_mmiotrace(regs, error_code, address))
 		return;
 
 	/*
diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h
index a80f2d6cc73..7063281040d 100644
--- a/include/asm-x86/kdebug.h
+++ b/include/asm-x86/kdebug.h
@@ -35,13 +35,11 @@ extern void show_regs(struct pt_regs *regs);
 extern unsigned long oops_begin(void);
 extern void oops_end(unsigned long, struct pt_regs *, int signr);
 
-struct pf_handler {
-	struct hlist_node hlist;
-	int (*handler)(struct pt_regs *regs, unsigned long error_code,
-						unsigned long address);
-};
+typedef int (*pf_handler_func)(struct pt_regs *regs,
+				unsigned long error_code,
+				unsigned long address);
 
-extern void register_page_fault_handler(struct pf_handler *new_pfh);
-extern void unregister_page_fault_handler(struct pf_handler *old_pfh);
+extern int mmiotrace_register_pf(pf_handler_func new_pfh);
+extern int mmiotrace_unregister_pf(pf_handler_func old_pfh);
 
 #endif
-- 
cgit v1.2.3-70-g09d2


From 0fd0e3da4557c479b820b9a4a7afa25b4637ddf2 Mon Sep 17 00:00:00 2001
From: Pekka Paalanen <pq@iki.fi>
Date: Mon, 12 May 2008 21:20:57 +0200
Subject: x86: mmiotrace full patch, preview 1

kmmio.c handles the list of mmio probes with callbacks, list of traced
pages, and attaching into the page fault handler and die notifier. It
arms, traps and disarms the given pages, this is the core of mmiotrace.

mmio-mod.c is a user interface, hooking into ioremap functions and
registering the mmio probes. It also decodes the required information
from trapped mmio accesses via the pre and post callbacks in each probe.
Currently, hooking into ioremap functions works by redefining the symbols
of the target (binary) kernel module, so that it calls the traced
versions of the functions.

The most notable changes done since the last discussion are:
- kmmio.c is a built-in, not part of the module
- direct call from fault.c to kmmio.c, removing all dynamic hooks
- prepare for unregistering probes at any time
- make kmmio re-initializable and accessible to more than one user
- rewrite kmmio locking to remove all spinlocks from page fault path

Can I abuse call_rcu() like I do in kmmio.c:unregister_kmmio_probe()
or is there a better way?

The function called via call_rcu() itself calls call_rcu() again,
will this work or break? There I need a second grace period for RCU
after the first grace period for page faults.

Mmiotrace itself (mmio-mod.c) is still a module, I am going to attack
that next. At some point I will start looking into how to make mmiotrace
a tracer component of ftrace (thanks for the hint, Ingo). Ftrace should
make the user space part of mmiotracing as simple as
'cat /debug/trace/mmio > dump.txt'.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/init_task.c               |   1 -
 arch/x86/kernel/mmiotrace/Makefile        |   8 +-
 arch/x86/kernel/mmiotrace/kmmio.c         | 349 ++++++++++++++++++++----------
 arch/x86/kernel/mmiotrace/kmmio.h         |  58 -----
 arch/x86/kernel/mmiotrace/mmio-mod.c      |  81 ++++---
 arch/x86/kernel/mmiotrace/pf_in.c         |   2 +-
 arch/x86/kernel/mmiotrace/testmmiotrace.c |  13 +-
 arch/x86/mm/fault.c                       |  59 +----
 include/asm-x86/kdebug.h                  |   7 -
 include/linux/mmiotrace.h                 |  38 ++++
 10 files changed, 335 insertions(+), 281 deletions(-)
 delete mode 100644 arch/x86/kernel/mmiotrace/kmmio.h

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index 027a5b6a12b..a4f93b4120c 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -15,7 +15,6 @@ static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
 EXPORT_UNUSED_SYMBOL(init_mm); /* will be removed in 2.6.26 */
-EXPORT_SYMBOL_GPL(init_mm);
 
 /*
  * Initial thread structure.
diff --git a/arch/x86/kernel/mmiotrace/Makefile b/arch/x86/kernel/mmiotrace/Makefile
index d6905f7f981..cf1e747b463 100644
--- a/arch/x86/kernel/mmiotrace/Makefile
+++ b/arch/x86/kernel/mmiotrace/Makefile
@@ -1,4 +1,4 @@
-obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
-mmiotrace-objs := pf_in.o kmmio.o mmio-mod.o
-
-obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
+obj-$(CONFIG_MMIOTRACE_HOOKS)	+= kmmio.o
+obj-$(CONFIG_MMIOTRACE)		+= mmiotrace.o
+mmiotrace-objs			:= pf_in.o mmio-mod.o
+obj-$(CONFIG_MMIOTRACE_TEST)	+= testmmiotrace.o
diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c
index 5e239d0b846..539a9b19588 100644
--- a/arch/x86/kernel/mmiotrace/kmmio.c
+++ b/arch/x86/kernel/mmiotrace/kmmio.c
@@ -6,6 +6,7 @@
  */
 
 #include <linux/version.h>
+#include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/hash.h>
 #include <linux/init.h>
@@ -17,70 +18,119 @@
 #include <linux/ptrace.h>
 #include <linux/preempt.h>
 #include <linux/percpu.h>
+#include <linux/kdebug.h>
 #include <asm/io.h>
 #include <asm/cacheflush.h>
 #include <asm/errno.h>
 #include <asm/tlbflush.h>
 #include <asm/pgtable.h>
 
-#include "kmmio.h"
+#include <linux/mmiotrace.h>
 
-#define KMMIO_HASH_BITS 6
-#define KMMIO_TABLE_SIZE (1 << KMMIO_HASH_BITS)
 #define KMMIO_PAGE_HASH_BITS 4
 #define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS)
 
+struct kmmio_fault_page {
+	struct list_head list;
+	struct kmmio_fault_page *release_next;
+	unsigned long page; /* location of the fault page */
+
+	/*
+	 * Number of times this page has been registered as a part
+	 * of a probe. If zero, page is disarmed and this may be freed.
+	 * Used only by writers (RCU).
+	 */
+	int count;
+};
+
+struct kmmio_delayed_release {
+	struct rcu_head rcu;
+	struct kmmio_fault_page *release_list;
+};
+
 struct kmmio_context {
 	struct kmmio_fault_page *fpage;
 	struct kmmio_probe *probe;
 	unsigned long saved_flags;
+	unsigned long addr;
 	int active;
 };
 
-static int kmmio_page_fault(struct pt_regs *regs, unsigned long error_code,
-						unsigned long address);
 static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
 								void *args);
 
+static DECLARE_MUTEX(kmmio_init_mutex);
 static DEFINE_SPINLOCK(kmmio_lock);
 
 /* These are protected by kmmio_lock */
+static int kmmio_initialized;
 unsigned int kmmio_count;
-static unsigned int handler_registered;
+
+/* Read-protected by RCU, write-protected by kmmio_lock. */
 static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
 static LIST_HEAD(kmmio_probes);
 
+static struct list_head *kmmio_page_list(unsigned long page)
+{
+	return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
+}
+
 /* Accessed per-cpu */
 static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx);
 
+/* protected by kmmio_init_mutex */
 static struct notifier_block nb_die = {
 	.notifier_call = kmmio_die_notifier
 };
 
-int init_kmmio(void)
+/**
+ * Makes sure kmmio is initialized and usable.
+ * This must be called before any other kmmio function defined here.
+ * May sleep.
+ */
+void reference_kmmio(void)
 {
-	int i;
-	for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
-		INIT_LIST_HEAD(&kmmio_page_table[i]);
-
-	register_die_notifier(&nb_die);
-	return 0;
+	down(&kmmio_init_mutex);
+	spin_lock_irq(&kmmio_lock);
+	if (!kmmio_initialized) {
+		int i;
+		for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
+			INIT_LIST_HEAD(&kmmio_page_table[i]);
+		if (register_die_notifier(&nb_die))
+			BUG();
+	}
+	kmmio_initialized++;
+	spin_unlock_irq(&kmmio_lock);
+	up(&kmmio_init_mutex);
 }
+EXPORT_SYMBOL_GPL(reference_kmmio);
 
-void cleanup_kmmio(void)
+/**
+ * Clean up kmmio after use. This must be called for every call to
+ * reference_kmmio(). All probes registered after the corresponding
+ * reference_kmmio() must have been unregistered when calling this.
+ * May sleep.
+ */
+void unreference_kmmio(void)
 {
-	/*
-	 * Assume the following have been already cleaned by calling
-	 * unregister_kmmio_probe() appropriately:
-	 * kmmio_page_table, kmmio_probes
-	 */
-	if (handler_registered) {
-		if (mmiotrace_unregister_pf(&kmmio_page_fault))
-			BUG();
-		synchronize_rcu();
+	bool unreg = false;
+
+	down(&kmmio_init_mutex);
+	spin_lock_irq(&kmmio_lock);
+
+	if (kmmio_initialized == 1) {
+		BUG_ON(is_kmmio_active());
+		unreg = true;
 	}
-	unregister_die_notifier(&nb_die);
+	kmmio_initialized--;
+	BUG_ON(kmmio_initialized < 0);
+	spin_unlock_irq(&kmmio_lock);
+
+	if (unreg)
+		unregister_die_notifier(&nb_die); /* calls sync_rcu() */
+	up(&kmmio_init_mutex);
 }
+EXPORT_SYMBOL(unreference_kmmio);
 
 /*
  * this is basically a dynamic stabbing problem:
@@ -90,33 +140,33 @@ void cleanup_kmmio(void)
  * Overlap a Point (might be simple)
  * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup
  */
-/* Get the kmmio at this addr (if any). You must be holding kmmio_lock. */
+/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */
 static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
 {
 	struct kmmio_probe *p;
-	list_for_each_entry(p, &kmmio_probes, list) {
+	list_for_each_entry_rcu(p, &kmmio_probes, list) {
 		if (addr >= p->addr && addr <= (p->addr + p->len))
 			return p;
 	}
 	return NULL;
 }
 
+/* You must be holding RCU read lock. */
 static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
 {
-	struct list_head *head, *tmp;
+	struct list_head *head;
+	struct kmmio_fault_page *p;
 
 	page &= PAGE_MASK;
-	head = &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
-	list_for_each(tmp, head) {
-		struct kmmio_fault_page *p
-			= list_entry(tmp, struct kmmio_fault_page, list);
+	head = kmmio_page_list(page);
+	list_for_each_entry_rcu(p, head, list) {
 		if (p->page == page)
 			return p;
 	}
-
 	return NULL;
 }
 
+/** Mark the given page as not present. Access to it will trigger a fault. */
 static void arm_kmmio_fault_page(unsigned long page, int *page_level)
 {
 	unsigned long address = page & PAGE_MASK;
@@ -124,8 +174,8 @@ static void arm_kmmio_fault_page(unsigned long page, int *page_level)
 	pte_t *pte = lookup_address(address, &level);
 
 	if (!pte) {
-		printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n",
-						__FUNCTION__, page);
+		pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n",
+							__func__, page);
 		return;
 	}
 
@@ -143,6 +193,7 @@ static void arm_kmmio_fault_page(unsigned long page, int *page_level)
 	__flush_tlb_one(page);
 }
 
+/** Mark the given page as present. */
 static void disarm_kmmio_fault_page(unsigned long page, int *page_level)
 {
 	unsigned long address = page & PAGE_MASK;
@@ -150,8 +201,8 @@ static void disarm_kmmio_fault_page(unsigned long page, int *page_level)
 	pte_t *pte = lookup_address(address, &level);
 
 	if (!pte) {
-		printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n",
-						__FUNCTION__, page);
+		pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n",
+							__func__, page);
 		return;
 	}
 
@@ -169,13 +220,25 @@ static void disarm_kmmio_fault_page(unsigned long page, int *page_level)
 	__flush_tlb_one(page);
 }
 
+/*
+ * This is being called from do_page_fault().
+ *
+ * We may be in an interrupt or a critical section. Also prefecthing may
+ * trigger a page fault. We may be in the middle of process switch.
+ * We cannot take any locks, because we could be executing especially
+ * within a kmmio critical section.
+ *
+ * Local interrupts are disabled, so preemption cannot happen.
+ * Do not enable interrupts, do not sleep, and watch out for other CPUs.
+ */
 /*
  * Interrupts are disabled on entry as trap3 is an interrupt gate
  * and they remain disabled thorough out this function.
  */
-static int kmmio_handler(struct pt_regs *regs, unsigned long addr)
+int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 {
-	struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
+	struct kmmio_context *ctx;
+	struct kmmio_fault_page *faultpage;
 
 	/*
 	 * Preemption is now disabled to prevent process switch during
@@ -186,40 +249,40 @@ static int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 	 * XXX what if an interrupt occurs between returning from
 	 * do_page_fault() and entering the single-step exception handler?
 	 * And that interrupt triggers a kmmio trap?
+	 * XXX If we tracing an interrupt service routine or whatever, is
+	 * this enough to keep it on the current cpu?
 	 */
 	preempt_disable();
 
-	/* interrupts disabled and CPU-local data => atomicity guaranteed. */
+	rcu_read_lock();
+	faultpage = get_kmmio_fault_page(addr);
+	if (!faultpage) {
+		/*
+		 * Either this page fault is not caused by kmmio, or
+		 * another CPU just pulled the kmmio probe from under
+		 * our feet. In the latter case all hell breaks loose.
+		 */
+		goto no_kmmio;
+	}
+
+	ctx = &get_cpu_var(kmmio_ctx);
 	if (ctx->active) {
 		/*
-		 * This avoids a deadlock with kmmio_lock.
+		 * Prevent overwriting already in-flight context.
 		 * If this page fault really was due to kmmio trap,
 		 * all hell breaks loose.
 		 */
-		printk(KERN_EMERG "mmiotrace: recursive probe hit on CPU %d, "
-					"for address %lu. Ignoring.\n",
+		pr_emerg("kmmio: recursive probe hit on CPU %d, "
+					"for address 0x%08lx. Ignoring.\n",
 					smp_processor_id(), addr);
-		goto no_kmmio;
+		goto no_kmmio_ctx;
 	}
 	ctx->active++;
 
-	/*
-	 * Acquire the kmmio lock to prevent changes affecting
-	 * get_kmmio_fault_page() and get_kmmio_probe(), since we save their
-	 * returned pointers.
-	 * The lock is released in post_kmmio_handler().
-	 * XXX: could/should get_kmmio_*() be using RCU instead of spinlock?
-	 */
-	spin_lock(&kmmio_lock);
-
-	ctx->fpage = get_kmmio_fault_page(addr);
-	if (!ctx->fpage) {
-		/* this page fault is not caused by kmmio */
-		goto no_kmmio_locked;
-	}
-
+	ctx->fpage = faultpage;
 	ctx->probe = get_kmmio_probe(addr);
 	ctx->saved_flags = (regs->flags & (TF_MASK|IF_MASK));
+	ctx->addr = addr;
 
 	if (ctx->probe && ctx->probe->pre_handler)
 		ctx->probe->pre_handler(ctx->probe, regs, addr);
@@ -227,46 +290,62 @@ static int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 	regs->flags |= TF_MASK;
 	regs->flags &= ~IF_MASK;
 
-	/* We hold lock, now we set present bit in PTE and single step. */
+	/* Now we set present bit in PTE and single step. */
 	disarm_kmmio_fault_page(ctx->fpage->page, NULL);
 
 	put_cpu_var(kmmio_ctx);
+	rcu_read_unlock();
 	return 1;
 
-no_kmmio_locked:
-	spin_unlock(&kmmio_lock);
-	ctx->active--;
+no_kmmio_ctx:
+	put_cpu_var(kmmio_ctx);
 no_kmmio:
+	rcu_read_unlock();
 	preempt_enable_no_resched();
-	put_cpu_var(kmmio_ctx);
-	/* page fault not handled by kmmio */
-	return 0;
+	return 0; /* page fault not handled by kmmio */
 }
 
 /*
  * Interrupts are disabled on entry as trap1 is an interrupt gate
  * and they remain disabled thorough out this function.
- * And we hold kmmio lock.
+ * This must always get called as the pair to kmmio_handler().
  */
 static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
 {
 	int ret = 0;
+	struct kmmio_probe *probe;
+	struct kmmio_fault_page *faultpage;
 	struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
 
 	if (!ctx->active)
 		goto out;
 
+	rcu_read_lock();
+
+	faultpage = get_kmmio_fault_page(ctx->addr);
+	probe = get_kmmio_probe(ctx->addr);
+	if (faultpage != ctx->fpage || probe != ctx->probe) {
+		/*
+		 * The trace setup changed after kmmio_handler() and before
+		 * running this respective post handler. User does not want
+		 * the result anymore.
+		 */
+		ctx->probe = NULL;
+		ctx->fpage = NULL;
+	}
+
 	if (ctx->probe && ctx->probe->post_handler)
 		ctx->probe->post_handler(ctx->probe, condition, regs);
 
-	arm_kmmio_fault_page(ctx->fpage->page, NULL);
+	if (ctx->fpage)
+		arm_kmmio_fault_page(ctx->fpage->page, NULL);
 
 	regs->flags &= ~TF_MASK;
 	regs->flags |= ctx->saved_flags;
 
 	/* These were acquired in kmmio_handler(). */
 	ctx->active--;
-	spin_unlock(&kmmio_lock);
+	BUG_ON(ctx->active);
 	preempt_enable_no_resched();
 
 	/*
@@ -277,11 +356,13 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
 	if (!(regs->flags & TF_MASK))
 		ret = 1;
 
+	rcu_read_unlock();
 out:
 	put_cpu_var(kmmio_ctx);
 	return ret;
 }
 
+/* You must be holding kmmio_lock. */
 static int add_kmmio_fault_page(unsigned long page)
 {
 	struct kmmio_fault_page *f;
@@ -289,6 +370,8 @@ static int add_kmmio_fault_page(unsigned long page)
 	page &= PAGE_MASK;
 	f = get_kmmio_fault_page(page);
 	if (f) {
+		if (!f->count)
+			arm_kmmio_fault_page(f->page, NULL);
 		f->count++;
 		return 0;
 	}
@@ -299,15 +382,16 @@ static int add_kmmio_fault_page(unsigned long page)
 
 	f->count = 1;
 	f->page = page;
-	list_add(&f->list,
-		 &kmmio_page_table[hash_long(f->page, KMMIO_PAGE_HASH_BITS)]);
+	list_add_rcu(&f->list, kmmio_page_list(f->page));
 
 	arm_kmmio_fault_page(f->page, NULL);
 
 	return 0;
 }
 
-static void release_kmmio_fault_page(unsigned long page)
+/* You must be holding kmmio_lock. */
+static void release_kmmio_fault_page(unsigned long page,
+				struct kmmio_fault_page **release_list)
 {
 	struct kmmio_fault_page *f;
 
@@ -317,9 +401,11 @@ static void release_kmmio_fault_page(unsigned long page)
 		return;
 
 	f->count--;
+	BUG_ON(f->count < 0);
 	if (!f->count) {
 		disarm_kmmio_fault_page(f->page, NULL);
-		list_del(&f->list);
+		f->release_next = *release_list;
+		*release_list = f;
 	}
 }
 
@@ -334,68 +420,113 @@ int register_kmmio_probe(struct kmmio_probe *p)
 		ret = -EEXIST;
 		goto out;
 	}
-	list_add(&p->list, &kmmio_probes);
-	/*printk("adding fault pages...\n");*/
+	list_add_rcu(&p->list, &kmmio_probes);
 	while (size < p->len) {
 		if (add_kmmio_fault_page(p->addr + size))
-			printk(KERN_ERR "mmio: Unable to set page fault.\n");
+			pr_err("kmmio: Unable to set page fault.\n");
 		size += PAGE_SIZE;
 	}
-
-	if (!handler_registered) {
-		if (mmiotrace_register_pf(&kmmio_page_fault))
-			printk(KERN_ERR "mmiotrace: Cannot register page "
-					"fault handler.\n");
-		else
-			handler_registered++;
-	}
-
 out:
 	spin_unlock_irq(&kmmio_lock);
 	/*
 	 * XXX: What should I do here?
 	 * Here was a call to global_flush_tlb(), but it does not exist
-	 * anymore.
+	 * anymore. It seems it's not needed after all.
 	 */
 	return ret;
 }
+EXPORT_SYMBOL(register_kmmio_probe);
 
+static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
+{
+	struct kmmio_delayed_release *dr = container_of(
+						head,
+						struct kmmio_delayed_release,
+						rcu);
+	struct kmmio_fault_page *p = dr->release_list;
+	while (p) {
+		struct kmmio_fault_page *next = p->release_next;
+		BUG_ON(p->count);
+		kfree(p);
+		p = next;
+	}
+	kfree(dr);
+}
+
+static void remove_kmmio_fault_pages(struct rcu_head *head)
+{
+	struct kmmio_delayed_release *dr = container_of(
+						head,
+						struct kmmio_delayed_release,
+						rcu);
+	struct kmmio_fault_page *p = dr->release_list;
+	struct kmmio_fault_page **prevp = &dr->release_list;
+	unsigned long flags;
+	spin_lock_irqsave(&kmmio_lock, flags);
+	while (p) {
+		if (!p->count)
+			list_del_rcu(&p->list);
+		else
+			*prevp = p->release_next;
+		prevp = &p->release_next;
+		p = p->release_next;
+	}
+	spin_unlock_irqrestore(&kmmio_lock, flags);
+	/* This is the real RCU destroy call. */
+	call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
+}
+
+/*
+ * Remove a kmmio probe. You have to synchronize_rcu() before you can be
+ * sure that the callbacks will not be called anymore.
+ *
+ * Unregistering a kmmio fault page has three steps:
+ * 1. release_kmmio_fault_page()
+ *    Disarm the page, wait a grace period to let all faults finish.
+ * 2. remove_kmmio_fault_pages()
+ *    Remove the pages from kmmio_page_table.
+ * 3. rcu_free_kmmio_fault_pages()
+ *    Actally free the kmmio_fault_page structs as with RCU.
+ */
 void unregister_kmmio_probe(struct kmmio_probe *p)
 {
 	unsigned long size = 0;
+	struct kmmio_fault_page *release_list = NULL;
+	struct kmmio_delayed_release *drelease;
 
 	spin_lock_irq(&kmmio_lock);
 	while (size < p->len) {
-		release_kmmio_fault_page(p->addr + size);
+		release_kmmio_fault_page(p->addr + size, &release_list);
 		size += PAGE_SIZE;
 	}
-	list_del(&p->list);
+	list_del_rcu(&p->list);
 	kmmio_count--;
 	spin_unlock_irq(&kmmio_lock);
-}
 
-/*
- * According to 2.6.20, mainly x86_64 arch:
- * This is being called from do_page_fault(), via the page fault notifier
- * chain. The chain is called for both user space faults and kernel space
- * faults (address >= TASK_SIZE64), except not on faults serviced by
- * vmalloc_fault().
- *
- * We may be in an interrupt or a critical section. Also prefecthing may
- * trigger a page fault. We may be in the middle of process switch.
- * The page fault hook functionality has put us inside RCU read lock.
- *
- * Local interrupts are disabled, so preemption cannot happen.
- * Do not enable interrupts, do not sleep, and watch out for other CPUs.
- */
-static int kmmio_page_fault(struct pt_regs *regs, unsigned long error_code,
-						unsigned long address)
-{
-	if (is_kmmio_active())
-		if (kmmio_handler(regs, address) == 1)
-			return -1;
-	return 0;
+	drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
+	if (!drelease) {
+		pr_crit("kmmio: leaking kmmio_fault_page objects.\n");
+		return;
+	}
+	drelease->release_list = release_list;
+
+	/*
+	 * This is not really RCU here. We have just disarmed a set of
+	 * pages so that they cannot trigger page faults anymore. However,
+	 * we cannot remove the pages from kmmio_page_table,
+	 * because a probe hit might be in flight on another CPU. The
+	 * pages are collected into a list, and they will be removed from
+	 * kmmio_page_table when it is certain that no probe hit related to
+	 * these pages can be in flight. RCU grace period sounds like a
+	 * good choice.
+	 *
+	 * If we removed the pages too early, kmmio page fault handler might
+	 * not find the respective kmmio_fault_page and determine it's not
+	 * a kmmio fault, when it actually is. This would lead to madness.
+	 */
+	call_rcu(&drelease->rcu, remove_kmmio_fault_pages);
 }
+EXPORT_SYMBOL(unregister_kmmio_probe);
 
 static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
 								void *args)
diff --git a/arch/x86/kernel/mmiotrace/kmmio.h b/arch/x86/kernel/mmiotrace/kmmio.h
deleted file mode 100644
index 85b7f68a3b8..00000000000
--- a/arch/x86/kernel/mmiotrace/kmmio.h
+++ /dev/null
@@ -1,58 +0,0 @@
-#ifndef _LINUX_KMMIO_H
-#define _LINUX_KMMIO_H
-
-#include <linux/list.h>
-#include <linux/notifier.h>
-#include <linux/smp.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/version.h>
-#include <linux/kdebug.h>
-
-struct kmmio_probe;
-struct kmmio_fault_page;
-struct pt_regs;
-
-typedef void (*kmmio_pre_handler_t)(struct kmmio_probe *,
-				struct pt_regs *, unsigned long addr);
-typedef void (*kmmio_post_handler_t)(struct kmmio_probe *,
-				unsigned long condition, struct pt_regs *);
-
-struct kmmio_probe {
-	struct list_head list;
-
-	/* start location of the probe point */
-	unsigned long addr;
-
-	/* length of the probe region */
-	unsigned long len;
-
-	 /* Called before addr is executed. */
-	kmmio_pre_handler_t pre_handler;
-
-	/* Called after addr is executed, unless... */
-	kmmio_post_handler_t post_handler;
-};
-
-struct kmmio_fault_page {
-	struct list_head list;
-
-	/* location of the fault page */
-	unsigned long page;
-
-	int count;
-};
-
-/* kmmio is active by some kmmio_probes? */
-static inline int is_kmmio_active(void)
-{
-	extern unsigned int kmmio_count;
-	return kmmio_count;
-}
-
-int init_kmmio(void);
-void cleanup_kmmio(void);
-int register_kmmio_probe(struct kmmio_probe *p);
-void unregister_kmmio_probe(struct kmmio_probe *p);
-
-#endif /* _LINUX_KMMIO_H */
diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c
index f9c609266d8..e1a508588f0 100644
--- a/arch/x86/kernel/mmiotrace/mmio-mod.c
+++ b/arch/x86/kernel/mmiotrace/mmio-mod.c
@@ -32,7 +32,6 @@
 #include <asm/atomic.h>
 #include <linux/percpu.h>
 
-#include "kmmio.h"
 #include "pf_in.h"
 
 /* This app's relay channel files will appear in /debug/mmio-trace */
@@ -129,18 +128,17 @@ static void print_pte(unsigned long address)
 	pte_t *pte = lookup_address(address, &level);
 
 	if (!pte) {
-		printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n",
-						__FUNCTION__, address);
+		pr_err(MODULE_NAME ": Error in %s: no pte for page 0x%08lx\n",
+							__func__, address);
 		return;
 	}
 
 	if (level == PG_LEVEL_2M) {
-		printk(KERN_EMERG MODULE_NAME ": 4MB pages are not "
-						"currently supported: %lx\n",
-						address);
+		pr_emerg(MODULE_NAME ": 4MB pages are not currently "
+						"supported: %lx\n", address);
 		BUG();
 	}
-	printk(KERN_DEBUG MODULE_NAME ": pte for 0x%lx: 0x%lx 0x%lx\n",
+	pr_info(MODULE_NAME ": pte for 0x%lx: 0x%lx 0x%lx\n",
 					address, pte_val(*pte),
 					pte_val(*pte) & _PAGE_PRESENT);
 }
@@ -152,7 +150,7 @@ static void print_pte(unsigned long address)
 static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr)
 {
 	const struct trap_reason *my_reason = &get_cpu_var(pf_reason);
-	printk(KERN_EMERG MODULE_NAME ": unexpected fault for address: %lx, "
+	pr_emerg(MODULE_NAME ": unexpected fault for address: %lx, "
 					"last fault for address: %lx\n",
 					addr, my_reason->addr);
 	print_pte(addr);
@@ -160,20 +158,17 @@ static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr)
 	print_symbol(KERN_EMERG "faulting EIP is at %s\n", regs->ip);
 	print_symbol(KERN_EMERG "last faulting EIP was at %s\n",
 							my_reason->ip);
-	printk(KERN_EMERG
-			"eax: %08lx   ebx: %08lx   ecx: %08lx   edx: %08lx\n",
+	pr_emerg("eax: %08lx   ebx: %08lx   ecx: %08lx   edx: %08lx\n",
 			regs->ax, regs->bx, regs->cx, regs->dx);
-	printk(KERN_EMERG
-			"esi: %08lx   edi: %08lx   ebp: %08lx   esp: %08lx\n",
+	pr_emerg("esi: %08lx   edi: %08lx   ebp: %08lx   esp: %08lx\n",
 			regs->si, regs->di, regs->bp, regs->sp);
 #else
 	print_symbol(KERN_EMERG "faulting RIP is at %s\n", regs->ip);
 	print_symbol(KERN_EMERG "last faulting RIP was at %s\n",
 							my_reason->ip);
-	printk(KERN_EMERG "rax: %016lx   rcx: %016lx   rdx: %016lx\n",
+	pr_emerg("rax: %016lx   rcx: %016lx   rdx: %016lx\n",
 					regs->ax, regs->cx, regs->dx);
-	printk(KERN_EMERG "rsi: %016lx   rdi: %016lx   "
-				"rbp: %016lx   rsp: %016lx\n",
+	pr_emerg("rsi: %016lx   rdi: %016lx   rbp: %016lx   rsp: %016lx\n",
 				regs->si, regs->di, regs->bp, regs->sp);
 #endif
 	put_cpu_var(pf_reason);
@@ -251,10 +246,15 @@ static void post(struct kmmio_probe *p, unsigned long condition,
 	struct trap_reason *my_reason = &get_cpu_var(pf_reason);
 	struct mm_io_header_rw *my_trace = &get_cpu_var(cpu_trace);
 
+	/*
+	 * XXX: This might not get called, if the probe is removed while
+	 * trace hit is on flight.
+	 */
+
 	/* this should always return the active_trace count to 0 */
 	my_reason->active_traces--;
 	if (my_reason->active_traces) {
-		printk(KERN_EMERG MODULE_NAME ": unexpected post handler");
+		pr_emerg(MODULE_NAME ": unexpected post handler");
 		BUG();
 	}
 
@@ -283,16 +283,15 @@ static int subbuf_start_handler(struct rchan_buf *buf, void *subbuf,
 	atomic_t *drop = &per_cpu(dropped, cpu);
 	int count;
 	if (relay_buf_full(buf)) {
-		if (atomic_inc_return(drop) == 1) {
-			printk(KERN_ERR MODULE_NAME ": cpu %d buffer full!\n",
-									cpu);
-		}
+		if (atomic_inc_return(drop) == 1)
+			pr_err(MODULE_NAME ": cpu %d buffer full!\n", cpu);
 		return 0;
-	} else if ((count = atomic_read(drop))) {
-		printk(KERN_ERR MODULE_NAME
-					": cpu %d buffer no longer full, "
-					"missed %d events.\n",
-					cpu, count);
+	}
+	count = atomic_read(drop);
+	if (count) {
+		pr_err(MODULE_NAME ": cpu %d buffer no longer full, "
+						"missed %d events.\n",
+						cpu, count);
 		atomic_sub(count, drop);
 	}
 
@@ -407,8 +406,8 @@ static void ioremap_trace_core(unsigned long offset, unsigned long size,
 	/* Don't trace the low PCI/ISA area, it's always mapped.. */
 	if (!ISA_trace && (offset < ISA_END_ADDRESS) &&
 					(offset + size > ISA_START_ADDRESS)) {
-		printk(KERN_NOTICE MODULE_NAME ": Ignoring map of low "
-						"PCI/ISA area (0x%lx-0x%lx)\n",
+		pr_notice(MODULE_NAME ": Ignoring map of low PCI/ISA area "
+						"(0x%lx-0x%lx)\n",
 						offset, offset + size);
 		return;
 	}
@@ -418,7 +417,7 @@ static void ioremap_trace_core(unsigned long offset, unsigned long size,
 void __iomem *ioremap_cache_trace(unsigned long offset, unsigned long size)
 {
 	void __iomem *p = ioremap_cache(offset, size);
-	printk(KERN_DEBUG MODULE_NAME ": ioremap_cache(0x%lx, 0x%lx) = %p\n",
+	pr_debug(MODULE_NAME ": ioremap_cache(0x%lx, 0x%lx) = %p\n",
 							offset, size, p);
 	ioremap_trace_core(offset, size, p);
 	return p;
@@ -428,7 +427,7 @@ EXPORT_SYMBOL(ioremap_cache_trace);
 void __iomem *ioremap_nocache_trace(unsigned long offset, unsigned long size)
 {
 	void __iomem *p = ioremap_nocache(offset, size);
-	printk(KERN_DEBUG MODULE_NAME ": ioremap_nocache(0x%lx, 0x%lx) = %p\n",
+	pr_debug(MODULE_NAME ": ioremap_nocache(0x%lx, 0x%lx) = %p\n",
 							offset, size, p);
 	ioremap_trace_core(offset, size, p);
 	return p;
@@ -455,7 +454,7 @@ void iounmap_trace(volatile void __iomem *addr)
 	};
 	struct remap_trace *trace;
 	struct remap_trace *tmp;
-	printk(KERN_DEBUG MODULE_NAME ": Unmapping %p.\n", addr);
+	pr_debug(MODULE_NAME ": Unmapping %p.\n", addr);
 	record_timestamp(&event.header);
 
 	spin_lock(&trace_list_lock);
@@ -481,7 +480,7 @@ static void clear_trace_list(void)
 
 	spin_lock(&trace_list_lock);
 	list_for_each_entry_safe(trace, tmp, &trace_list, list) {
-		printk(KERN_WARNING MODULE_NAME ": purging non-iounmapped "
+		pr_warning(MODULE_NAME ": purging non-iounmapped "
 					"trace @0x%08lx, size 0x%lx.\n",
 					trace->probe.addr, trace->probe.len);
 		if (!nommiotrace)
@@ -500,39 +499,37 @@ static int __init init(void)
 
 	dir = debugfs_create_dir(APP_DIR, NULL);
 	if (!dir) {
-		printk(KERN_ERR MODULE_NAME
-				": Couldn't create relay app directory.\n");
+		pr_err(MODULE_NAME ": Couldn't create relay app directory.\n");
 		return -ENOMEM;
 	}
 
 	chan = create_channel(subbuf_size, n_subbufs);
 	if (!chan) {
 		debugfs_remove(dir);
-		printk(KERN_ERR MODULE_NAME
-				": relay app channel creation failed\n");
+		pr_err(MODULE_NAME ": relay app channel creation failed\n");
 		return -ENOMEM;
 	}
 
-	init_kmmio();
+	reference_kmmio();
 
 	proc_marker_file = create_proc_entry(MARKER_FILE, 0, NULL);
 	if (proc_marker_file)
 		proc_marker_file->write_proc = write_marker;
 
-	printk(KERN_DEBUG MODULE_NAME ": loaded.\n");
+	pr_debug(MODULE_NAME ": loaded.\n");
 	if (nommiotrace)
-		printk(KERN_DEBUG MODULE_NAME ": MMIO tracing disabled.\n");
+		pr_info(MODULE_NAME ": MMIO tracing disabled.\n");
 	if (ISA_trace)
-		printk(KERN_WARNING MODULE_NAME
-				": Warning! low ISA range will be traced.\n");
+		pr_warning(MODULE_NAME ": Warning! low ISA range will be "
+								"traced.\n");
 	return 0;
 }
 
 static void __exit cleanup(void)
 {
-	printk(KERN_DEBUG MODULE_NAME ": unload...\n");
+	pr_debug(MODULE_NAME ": unload...\n");
 	clear_trace_list();
-	cleanup_kmmio();
+	unreference_kmmio();
 	remove_proc_entry(MARKER_FILE, NULL);
 	destroy_channel();
 	if (dir)
diff --git a/arch/x86/kernel/mmiotrace/pf_in.c b/arch/x86/kernel/mmiotrace/pf_in.c
index 67ea520dde6..efa1911e20c 100644
--- a/arch/x86/kernel/mmiotrace/pf_in.c
+++ b/arch/x86/kernel/mmiotrace/pf_in.c
@@ -19,7 +19,7 @@
  *
  */
 
-/*  $Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp $
+/*  Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp
  *  Copyright by Intel Crop., 2002
  *  Louis Zhuang (louis.zhuang@intel.com)
  *
diff --git a/arch/x86/kernel/mmiotrace/testmmiotrace.c b/arch/x86/kernel/mmiotrace/testmmiotrace.c
index 40e66b0e648..5ecff578672 100644
--- a/arch/x86/kernel/mmiotrace/testmmiotrace.c
+++ b/arch/x86/kernel/mmiotrace/testmmiotrace.c
@@ -41,8 +41,7 @@ static void do_test(void)
 {
 	void __iomem *p = ioremap_nocache_trace(mmio_address, 0x4000);
 	if (!p) {
-		printk(KERN_ERR MODULE_NAME ": could not ioremap IO memory, "
-							"aborting.\n");
+		pr_err(MODULE_NAME ": could not ioremap, aborting.\n");
 		return;
 	}
 	do_write_test(p);
@@ -53,14 +52,14 @@ static void do_test(void)
 static int __init init(void)
 {
 	if (mmio_address == 0) {
-		printk(KERN_ERR MODULE_NAME ": you have to use the module "
-						"argument mmio_address.\n");
-		printk(KERN_ERR MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS"
+		pr_err(MODULE_NAME ": you have to use the module argument "
+							"mmio_address.\n");
+		pr_err(MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS"
 				" YOU REALLY KNOW WHAT YOU ARE DOING!\n");
 		return -ENXIO;
 	}
 
-	printk(KERN_WARNING MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx "
+	pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx "
 					"in PCI address space, and writing "
 					"rubbish in there.\n", mmio_address);
 	do_test();
@@ -69,7 +68,7 @@ static int __init init(void)
 
 static void __exit cleanup(void)
 {
-	printk(KERN_DEBUG MODULE_NAME ": unloaded.\n");
+	pr_debug(MODULE_NAME ": unloaded.\n");
 }
 
 module_init(init);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e9a086a1a9f..8c828a68d3b 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -10,6 +10,7 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/ptrace.h>
+#include <linux/mmiotrace.h>
 #include <linux/mman.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
@@ -49,60 +50,14 @@
 #define PF_RSVD		(1<<3)
 #define PF_INSTR	(1<<4)
 
-#ifdef CONFIG_MMIOTRACE_HOOKS
-static pf_handler_func mmiotrace_pf_handler; /* protected by RCU */
-static DEFINE_SPINLOCK(mmiotrace_handler_lock);
-
-int mmiotrace_register_pf(pf_handler_func new_pfh)
-{
-	int ret = 0;
-	unsigned long flags;
-	spin_lock_irqsave(&mmiotrace_handler_lock, flags);
-	if (mmiotrace_pf_handler)
-		ret = -EBUSY;
-	else
-		mmiotrace_pf_handler = new_pfh;
-	spin_unlock_irqrestore(&mmiotrace_handler_lock, flags);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(mmiotrace_register_pf);
-
-/**
- * mmiotrace_unregister_pf:
- * The caller must ensure @old_pfh is not in use anymore before freeing it.
- * This function does not guarantee it. The handler function pointer is
- * protected by RCU, so you can do this by e.g. calling synchronize_rcu().
- */
-int mmiotrace_unregister_pf(pf_handler_func old_pfh)
-{
-	int ret = 0;
-	unsigned long flags;
-	spin_lock_irqsave(&mmiotrace_handler_lock, flags);
-	if (mmiotrace_pf_handler != old_pfh)
-		ret = -EPERM;
-	else
-		mmiotrace_pf_handler = NULL;
-	spin_unlock_irqrestore(&mmiotrace_handler_lock, flags);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(mmiotrace_unregister_pf);
-#endif /* CONFIG_MMIOTRACE_HOOKS */
-
-/* returns non-zero if do_page_fault() should return */
-static inline int call_mmiotrace(struct pt_regs *regs,
-					unsigned long error_code,
-					unsigned long address)
+static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
 {
 #ifdef CONFIG_MMIOTRACE_HOOKS
-	int ret = 0;
-	rcu_read_lock();
-	if (mmiotrace_pf_handler)
-		ret = mmiotrace_pf_handler(regs, error_code, address);
-	rcu_read_unlock();
-	return ret;
-#else
-	return 0;
+	if (unlikely(is_kmmio_active()))
+		if (kmmio_handler(regs, addr) == 1)
+			return -1;
 #endif
+	return 0;
 }
 
 static inline int notify_page_fault(struct pt_regs *regs)
@@ -657,7 +612,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 
 	if (notify_page_fault(regs))
 		return;
-	if (call_mmiotrace(regs, error_code, address))
+	if (unlikely(kmmio_fault(regs, address)))
 		return;
 
 	/*
diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h
index 7063281040d..96651bb59ba 100644
--- a/include/asm-x86/kdebug.h
+++ b/include/asm-x86/kdebug.h
@@ -35,11 +35,4 @@ extern void show_regs(struct pt_regs *regs);
 extern unsigned long oops_begin(void);
 extern void oops_end(unsigned long, struct pt_regs *, int signr);
 
-typedef int (*pf_handler_func)(struct pt_regs *regs,
-				unsigned long error_code,
-				unsigned long address);
-
-extern int mmiotrace_register_pf(pf_handler_func new_pfh);
-extern int mmiotrace_unregister_pf(pf_handler_func old_pfh);
-
 #endif
diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h
index 6ec288f1fe2..d87a6cd8b68 100644
--- a/include/linux/mmiotrace.h
+++ b/include/linux/mmiotrace.h
@@ -3,6 +3,44 @@
 
 #include <asm/types.h>
 
+#ifdef __KERNEL__
+
+#include <linux/list.h>
+
+struct kmmio_probe;
+struct pt_regs;
+
+typedef void (*kmmio_pre_handler_t)(struct kmmio_probe *,
+				struct pt_regs *, unsigned long addr);
+typedef void (*kmmio_post_handler_t)(struct kmmio_probe *,
+				unsigned long condition, struct pt_regs *);
+
+struct kmmio_probe {
+	struct list_head list;
+	unsigned long addr; /* start location of the probe point */
+	unsigned long len; /* length of the probe region */
+	kmmio_pre_handler_t pre_handler; /* Called before addr is executed. */
+	kmmio_post_handler_t post_handler; /* Called after addr is executed */
+};
+
+/* kmmio is active by some kmmio_probes? */
+static inline int is_kmmio_active(void)
+{
+	extern unsigned int kmmio_count;
+	return kmmio_count;
+}
+
+extern void reference_kmmio(void);
+extern void unreference_kmmio(void);
+extern int register_kmmio_probe(struct kmmio_probe *p);
+extern void unregister_kmmio_probe(struct kmmio_probe *p);
+
+/* Called from page fault handler. */
+extern int kmmio_handler(struct pt_regs *regs, unsigned long addr);
+
+#endif /* __KERNEL__ */
+
+
 /*
  * If you change anything here, you must bump MMIO_VERSION.
  * This is the relay data format for user space.
-- 
cgit v1.2.3-70-g09d2


From 40bd21740012132afb62d78ac3e6b82372b2fbc2 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@suse.cz>
Date: Wed, 21 May 2008 11:44:02 +0200
Subject: x86: automatical unification of i8259.c

Make conversion of i8259 very mechanical -- i8259 was generated by
---
 arch/x86/kernel/Makefile   |   2 +-
 arch/x86/kernel/i8259.c    | 368 +++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/i8259_32.c | 295 ------------------------------------
 arch/x86/kernel/i8259_64.c | 309 -------------------------------------
 include/asm-x86/i8259.h    |   2 +
 5 files changed, 371 insertions(+), 605 deletions(-)
 create mode 100644 arch/x86/kernel/i8259.c

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5e618c3b472..f71a76ef259 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -18,7 +18,7 @@ CFLAGS_tsc_64.o		:= $(nostackp)
 obj-y			:= process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
 obj-y			+= traps_$(BITS).o irq_$(BITS).o
 obj-y			+= time_$(BITS).o ioport.o ldt.o
-obj-y			+= setup_$(BITS).o i8259_$(BITS).o setup.o
+obj-y			+= setup_$(BITS).o i8259.o i8259_$(BITS).o setup.o
 obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)	+= syscall_64.o vsyscall_64.o setup64.o
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
new file mode 100644
index 00000000000..2decba6b010
--- /dev/null
+++ b/arch/x86/kernel/i8259.c
@@ -0,0 +1,368 @@
+#ifdef CONFIG_X86_64
+#include <linux/linkage.h>
+#endif /* CONFIG_X86_64 */
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#ifdef CONFIG_X86_64
+#include <linux/timex.h>
+#endif /* CONFIG_X86_64 */
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/init.h>
+#include <linux/kernel_stat.h>
+#include <linux/sysdev.h>
+#include <linux/bitops.h>
+
+#ifdef CONFIG_X86_64
+#include <asm/acpi.h>
+#endif /* CONFIG_X86_64 */
+#include <asm/atomic.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#ifndef CONFIG_X86_64
+#include <asm/timer.h>
+#else /* CONFIG_X86_64 */
+#include <asm/hw_irq.h>
+#endif /* CONFIG_X86_64 */
+#include <asm/pgtable.h>
+#include <asm/delay.h>
+#include <asm/desc.h>
+#include <asm/apic.h>
+#ifndef CONFIG_X86_64
+#include <asm/arch_hooks.h>
+#endif /* ! CONFIG_X86_64 */
+#include <asm/i8259.h>
+
+/*
+ * This is the 'legacy' 8259A Programmable Interrupt Controller,
+ * present in the majority of PC/AT boxes.
+ * plus some generic x86 specific things if generic specifics makes
+ * any sense at all.
+ */
+
+static int i8259A_auto_eoi;
+DEFINE_SPINLOCK(i8259A_lock);
+static void mask_and_ack_8259A(unsigned int);
+
+struct irq_chip i8259A_chip = {
+	.name		= "XT-PIC",
+	.mask		= disable_8259A_irq,
+	.disable	= disable_8259A_irq,
+	.unmask		= enable_8259A_irq,
+	.mask_ack	= mask_and_ack_8259A,
+};
+
+/*
+ * 8259A PIC functions to handle ISA devices:
+ */
+
+/*
+ * This contains the irq mask for both 8259A irq controllers,
+ */
+unsigned int cached_irq_mask = 0xffff;
+
+/*
+ * Not all IRQs can be routed through the IO-APIC, eg. on certain (older)
+ * boards the timer interrupt is not really connected to any IO-APIC pin,
+ * it's fed to the master 8259A's IR0 line only.
+ *
+ * Any '1' bit in this mask means the IRQ is routed through the IO-APIC.
+ * this 'mixed mode' IRQ handling costs nothing because it's only used
+ * at IRQ setup time.
+ */
+unsigned long io_apic_irqs;
+
+void disable_8259A_irq(unsigned int irq)
+{
+	unsigned int mask = 1 << irq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&i8259A_lock, flags);
+	cached_irq_mask |= mask;
+	if (irq & 8)
+		outb(cached_slave_mask, PIC_SLAVE_IMR);
+	else
+		outb(cached_master_mask, PIC_MASTER_IMR);
+	spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+void enable_8259A_irq(unsigned int irq)
+{
+	unsigned int mask = ~(1 << irq);
+	unsigned long flags;
+
+	spin_lock_irqsave(&i8259A_lock, flags);
+	cached_irq_mask &= mask;
+	if (irq & 8)
+		outb(cached_slave_mask, PIC_SLAVE_IMR);
+	else
+		outb(cached_master_mask, PIC_MASTER_IMR);
+	spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+int i8259A_irq_pending(unsigned int irq)
+{
+	unsigned int mask = 1<<irq;
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&i8259A_lock, flags);
+	if (irq < 8)
+		ret = inb(PIC_MASTER_CMD) & mask;
+	else
+		ret = inb(PIC_SLAVE_CMD) & (mask >> 8);
+	spin_unlock_irqrestore(&i8259A_lock, flags);
+
+	return ret;
+}
+
+void make_8259A_irq(unsigned int irq)
+{
+	disable_irq_nosync(irq);
+	io_apic_irqs &= ~(1<<irq);
+	set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq,
+				      "XT");
+	enable_irq(irq);
+}
+
+/*
+ * This function assumes to be called rarely. Switching between
+ * 8259A registers is slow.
+ * This has to be protected by the irq controller spinlock
+ * before being called.
+ */
+static inline int i8259A_irq_real(unsigned int irq)
+{
+	int value;
+	int irqmask = 1<<irq;
+
+	if (irq < 8) {
+		outb(0x0B,PIC_MASTER_CMD);	/* ISR register */
+		value = inb(PIC_MASTER_CMD) & irqmask;
+		outb(0x0A,PIC_MASTER_CMD);	/* back to the IRR register */
+		return value;
+	}
+	outb(0x0B,PIC_SLAVE_CMD);	/* ISR register */
+	value = inb(PIC_SLAVE_CMD) & (irqmask >> 8);
+	outb(0x0A,PIC_SLAVE_CMD);	/* back to the IRR register */
+	return value;
+}
+
+/*
+ * Careful! The 8259A is a fragile beast, it pretty
+ * much _has_ to be done exactly like this (mask it
+ * first, _then_ send the EOI, and the order of EOI
+ * to the two 8259s is important!
+ */
+static void mask_and_ack_8259A(unsigned int irq)
+{
+	unsigned int irqmask = 1 << irq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&i8259A_lock, flags);
+	/*
+	 * Lightweight spurious IRQ detection. We do not want
+	 * to overdo spurious IRQ handling - it's usually a sign
+	 * of hardware problems, so we only do the checks we can
+	 * do without slowing down good hardware unnecessarily.
+	 *
+	 * Note that IRQ7 and IRQ15 (the two spurious IRQs
+	 * usually resulting from the 8259A-1|2 PICs) occur
+	 * even if the IRQ is masked in the 8259A. Thus we
+	 * can check spurious 8259A IRQs without doing the
+	 * quite slow i8259A_irq_real() call for every IRQ.
+	 * This does not cover 100% of spurious interrupts,
+	 * but should be enough to warn the user that there
+	 * is something bad going on ...
+	 */
+	if (cached_irq_mask & irqmask)
+		goto spurious_8259A_irq;
+	cached_irq_mask |= irqmask;
+
+handle_real_irq:
+	if (irq & 8) {
+		inb(PIC_SLAVE_IMR);	/* DUMMY - (do we need this?) */
+		outb(cached_slave_mask, PIC_SLAVE_IMR);
+#ifndef CONFIG_X86_64
+		outb(0x60+(irq&7),PIC_SLAVE_CMD);/* 'Specific EOI' to slave */
+		outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); /* 'Specific EOI' to master-IRQ2 */
+#else /* CONFIG_X86_64 */
+		/* 'Specific EOI' to slave */
+		outb(0x60+(irq&7),PIC_SLAVE_CMD);
+		 /* 'Specific EOI' to master-IRQ2 */
+		outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD);
+#endif /* CONFIG_X86_64 */
+	} else {
+		inb(PIC_MASTER_IMR);	/* DUMMY - (do we need this?) */
+		outb(cached_master_mask, PIC_MASTER_IMR);
+#ifndef CONFIG_X86_64
+		outb(0x60+irq,PIC_MASTER_CMD);	/* 'Specific EOI to master */
+#else /* CONFIG_X86_64 */
+		/* 'Specific EOI' to master */
+		outb(0x60+irq,PIC_MASTER_CMD);
+#endif /* CONFIG_X86_64 */
+	}
+	spin_unlock_irqrestore(&i8259A_lock, flags);
+	return;
+
+spurious_8259A_irq:
+	/*
+	 * this is the slow path - should happen rarely.
+	 */
+	if (i8259A_irq_real(irq))
+		/*
+		 * oops, the IRQ _is_ in service according to the
+		 * 8259A - not spurious, go handle it.
+		 */
+		goto handle_real_irq;
+
+	{
+		static int spurious_irq_mask;
+		/*
+		 * At this point we can be sure the IRQ is spurious,
+		 * lets ACK and report it. [once per IRQ]
+		 */
+		if (!(spurious_irq_mask & irqmask)) {
+#ifndef CONFIG_X86_64
+			printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq);
+#else /* CONFIG_X86_64 */
+			printk(KERN_DEBUG
+			       "spurious 8259A interrupt: IRQ%d.\n", irq);
+#endif /* CONFIG_X86_64 */
+			spurious_irq_mask |= irqmask;
+		}
+		atomic_inc(&irq_err_count);
+		/*
+		 * Theoretically we do not have to handle this IRQ,
+		 * but in Linux this does not cause problems and is
+		 * simpler for us.
+		 */
+		goto handle_real_irq;
+	}
+}
+
+static char irq_trigger[2];
+/**
+ * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ
+ */
+static void restore_ELCR(char *trigger)
+{
+	outb(trigger[0], 0x4d0);
+	outb(trigger[1], 0x4d1);
+}
+
+static void save_ELCR(char *trigger)
+{
+	/* IRQ 0,1,2,8,13 are marked as reserved */
+	trigger[0] = inb(0x4d0) & 0xF8;
+	trigger[1] = inb(0x4d1) & 0xDE;
+}
+
+static int i8259A_resume(struct sys_device *dev)
+{
+	init_8259A(i8259A_auto_eoi);
+	restore_ELCR(irq_trigger);
+	return 0;
+}
+
+static int i8259A_suspend(struct sys_device *dev, pm_message_t state)
+{
+	save_ELCR(irq_trigger);
+	return 0;
+}
+
+static int i8259A_shutdown(struct sys_device *dev)
+{
+	/* Put the i8259A into a quiescent state that
+	 * the kernel initialization code can get it
+	 * out of.
+	 */
+	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
+	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-1 */
+	return 0;
+}
+
+static struct sysdev_class i8259_sysdev_class = {
+	.name = "i8259",
+	.suspend = i8259A_suspend,
+	.resume = i8259A_resume,
+	.shutdown = i8259A_shutdown,
+};
+
+static struct sys_device device_i8259A = {
+	.id	= 0,
+	.cls	= &i8259_sysdev_class,
+};
+
+static int __init i8259A_init_sysfs(void)
+{
+	int error = sysdev_class_register(&i8259_sysdev_class);
+	if (!error)
+		error = sysdev_register(&device_i8259A);
+	return error;
+}
+
+device_initcall(i8259A_init_sysfs);
+
+void init_8259A(int auto_eoi)
+{
+	unsigned long flags;
+
+	i8259A_auto_eoi = auto_eoi;
+
+	spin_lock_irqsave(&i8259A_lock, flags);
+
+	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
+	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-2 */
+
+	/*
+	 * outb_pic - this has to work on a wide range of PC hardware.
+	 */
+	outb_pic(0x11, PIC_MASTER_CMD);	/* ICW1: select 8259A-1 init */
+#ifndef CONFIG_X86_64
+	outb_pic(0x20 + 0, PIC_MASTER_IMR);	/* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */
+	outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR);	/* 8259A-1 (the master) has a slave on IR2 */
+#else /* CONFIG_X86_64 */
+	/* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */
+	outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR);
+	/* 8259A-1 (the master) has a slave on IR2 */
+	outb_pic(0x04, PIC_MASTER_IMR);
+#endif /* CONFIG_X86_64 */
+	if (auto_eoi)	/* master does Auto EOI */
+		outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR);
+	else		/* master expects normal EOI */
+		outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR);
+
+	outb_pic(0x11, PIC_SLAVE_CMD);	/* ICW1: select 8259A-2 init */
+#ifndef CONFIG_X86_64
+	outb_pic(0x20 + 8, PIC_SLAVE_IMR);	/* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */
+	outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR);	/* 8259A-2 is a slave on master's IR2 */
+	outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */
+#else /* CONFIG_X86_64 */
+	/* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */
+	outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR);
+	/* 8259A-2 is a slave on master's IR2 */
+	outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR);
+	/* (slave's support for AEOI in flat mode is to be investigated) */
+	outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR);
+
+#endif /* CONFIG_X86_64 */
+	if (auto_eoi)
+		/*
+		 * In AEOI mode we just have to mask the interrupt
+		 * when acking.
+		 */
+		i8259A_chip.mask_ack = disable_8259A_irq;
+	else
+		i8259A_chip.mask_ack = mask_and_ack_8259A;
+
+	udelay(100);		/* wait for 8259A to initialize */
+
+	outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
+	outb(cached_slave_mask, PIC_SLAVE_IMR);	  /* restore slave IRQ mask */
+
+	spin_unlock_irqrestore(&i8259A_lock, flags);
+}
diff --git a/arch/x86/kernel/i8259_32.c b/arch/x86/kernel/i8259_32.c
index fe631967d62..d66914287ee 100644
--- a/arch/x86/kernel/i8259_32.c
+++ b/arch/x86/kernel/i8259_32.c
@@ -21,302 +21,7 @@
 #include <asm/arch_hooks.h>
 #include <asm/i8259.h>
 
-/*
- * This is the 'legacy' 8259A Programmable Interrupt Controller,
- * present in the majority of PC/AT boxes.
- * plus some generic x86 specific things if generic specifics makes
- * any sense at all.
- */
-
-static int i8259A_auto_eoi;
-DEFINE_SPINLOCK(i8259A_lock);
-static void mask_and_ack_8259A(unsigned int);
-
-static struct irq_chip i8259A_chip = {
-	.name		= "XT-PIC",
-	.mask		= disable_8259A_irq,
-	.disable	= disable_8259A_irq,
-	.unmask		= enable_8259A_irq,
-	.mask_ack	= mask_and_ack_8259A,
-};
-
-/*
- * 8259A PIC functions to handle ISA devices:
- */
-
-/*
- * This contains the irq mask for both 8259A irq controllers,
- */
-unsigned int cached_irq_mask = 0xffff;
-
-/*
- * Not all IRQs can be routed through the IO-APIC, eg. on certain (older)
- * boards the timer interrupt is not really connected to any IO-APIC pin,
- * it's fed to the master 8259A's IR0 line only.
- *
- * Any '1' bit in this mask means the IRQ is routed through the IO-APIC.
- * this 'mixed mode' IRQ handling costs nothing because it's only used
- * at IRQ setup time.
- */
-unsigned long io_apic_irqs;
-
-void disable_8259A_irq(unsigned int irq)
-{
-	unsigned int mask = 1 << irq;
-	unsigned long flags;
-
-	spin_lock_irqsave(&i8259A_lock, flags);
-	cached_irq_mask |= mask;
-	if (irq & 8)
-		outb(cached_slave_mask, PIC_SLAVE_IMR);
-	else
-		outb(cached_master_mask, PIC_MASTER_IMR);
-	spin_unlock_irqrestore(&i8259A_lock, flags);
-}
-
-void enable_8259A_irq(unsigned int irq)
-{
-	unsigned int mask = ~(1 << irq);
-	unsigned long flags;
-
-	spin_lock_irqsave(&i8259A_lock, flags);
-	cached_irq_mask &= mask;
-	if (irq & 8)
-		outb(cached_slave_mask, PIC_SLAVE_IMR);
-	else
-		outb(cached_master_mask, PIC_MASTER_IMR);
-	spin_unlock_irqrestore(&i8259A_lock, flags);
-}
-
-int i8259A_irq_pending(unsigned int irq)
-{
-	unsigned int mask = 1<<irq;
-	unsigned long flags;
-	int ret;
-
-	spin_lock_irqsave(&i8259A_lock, flags);
-	if (irq < 8)
-		ret = inb(PIC_MASTER_CMD) & mask;
-	else
-		ret = inb(PIC_SLAVE_CMD) & (mask >> 8);
-	spin_unlock_irqrestore(&i8259A_lock, flags);
-
-	return ret;
-}
-
-void make_8259A_irq(unsigned int irq)
-{
-	disable_irq_nosync(irq);
-	io_apic_irqs &= ~(1<<irq);
-	set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq,
-				      "XT");
-	enable_irq(irq);
-}
-
-/*
- * This function assumes to be called rarely. Switching between
- * 8259A registers is slow.
- * This has to be protected by the irq controller spinlock
- * before being called.
- */
-static inline int i8259A_irq_real(unsigned int irq)
-{
-	int value;
-	int irqmask = 1<<irq;
-
-	if (irq < 8) {
-		outb(0x0B,PIC_MASTER_CMD);	/* ISR register */
-		value = inb(PIC_MASTER_CMD) & irqmask;
-		outb(0x0A,PIC_MASTER_CMD);	/* back to the IRR register */
-		return value;
-	}
-	outb(0x0B,PIC_SLAVE_CMD);	/* ISR register */
-	value = inb(PIC_SLAVE_CMD) & (irqmask >> 8);
-	outb(0x0A,PIC_SLAVE_CMD);	/* back to the IRR register */
-	return value;
-}
-
-/*
- * Careful! The 8259A is a fragile beast, it pretty
- * much _has_ to be done exactly like this (mask it
- * first, _then_ send the EOI, and the order of EOI
- * to the two 8259s is important!
- */
-static void mask_and_ack_8259A(unsigned int irq)
-{
-	unsigned int irqmask = 1 << irq;
-	unsigned long flags;
-
-	spin_lock_irqsave(&i8259A_lock, flags);
-	/*
-	 * Lightweight spurious IRQ detection. We do not want
-	 * to overdo spurious IRQ handling - it's usually a sign
-	 * of hardware problems, so we only do the checks we can
-	 * do without slowing down good hardware unnecessarily.
-	 *
-	 * Note that IRQ7 and IRQ15 (the two spurious IRQs
-	 * usually resulting from the 8259A-1|2 PICs) occur
-	 * even if the IRQ is masked in the 8259A. Thus we
-	 * can check spurious 8259A IRQs without doing the
-	 * quite slow i8259A_irq_real() call for every IRQ.
-	 * This does not cover 100% of spurious interrupts,
-	 * but should be enough to warn the user that there
-	 * is something bad going on ...
-	 */
-	if (cached_irq_mask & irqmask)
-		goto spurious_8259A_irq;
-	cached_irq_mask |= irqmask;
 
-handle_real_irq:
-	if (irq & 8) {
-		inb(PIC_SLAVE_IMR);	/* DUMMY - (do we need this?) */
-		outb(cached_slave_mask, PIC_SLAVE_IMR);
-		outb(0x60+(irq&7),PIC_SLAVE_CMD);/* 'Specific EOI' to slave */
-		outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); /* 'Specific EOI' to master-IRQ2 */
-	} else {
-		inb(PIC_MASTER_IMR);	/* DUMMY - (do we need this?) */
-		outb(cached_master_mask, PIC_MASTER_IMR);
-		outb(0x60+irq,PIC_MASTER_CMD);	/* 'Specific EOI to master */
-	}
-	spin_unlock_irqrestore(&i8259A_lock, flags);
-	return;
-
-spurious_8259A_irq:
-	/*
-	 * this is the slow path - should happen rarely.
-	 */
-	if (i8259A_irq_real(irq))
-		/*
-		 * oops, the IRQ _is_ in service according to the
-		 * 8259A - not spurious, go handle it.
-		 */
-		goto handle_real_irq;
-
-	{
-		static int spurious_irq_mask;
-		/*
-		 * At this point we can be sure the IRQ is spurious,
-		 * lets ACK and report it. [once per IRQ]
-		 */
-		if (!(spurious_irq_mask & irqmask)) {
-			printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq);
-			spurious_irq_mask |= irqmask;
-		}
-		atomic_inc(&irq_err_count);
-		/*
-		 * Theoretically we do not have to handle this IRQ,
-		 * but in Linux this does not cause problems and is
-		 * simpler for us.
-		 */
-		goto handle_real_irq;
-	}
-}
-
-static char irq_trigger[2];
-/**
- * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ
- */
-static void restore_ELCR(char *trigger)
-{
-	outb(trigger[0], 0x4d0);
-	outb(trigger[1], 0x4d1);
-}
-
-static void save_ELCR(char *trigger)
-{
-	/* IRQ 0,1,2,8,13 are marked as reserved */
-	trigger[0] = inb(0x4d0) & 0xF8;
-	trigger[1] = inb(0x4d1) & 0xDE;
-}
-
-static int i8259A_resume(struct sys_device *dev)
-{
-	init_8259A(i8259A_auto_eoi);
-	restore_ELCR(irq_trigger);
-	return 0;
-}
-
-static int i8259A_suspend(struct sys_device *dev, pm_message_t state)
-{
-	save_ELCR(irq_trigger);
-	return 0;
-}
-
-static int i8259A_shutdown(struct sys_device *dev)
-{
-	/* Put the i8259A into a quiescent state that
-	 * the kernel initialization code can get it
-	 * out of.
-	 */
-	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
-	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-1 */
-	return 0;
-}
-
-static struct sysdev_class i8259_sysdev_class = {
-	.name = "i8259",
-	.suspend = i8259A_suspend,
-	.resume = i8259A_resume,
-	.shutdown = i8259A_shutdown,
-};
-
-static struct sys_device device_i8259A = {
-	.id	= 0,
-	.cls	= &i8259_sysdev_class,
-};
-
-static int __init i8259A_init_sysfs(void)
-{
-	int error = sysdev_class_register(&i8259_sysdev_class);
-	if (!error)
-		error = sysdev_register(&device_i8259A);
-	return error;
-}
-
-device_initcall(i8259A_init_sysfs);
-
-void init_8259A(int auto_eoi)
-{
-	unsigned long flags;
-
-	i8259A_auto_eoi = auto_eoi;
-
-	spin_lock_irqsave(&i8259A_lock, flags);
-
-	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
-	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-2 */
-
-	/*
-	 * outb_pic - this has to work on a wide range of PC hardware.
-	 */
-	outb_pic(0x11, PIC_MASTER_CMD);	/* ICW1: select 8259A-1 init */
-	outb_pic(0x20 + 0, PIC_MASTER_IMR);	/* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */
-	outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR);	/* 8259A-1 (the master) has a slave on IR2 */
-	if (auto_eoi)	/* master does Auto EOI */
-		outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR);
-	else		/* master expects normal EOI */
-		outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR);
-
-	outb_pic(0x11, PIC_SLAVE_CMD);	/* ICW1: select 8259A-2 init */
-	outb_pic(0x20 + 8, PIC_SLAVE_IMR);	/* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */
-	outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR);	/* 8259A-2 is a slave on master's IR2 */
-	outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */
-	if (auto_eoi)
-		/*
-		 * In AEOI mode we just have to mask the interrupt
-		 * when acking.
-		 */
-		i8259A_chip.mask_ack = disable_8259A_irq;
-	else
-		i8259A_chip.mask_ack = mask_and_ack_8259A;
-
-	udelay(100);		/* wait for 8259A to initialize */
-
-	outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
-	outb(cached_slave_mask, PIC_SLAVE_IMR);	  /* restore slave IRQ mask */
-
-	spin_unlock_irqrestore(&i8259A_lock, flags);
-}
 
 /*
  * Note that on a 486, we don't want to do a SIGFPE on an irq13
diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c
index fa57a156850..a95d2bd27e9 100644
--- a/arch/x86/kernel/i8259_64.c
+++ b/arch/x86/kernel/i8259_64.c
@@ -87,315 +87,6 @@ static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) =
 #undef IRQ
 #undef IRQLIST_16
 
-/*
- * This is the 'legacy' 8259A Programmable Interrupt Controller,
- * present in the majority of PC/AT boxes.
- * plus some generic x86 specific things if generic specifics makes
- * any sense at all.
- * this file should become arch/i386/kernel/irq.c when the old irq.c
- * moves to arch independent land
- */
-
-static int i8259A_auto_eoi;
-DEFINE_SPINLOCK(i8259A_lock);
-static void mask_and_ack_8259A(unsigned int);
-
-static struct irq_chip i8259A_chip = {
-	.name		= "XT-PIC",
-	.mask		= disable_8259A_irq,
-	.disable	= disable_8259A_irq,
-	.unmask		= enable_8259A_irq,
-	.mask_ack	= mask_and_ack_8259A,
-};
-
-/*
- * 8259A PIC functions to handle ISA devices:
- */
-
-/*
- * This contains the irq mask for both 8259A irq controllers,
- */
-unsigned int cached_irq_mask = 0xffff;
-
-/*
- * Not all IRQs can be routed through the IO-APIC, eg. on certain (older)
- * boards the timer interrupt is not really connected to any IO-APIC pin,
- * it's fed to the master 8259A's IR0 line only.
- *
- * Any '1' bit in this mask means the IRQ is routed through the IO-APIC.
- * this 'mixed mode' IRQ handling costs nothing because it's only used
- * at IRQ setup time.
- */
-unsigned long io_apic_irqs;
-
-void disable_8259A_irq(unsigned int irq)
-{
-	unsigned int mask = 1 << irq;
-	unsigned long flags;
-
-	spin_lock_irqsave(&i8259A_lock, flags);
-	cached_irq_mask |= mask;
-	if (irq & 8)
-		outb(cached_slave_mask, PIC_SLAVE_IMR);
-	else
-		outb(cached_master_mask, PIC_MASTER_IMR);
-	spin_unlock_irqrestore(&i8259A_lock, flags);
-}
-
-void enable_8259A_irq(unsigned int irq)
-{
-	unsigned int mask = ~(1 << irq);
-	unsigned long flags;
-
-	spin_lock_irqsave(&i8259A_lock, flags);
-	cached_irq_mask &= mask;
-	if (irq & 8)
-		outb(cached_slave_mask, PIC_SLAVE_IMR);
-	else
-		outb(cached_master_mask, PIC_MASTER_IMR);
-	spin_unlock_irqrestore(&i8259A_lock, flags);
-}
-
-int i8259A_irq_pending(unsigned int irq)
-{
-	unsigned int mask = 1<<irq;
-	unsigned long flags;
-	int ret;
-
-	spin_lock_irqsave(&i8259A_lock, flags);
-	if (irq < 8)
-		ret = inb(PIC_MASTER_CMD) & mask;
-	else
-		ret = inb(PIC_SLAVE_CMD) & (mask >> 8);
-	spin_unlock_irqrestore(&i8259A_lock, flags);
-
-	return ret;
-}
-
-void make_8259A_irq(unsigned int irq)
-{
-	disable_irq_nosync(irq);
-	io_apic_irqs &= ~(1<<irq);
-	set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq,
-				      "XT");
-	enable_irq(irq);
-}
-
-/*
- * This function assumes to be called rarely. Switching between
- * 8259A registers is slow.
- * This has to be protected by the irq controller spinlock
- * before being called.
- */
-static inline int i8259A_irq_real(unsigned int irq)
-{
-	int value;
-	int irqmask = 1<<irq;
-
-	if (irq < 8) {
-		outb(0x0B,PIC_MASTER_CMD);	/* ISR register */
-		value = inb(PIC_MASTER_CMD) & irqmask;
-		outb(0x0A,PIC_MASTER_CMD);	/* back to the IRR register */
-		return value;
-	}
-	outb(0x0B,PIC_SLAVE_CMD);	/* ISR register */
-	value = inb(PIC_SLAVE_CMD) & (irqmask >> 8);
-	outb(0x0A,PIC_SLAVE_CMD);	/* back to the IRR register */
-	return value;
-}
-
-/*
- * Careful! The 8259A is a fragile beast, it pretty
- * much _has_ to be done exactly like this (mask it
- * first, _then_ send the EOI, and the order of EOI
- * to the two 8259s is important!
- */
-static void mask_and_ack_8259A(unsigned int irq)
-{
-	unsigned int irqmask = 1 << irq;
-	unsigned long flags;
-
-	spin_lock_irqsave(&i8259A_lock, flags);
-	/*
-	 * Lightweight spurious IRQ detection. We do not want
-	 * to overdo spurious IRQ handling - it's usually a sign
-	 * of hardware problems, so we only do the checks we can
-	 * do without slowing down good hardware unnecessarily.
-	 *
-	 * Note that IRQ7 and IRQ15 (the two spurious IRQs
-	 * usually resulting from the 8259A-1|2 PICs) occur
-	 * even if the IRQ is masked in the 8259A. Thus we
-	 * can check spurious 8259A IRQs without doing the
-	 * quite slow i8259A_irq_real() call for every IRQ.
-	 * This does not cover 100% of spurious interrupts,
-	 * but should be enough to warn the user that there
-	 * is something bad going on ...
-	 */
-	if (cached_irq_mask & irqmask)
-		goto spurious_8259A_irq;
-	cached_irq_mask |= irqmask;
-
-handle_real_irq:
-	if (irq & 8) {
-		inb(PIC_SLAVE_IMR);	/* DUMMY - (do we need this?) */
-		outb(cached_slave_mask, PIC_SLAVE_IMR);
-		/* 'Specific EOI' to slave */
-		outb(0x60+(irq&7),PIC_SLAVE_CMD);
-		 /* 'Specific EOI' to master-IRQ2 */
-		outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD);
-	} else {
-		inb(PIC_MASTER_IMR);	/* DUMMY - (do we need this?) */
-		outb(cached_master_mask, PIC_MASTER_IMR);
-		/* 'Specific EOI' to master */
-		outb(0x60+irq,PIC_MASTER_CMD);
-	}
-	spin_unlock_irqrestore(&i8259A_lock, flags);
-	return;
-
-spurious_8259A_irq:
-	/*
-	 * this is the slow path - should happen rarely.
-	 */
-	if (i8259A_irq_real(irq))
-		/*
-		 * oops, the IRQ _is_ in service according to the
-		 * 8259A - not spurious, go handle it.
-		 */
-		goto handle_real_irq;
-
-	{
-		static int spurious_irq_mask;
-		/*
-		 * At this point we can be sure the IRQ is spurious,
-		 * lets ACK and report it. [once per IRQ]
-		 */
-		if (!(spurious_irq_mask & irqmask)) {
-			printk(KERN_DEBUG
-			       "spurious 8259A interrupt: IRQ%d.\n", irq);
-			spurious_irq_mask |= irqmask;
-		}
-		atomic_inc(&irq_err_count);
-		/*
-		 * Theoretically we do not have to handle this IRQ,
-		 * but in Linux this does not cause problems and is
-		 * simpler for us.
-		 */
-		goto handle_real_irq;
-	}
-}
-
-static char irq_trigger[2];
-/**
- * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ
- */
-static void restore_ELCR(char *trigger)
-{
-	outb(trigger[0], 0x4d0);
-	outb(trigger[1], 0x4d1);
-}
-
-static void save_ELCR(char *trigger)
-{
-	/* IRQ 0,1,2,8,13 are marked as reserved */
-	trigger[0] = inb(0x4d0) & 0xF8;
-	trigger[1] = inb(0x4d1) & 0xDE;
-}
-
-static int i8259A_resume(struct sys_device *dev)
-{
-	init_8259A(i8259A_auto_eoi);
-	restore_ELCR(irq_trigger);
-	return 0;
-}
-
-static int i8259A_suspend(struct sys_device *dev, pm_message_t state)
-{
-	save_ELCR(irq_trigger);
-	return 0;
-}
-
-static int i8259A_shutdown(struct sys_device *dev)
-{
-	/* Put the i8259A into a quiescent state that
-	 * the kernel initialization code can get it
-	 * out of.
-	 */
-	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
-	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-1 */
-	return 0;
-}
-
-static struct sysdev_class i8259_sysdev_class = {
-	.name = "i8259",
-	.suspend = i8259A_suspend,
-	.resume = i8259A_resume,
-	.shutdown = i8259A_shutdown,
-};
-
-static struct sys_device device_i8259A = {
-	.id	= 0,
-	.cls	= &i8259_sysdev_class,
-};
-
-static int __init i8259A_init_sysfs(void)
-{
-	int error = sysdev_class_register(&i8259_sysdev_class);
-	if (!error)
-		error = sysdev_register(&device_i8259A);
-	return error;
-}
-
-device_initcall(i8259A_init_sysfs);
-
-void init_8259A(int auto_eoi)
-{
-	unsigned long flags;
-
-	i8259A_auto_eoi = auto_eoi;
-
-	spin_lock_irqsave(&i8259A_lock, flags);
-
-	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
-	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-2 */
-
-	/*
-	 * outb_pic - this has to work on a wide range of PC hardware.
-	 */
-	outb_pic(0x11, PIC_MASTER_CMD);	/* ICW1: select 8259A-1 init */
-	/* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */
-	outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR);
-	/* 8259A-1 (the master) has a slave on IR2 */
-	outb_pic(0x04, PIC_MASTER_IMR);
-	if (auto_eoi)	/* master does Auto EOI */
-		outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR);
-	else		/* master expects normal EOI */
-		outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR);
-
-	outb_pic(0x11, PIC_SLAVE_CMD);	/* ICW1: select 8259A-2 init */
-	/* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */
-	outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR);
-	/* 8259A-2 is a slave on master's IR2 */
-	outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR);
-	/* (slave's support for AEOI in flat mode is to be investigated) */
-	outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR);
-
-	if (auto_eoi)
-		/*
-		 * In AEOI mode we just have to mask the interrupt
-		 * when acking.
-		 */
-		i8259A_chip.mask_ack = disable_8259A_irq;
-	else
-		i8259A_chip.mask_ack = mask_and_ack_8259A;
-
-	udelay(100);		/* wait for 8259A to initialize */
-
-	outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
-	outb(cached_slave_mask, PIC_SLAVE_IMR);	  /* restore slave IRQ mask */
-
-	spin_unlock_irqrestore(&i8259A_lock, flags);
-}
-
 
 
diff --git a/include/asm-x86/i8259.h b/include/asm-x86/i8259.h
index 45d4df3e51e..2f98df91f1f 100644
--- a/include/asm-x86/i8259.h
+++ b/include/asm-x86/i8259.h
@@ -55,4 +55,6 @@ static inline void outb_pic(unsigned char value, unsigned int port)
 	udelay(2);
 }
 
+extern struct irq_chip i8259A_chip;
+
 #endif	/* __ASM_I8259_H__ */
-- 
cgit v1.2.3-70-g09d2


From 21fd5132b223a10bdf17713dd0bf321cbd6471d2 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@suse.cz>
Date: Wed, 21 May 2008 11:44:02 +0200
Subject: x86: automatical unification of i8259.c

Make conversion of i8259 very mechanical -- i8259 was generated by
 diff -D, with too different parts left in i8259_32 and
i8259_64.c. Only "by hand" changes were removal of #ifdef from middle
of the comment (prevented compilation) and removal of one static to
allow splitting into files.

Of course, it will need some cleanups now, and those will follow.

Signed-of-by: Pavel Machek <pavel@suse.cz>
---
 arch/x86/kernel/Makefile   |   2 +-
 arch/x86/kernel/i8259.c    | 368 +++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/i8259_32.c | 295 ------------------------------------
 arch/x86/kernel/i8259_64.c | 309 -------------------------------------
 include/asm-x86/i8259.h    |   2 +
 5 files changed, 371 insertions(+), 605 deletions(-)
 create mode 100644 arch/x86/kernel/i8259.c

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5e618c3b472..f71a76ef259 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -18,7 +18,7 @@ CFLAGS_tsc_64.o		:= $(nostackp)
 obj-y			:= process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
 obj-y			+= traps_$(BITS).o irq_$(BITS).o
 obj-y			+= time_$(BITS).o ioport.o ldt.o
-obj-y			+= setup_$(BITS).o i8259_$(BITS).o setup.o
+obj-y			+= setup_$(BITS).o i8259.o i8259_$(BITS).o setup.o
 obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)	+= syscall_64.o vsyscall_64.o setup64.o
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
new file mode 100644
index 00000000000..2decba6b010
--- /dev/null
+++ b/arch/x86/kernel/i8259.c
@@ -0,0 +1,368 @@
+#ifdef CONFIG_X86_64
+#include <linux/linkage.h>
+#endif /* CONFIG_X86_64 */
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#ifdef CONFIG_X86_64
+#include <linux/timex.h>
+#endif /* CONFIG_X86_64 */
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/init.h>
+#include <linux/kernel_stat.h>
+#include <linux/sysdev.h>
+#include <linux/bitops.h>
+
+#ifdef CONFIG_X86_64
+#include <asm/acpi.h>
+#endif /* CONFIG_X86_64 */
+#include <asm/atomic.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#ifndef CONFIG_X86_64
+#include <asm/timer.h>
+#else /* CONFIG_X86_64 */
+#include <asm/hw_irq.h>
+#endif /* CONFIG_X86_64 */
+#include <asm/pgtable.h>
+#include <asm/delay.h>
+#include <asm/desc.h>
+#include <asm/apic.h>
+#ifndef CONFIG_X86_64
+#include <asm/arch_hooks.h>
+#endif /* ! CONFIG_X86_64 */
+#include <asm/i8259.h>
+
+/*
+ * This is the 'legacy' 8259A Programmable Interrupt Controller,
+ * present in the majority of PC/AT boxes.
+ * plus some generic x86 specific things if generic specifics makes
+ * any sense at all.
+ */
+
+static int i8259A_auto_eoi;
+DEFINE_SPINLOCK(i8259A_lock);
+static void mask_and_ack_8259A(unsigned int);
+
+struct irq_chip i8259A_chip = {
+	.name		= "XT-PIC",
+	.mask		= disable_8259A_irq,
+	.disable	= disable_8259A_irq,
+	.unmask		= enable_8259A_irq,
+	.mask_ack	= mask_and_ack_8259A,
+};
+
+/*
+ * 8259A PIC functions to handle ISA devices:
+ */
+
+/*
+ * This contains the irq mask for both 8259A irq controllers,
+ */
+unsigned int cached_irq_mask = 0xffff;
+
+/*
+ * Not all IRQs can be routed through the IO-APIC, eg. on certain (older)
+ * boards the timer interrupt is not really connected to any IO-APIC pin,
+ * it's fed to the master 8259A's IR0 line only.
+ *
+ * Any '1' bit in this mask means the IRQ is routed through the IO-APIC.
+ * this 'mixed mode' IRQ handling costs nothing because it's only used
+ * at IRQ setup time.
+ */
+unsigned long io_apic_irqs;
+
+void disable_8259A_irq(unsigned int irq)
+{
+	unsigned int mask = 1 << irq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&i8259A_lock, flags);
+	cached_irq_mask |= mask;
+	if (irq & 8)
+		outb(cached_slave_mask, PIC_SLAVE_IMR);
+	else
+		outb(cached_master_mask, PIC_MASTER_IMR);
+	spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+void enable_8259A_irq(unsigned int irq)
+{
+	unsigned int mask = ~(1 << irq);
+	unsigned long flags;
+
+	spin_lock_irqsave(&i8259A_lock, flags);
+	cached_irq_mask &= mask;
+	if (irq & 8)
+		outb(cached_slave_mask, PIC_SLAVE_IMR);
+	else
+		outb(cached_master_mask, PIC_MASTER_IMR);
+	spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+int i8259A_irq_pending(unsigned int irq)
+{
+	unsigned int mask = 1<<irq;
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&i8259A_lock, flags);
+	if (irq < 8)
+		ret = inb(PIC_MASTER_CMD) & mask;
+	else
+		ret = inb(PIC_SLAVE_CMD) & (mask >> 8);
+	spin_unlock_irqrestore(&i8259A_lock, flags);
+
+	return ret;
+}
+
+void make_8259A_irq(unsigned int irq)
+{
+	disable_irq_nosync(irq);
+	io_apic_irqs &= ~(1<<irq);
+	set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq,
+				      "XT");
+	enable_irq(irq);
+}
+
+/*
+ * This function assumes to be called rarely. Switching between
+ * 8259A registers is slow.
+ * This has to be protected by the irq controller spinlock
+ * before being called.
+ */
+static inline int i8259A_irq_real(unsigned int irq)
+{
+	int value;
+	int irqmask = 1<<irq;
+
+	if (irq < 8) {
+		outb(0x0B,PIC_MASTER_CMD);	/* ISR register */
+		value = inb(PIC_MASTER_CMD) & irqmask;
+		outb(0x0A,PIC_MASTER_CMD);	/* back to the IRR register */
+		return value;
+	}
+	outb(0x0B,PIC_SLAVE_CMD);	/* ISR register */
+	value = inb(PIC_SLAVE_CMD) & (irqmask >> 8);
+	outb(0x0A,PIC_SLAVE_CMD);	/* back to the IRR register */
+	return value;
+}
+
+/*
+ * Careful! The 8259A is a fragile beast, it pretty
+ * much _has_ to be done exactly like this (mask it
+ * first, _then_ send the EOI, and the order of EOI
+ * to the two 8259s is important!
+ */
+static void mask_and_ack_8259A(unsigned int irq)
+{
+	unsigned int irqmask = 1 << irq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&i8259A_lock, flags);
+	/*
+	 * Lightweight spurious IRQ detection. We do not want
+	 * to overdo spurious IRQ handling - it's usually a sign
+	 * of hardware problems, so we only do the checks we can
+	 * do without slowing down good hardware unnecessarily.
+	 *
+	 * Note that IRQ7 and IRQ15 (the two spurious IRQs
+	 * usually resulting from the 8259A-1|2 PICs) occur
+	 * even if the IRQ is masked in the 8259A. Thus we
+	 * can check spurious 8259A IRQs without doing the
+	 * quite slow i8259A_irq_real() call for every IRQ.
+	 * This does not cover 100% of spurious interrupts,
+	 * but should be enough to warn the user that there
+	 * is something bad going on ...
+	 */
+	if (cached_irq_mask & irqmask)
+		goto spurious_8259A_irq;
+	cached_irq_mask |= irqmask;
+
+handle_real_irq:
+	if (irq & 8) {
+		inb(PIC_SLAVE_IMR);	/* DUMMY - (do we need this?) */
+		outb(cached_slave_mask, PIC_SLAVE_IMR);
+#ifndef CONFIG_X86_64
+		outb(0x60+(irq&7),PIC_SLAVE_CMD);/* 'Specific EOI' to slave */
+		outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); /* 'Specific EOI' to master-IRQ2 */
+#else /* CONFIG_X86_64 */
+		/* 'Specific EOI' to slave */
+		outb(0x60+(irq&7),PIC_SLAVE_CMD);
+		 /* 'Specific EOI' to master-IRQ2 */
+		outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD);
+#endif /* CONFIG_X86_64 */
+	} else {
+		inb(PIC_MASTER_IMR);	/* DUMMY - (do we need this?) */
+		outb(cached_master_mask, PIC_MASTER_IMR);
+#ifndef CONFIG_X86_64
+		outb(0x60+irq,PIC_MASTER_CMD);	/* 'Specific EOI to master */
+#else /* CONFIG_X86_64 */
+		/* 'Specific EOI' to master */
+		outb(0x60+irq,PIC_MASTER_CMD);
+#endif /* CONFIG_X86_64 */
+	}
+	spin_unlock_irqrestore(&i8259A_lock, flags);
+	return;
+
+spurious_8259A_irq:
+	/*
+	 * this is the slow path - should happen rarely.
+	 */
+	if (i8259A_irq_real(irq))
+		/*
+		 * oops, the IRQ _is_ in service according to the
+		 * 8259A - not spurious, go handle it.
+		 */
+		goto handle_real_irq;
+
+	{
+		static int spurious_irq_mask;
+		/*
+		 * At this point we can be sure the IRQ is spurious,
+		 * lets ACK and report it. [once per IRQ]
+		 */
+		if (!(spurious_irq_mask & irqmask)) {
+#ifndef CONFIG_X86_64
+			printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq);
+#else /* CONFIG_X86_64 */
+			printk(KERN_DEBUG
+			       "spurious 8259A interrupt: IRQ%d.\n", irq);
+#endif /* CONFIG_X86_64 */
+			spurious_irq_mask |= irqmask;
+		}
+		atomic_inc(&irq_err_count);
+		/*
+		 * Theoretically we do not have to handle this IRQ,
+		 * but in Linux this does not cause problems and is
+		 * simpler for us.
+		 */
+		goto handle_real_irq;
+	}
+}
+
+static char irq_trigger[2];
+/**
+ * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ
+ */
+static void restore_ELCR(char *trigger)
+{
+	outb(trigger[0], 0x4d0);
+	outb(trigger[1], 0x4d1);
+}
+
+static void save_ELCR(char *trigger)
+{
+	/* IRQ 0,1,2,8,13 are marked as reserved */
+	trigger[0] = inb(0x4d0) & 0xF8;
+	trigger[1] = inb(0x4d1) & 0xDE;
+}
+
+static int i8259A_resume(struct sys_device *dev)
+{
+	init_8259A(i8259A_auto_eoi);
+	restore_ELCR(irq_trigger);
+	return 0;
+}
+
+static int i8259A_suspend(struct sys_device *dev, pm_message_t state)
+{
+	save_ELCR(irq_trigger);
+	return 0;
+}
+
+static int i8259A_shutdown(struct sys_device *dev)
+{
+	/* Put the i8259A into a quiescent state that
+	 * the kernel initialization code can get it
+	 * out of.
+	 */
+	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
+	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-1 */
+	return 0;
+}
+
+static struct sysdev_class i8259_sysdev_class = {
+	.name = "i8259",
+	.suspend = i8259A_suspend,
+	.resume = i8259A_resume,
+	.shutdown = i8259A_shutdown,
+};
+
+static struct sys_device device_i8259A = {
+	.id	= 0,
+	.cls	= &i8259_sysdev_class,
+};
+
+static int __init i8259A_init_sysfs(void)
+{
+	int error = sysdev_class_register(&i8259_sysdev_class);
+	if (!error)
+		error = sysdev_register(&device_i8259A);
+	return error;
+}
+
+device_initcall(i8259A_init_sysfs);
+
+void init_8259A(int auto_eoi)
+{
+	unsigned long flags;
+
+	i8259A_auto_eoi = auto_eoi;
+
+	spin_lock_irqsave(&i8259A_lock, flags);
+
+	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
+	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-2 */
+
+	/*
+	 * outb_pic - this has to work on a wide range of PC hardware.
+	 */
+	outb_pic(0x11, PIC_MASTER_CMD);	/* ICW1: select 8259A-1 init */
+#ifndef CONFIG_X86_64
+	outb_pic(0x20 + 0, PIC_MASTER_IMR);	/* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */
+	outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR);	/* 8259A-1 (the master) has a slave on IR2 */
+#else /* CONFIG_X86_64 */
+	/* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */
+	outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR);
+	/* 8259A-1 (the master) has a slave on IR2 */
+	outb_pic(0x04, PIC_MASTER_IMR);
+#endif /* CONFIG_X86_64 */
+	if (auto_eoi)	/* master does Auto EOI */
+		outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR);
+	else		/* master expects normal EOI */
+		outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR);
+
+	outb_pic(0x11, PIC_SLAVE_CMD);	/* ICW1: select 8259A-2 init */
+#ifndef CONFIG_X86_64
+	outb_pic(0x20 + 8, PIC_SLAVE_IMR);	/* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */
+	outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR);	/* 8259A-2 is a slave on master's IR2 */
+	outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */
+#else /* CONFIG_X86_64 */
+	/* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */
+	outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR);
+	/* 8259A-2 is a slave on master's IR2 */
+	outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR);
+	/* (slave's support for AEOI in flat mode is to be investigated) */
+	outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR);
+
+#endif /* CONFIG_X86_64 */
+	if (auto_eoi)
+		/*
+		 * In AEOI mode we just have to mask the interrupt
+		 * when acking.
+		 */
+		i8259A_chip.mask_ack = disable_8259A_irq;
+	else
+		i8259A_chip.mask_ack = mask_and_ack_8259A;
+
+	udelay(100);		/* wait for 8259A to initialize */
+
+	outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
+	outb(cached_slave_mask, PIC_SLAVE_IMR);	  /* restore slave IRQ mask */
+
+	spin_unlock_irqrestore(&i8259A_lock, flags);
+}
diff --git a/arch/x86/kernel/i8259_32.c b/arch/x86/kernel/i8259_32.c
index fe631967d62..d66914287ee 100644
--- a/arch/x86/kernel/i8259_32.c
+++ b/arch/x86/kernel/i8259_32.c
@@ -21,302 +21,7 @@
 #include <asm/arch_hooks.h>
 #include <asm/i8259.h>
 
-/*
- * This is the 'legacy' 8259A Programmable Interrupt Controller,
- * present in the majority of PC/AT boxes.
- * plus some generic x86 specific things if generic specifics makes
- * any sense at all.
- */
-
-static int i8259A_auto_eoi;
-DEFINE_SPINLOCK(i8259A_lock);
-static void mask_and_ack_8259A(unsigned int);
-
-static struct irq_chip i8259A_chip = {
-	.name		= "XT-PIC",
-	.mask		= disable_8259A_irq,
-	.disable	= disable_8259A_irq,
-	.unmask		= enable_8259A_irq,
-	.mask_ack	= mask_and_ack_8259A,
-};
-
-/*
- * 8259A PIC functions to handle ISA devices:
- */
-
-/*
- * This contains the irq mask for both 8259A irq controllers,
- */
-unsigned int cached_irq_mask = 0xffff;
-
-/*
- * Not all IRQs can be routed through the IO-APIC, eg. on certain (older)
- * boards the timer interrupt is not really connected to any IO-APIC pin,
- * it's fed to the master 8259A's IR0 line only.
- *
- * Any '1' bit in this mask means the IRQ is routed through the IO-APIC.
- * this 'mixed mode' IRQ handling costs nothing because it's only used
- * at IRQ setup time.
- */
-unsigned long io_apic_irqs;
-
-void disable_8259A_irq(unsigned int irq)
-{
-	unsigned int mask = 1 << irq;
-	unsigned long flags;
-
-	spin_lock_irqsave(&i8259A_lock, flags);
-	cached_irq_mask |= mask;
-	if (irq & 8)
-		outb(cached_slave_mask, PIC_SLAVE_IMR);
-	else
-		outb(cached_master_mask, PIC_MASTER_IMR);
-	spin_unlock_irqrestore(&i8259A_lock, flags);
-}
-
-void enable_8259A_irq(unsigned int irq)
-{
-	unsigned int mask = ~(1 << irq);
-	unsigned long flags;
-
-	spin_lock_irqsave(&i8259A_lock, flags);
-	cached_irq_mask &= mask;
-	if (irq & 8)
-		outb(cached_slave_mask, PIC_SLAVE_IMR);
-	else
-		outb(cached_master_mask, PIC_MASTER_IMR);
-	spin_unlock_irqrestore(&i8259A_lock, flags);
-}
-
-int i8259A_irq_pending(unsigned int irq)
-{
-	unsigned int mask = 1<<irq;
-	unsigned long flags;
-	int ret;
-
-	spin_lock_irqsave(&i8259A_lock, flags);
-	if (irq < 8)
-		ret = inb(PIC_MASTER_CMD) & mask;
-	else
-		ret = inb(PIC_SLAVE_CMD) & (mask >> 8);
-	spin_unlock_irqrestore(&i8259A_lock, flags);
-
-	return ret;
-}
-
-void make_8259A_irq(unsigned int irq)
-{
-	disable_irq_nosync(irq);
-	io_apic_irqs &= ~(1<<irq);
-	set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq,
-				      "XT");
-	enable_irq(irq);
-}
-
-/*
- * This function assumes to be called rarely. Switching between
- * 8259A registers is slow.
- * This has to be protected by the irq controller spinlock
- * before being called.
- */
-static inline int i8259A_irq_real(unsigned int irq)
-{
-	int value;
-	int irqmask = 1<<irq;
-
-	if (irq < 8) {
-		outb(0x0B,PIC_MASTER_CMD);	/* ISR register */
-		value = inb(PIC_MASTER_CMD) & irqmask;
-		outb(0x0A,PIC_MASTER_CMD);	/* back to the IRR register */
-		return value;
-	}
-	outb(0x0B,PIC_SLAVE_CMD);	/* ISR register */
-	value = inb(PIC_SLAVE_CMD) & (irqmask >> 8);
-	outb(0x0A,PIC_SLAVE_CMD);	/* back to the IRR register */
-	return value;
-}
-
-/*
- * Careful! The 8259A is a fragile beast, it pretty
- * much _has_ to be done exactly like this (mask it
- * first, _then_ send the EOI, and the order of EOI
- * to the two 8259s is important!
- */
-static void mask_and_ack_8259A(unsigned int irq)
-{
-	unsigned int irqmask = 1 << irq;
-	unsigned long flags;
-
-	spin_lock_irqsave(&i8259A_lock, flags);
-	/*
-	 * Lightweight spurious IRQ detection. We do not want
-	 * to overdo spurious IRQ handling - it's usually a sign
-	 * of hardware problems, so we only do the checks we can
-	 * do without slowing down good hardware unnecessarily.
-	 *
-	 * Note that IRQ7 and IRQ15 (the two spurious IRQs
-	 * usually resulting from the 8259A-1|2 PICs) occur
-	 * even if the IRQ is masked in the 8259A. Thus we
-	 * can check spurious 8259A IRQs without doing the
-	 * quite slow i8259A_irq_real() call for every IRQ.
-	 * This does not cover 100% of spurious interrupts,
-	 * but should be enough to warn the user that there
-	 * is something bad going on ...
-	 */
-	if (cached_irq_mask & irqmask)
-		goto spurious_8259A_irq;
-	cached_irq_mask |= irqmask;
 
-handle_real_irq:
-	if (irq & 8) {
-		inb(PIC_SLAVE_IMR);	/* DUMMY - (do we need this?) */
-		outb(cached_slave_mask, PIC_SLAVE_IMR);
-		outb(0x60+(irq&7),PIC_SLAVE_CMD);/* 'Specific EOI' to slave */
-		outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); /* 'Specific EOI' to master-IRQ2 */
-	} else {
-		inb(PIC_MASTER_IMR);	/* DUMMY - (do we need this?) */
-		outb(cached_master_mask, PIC_MASTER_IMR);
-		outb(0x60+irq,PIC_MASTER_CMD);	/* 'Specific EOI to master */
-	}
-	spin_unlock_irqrestore(&i8259A_lock, flags);
-	return;
-
-spurious_8259A_irq:
-	/*
-	 * this is the slow path - should happen rarely.
-	 */
-	if (i8259A_irq_real(irq))
-		/*
-		 * oops, the IRQ _is_ in service according to the
-		 * 8259A - not spurious, go handle it.
-		 */
-		goto handle_real_irq;
-
-	{
-		static int spurious_irq_mask;
-		/*
-		 * At this point we can be sure the IRQ is spurious,
-		 * lets ACK and report it. [once per IRQ]
-		 */
-		if (!(spurious_irq_mask & irqmask)) {
-			printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq);
-			spurious_irq_mask |= irqmask;
-		}
-		atomic_inc(&irq_err_count);
-		/*
-		 * Theoretically we do not have to handle this IRQ,
-		 * but in Linux this does not cause problems and is
-		 * simpler for us.
-		 */
-		goto handle_real_irq;
-	}
-}
-
-static char irq_trigger[2];
-/**
- * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ
- */
-static void restore_ELCR(char *trigger)
-{
-	outb(trigger[0], 0x4d0);
-	outb(trigger[1], 0x4d1);
-}
-
-static void save_ELCR(char *trigger)
-{
-	/* IRQ 0,1,2,8,13 are marked as reserved */
-	trigger[0] = inb(0x4d0) & 0xF8;
-	trigger[1] = inb(0x4d1) & 0xDE;
-}
-
-static int i8259A_resume(struct sys_device *dev)
-{
-	init_8259A(i8259A_auto_eoi);
-	restore_ELCR(irq_trigger);
-	return 0;
-}
-
-static int i8259A_suspend(struct sys_device *dev, pm_message_t state)
-{
-	save_ELCR(irq_trigger);
-	return 0;
-}
-
-static int i8259A_shutdown(struct sys_device *dev)
-{
-	/* Put the i8259A into a quiescent state that
-	 * the kernel initialization code can get it
-	 * out of.
-	 */
-	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
-	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-1 */
-	return 0;
-}
-
-static struct sysdev_class i8259_sysdev_class = {
-	.name = "i8259",
-	.suspend = i8259A_suspend,
-	.resume = i8259A_resume,
-	.shutdown = i8259A_shutdown,
-};
-
-static struct sys_device device_i8259A = {
-	.id	= 0,
-	.cls	= &i8259_sysdev_class,
-};
-
-static int __init i8259A_init_sysfs(void)
-{
-	int error = sysdev_class_register(&i8259_sysdev_class);
-	if (!error)
-		error = sysdev_register(&device_i8259A);
-	return error;
-}
-
-device_initcall(i8259A_init_sysfs);
-
-void init_8259A(int auto_eoi)
-{
-	unsigned long flags;
-
-	i8259A_auto_eoi = auto_eoi;
-
-	spin_lock_irqsave(&i8259A_lock, flags);
-
-	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
-	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-2 */
-
-	/*
-	 * outb_pic - this has to work on a wide range of PC hardware.
-	 */
-	outb_pic(0x11, PIC_MASTER_CMD);	/* ICW1: select 8259A-1 init */
-	outb_pic(0x20 + 0, PIC_MASTER_IMR);	/* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */
-	outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR);	/* 8259A-1 (the master) has a slave on IR2 */
-	if (auto_eoi)	/* master does Auto EOI */
-		outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR);
-	else		/* master expects normal EOI */
-		outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR);
-
-	outb_pic(0x11, PIC_SLAVE_CMD);	/* ICW1: select 8259A-2 init */
-	outb_pic(0x20 + 8, PIC_SLAVE_IMR);	/* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */
-	outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR);	/* 8259A-2 is a slave on master's IR2 */
-	outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */
-	if (auto_eoi)
-		/*
-		 * In AEOI mode we just have to mask the interrupt
-		 * when acking.
-		 */
-		i8259A_chip.mask_ack = disable_8259A_irq;
-	else
-		i8259A_chip.mask_ack = mask_and_ack_8259A;
-
-	udelay(100);		/* wait for 8259A to initialize */
-
-	outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
-	outb(cached_slave_mask, PIC_SLAVE_IMR);	  /* restore slave IRQ mask */
-
-	spin_unlock_irqrestore(&i8259A_lock, flags);
-}
 
 /*
  * Note that on a 486, we don't want to do a SIGFPE on an irq13
diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c
index 1870e0e8655..b44095efcf8 100644
--- a/arch/x86/kernel/i8259_64.c
+++ b/arch/x86/kernel/i8259_64.c
@@ -101,315 +101,6 @@ static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) =
 #undef IRQ
 #undef IRQLIST_16
 
-/*
- * This is the 'legacy' 8259A Programmable Interrupt Controller,
- * present in the majority of PC/AT boxes.
- * plus some generic x86 specific things if generic specifics makes
- * any sense at all.
- * this file should become arch/i386/kernel/irq.c when the old irq.c
- * moves to arch independent land
- */
-
-static int i8259A_auto_eoi;
-DEFINE_SPINLOCK(i8259A_lock);
-static void mask_and_ack_8259A(unsigned int);
-
-static struct irq_chip i8259A_chip = {
-	.name		= "XT-PIC",
-	.mask		= disable_8259A_irq,
-	.disable	= disable_8259A_irq,
-	.unmask		= enable_8259A_irq,
-	.mask_ack	= mask_and_ack_8259A,
-};
-
-/*
- * 8259A PIC functions to handle ISA devices:
- */
-
-/*
- * This contains the irq mask for both 8259A irq controllers,
- */
-unsigned int cached_irq_mask = 0xffff;
-
-/*
- * Not all IRQs can be routed through the IO-APIC, eg. on certain (older)
- * boards the timer interrupt is not really connected to any IO-APIC pin,
- * it's fed to the master 8259A's IR0 line only.
- *
- * Any '1' bit in this mask means the IRQ is routed through the IO-APIC.
- * this 'mixed mode' IRQ handling costs nothing because it's only used
- * at IRQ setup time.
- */
-unsigned long io_apic_irqs;
-
-void disable_8259A_irq(unsigned int irq)
-{
-	unsigned int mask = 1 << irq;
-	unsigned long flags;
-
-	spin_lock_irqsave(&i8259A_lock, flags);
-	cached_irq_mask |= mask;
-	if (irq & 8)
-		outb(cached_slave_mask, PIC_SLAVE_IMR);
-	else
-		outb(cached_master_mask, PIC_MASTER_IMR);
-	spin_unlock_irqrestore(&i8259A_lock, flags);
-}
-
-void enable_8259A_irq(unsigned int irq)
-{
-	unsigned int mask = ~(1 << irq);
-	unsigned long flags;
-
-	spin_lock_irqsave(&i8259A_lock, flags);
-	cached_irq_mask &= mask;
-	if (irq & 8)
-		outb(cached_slave_mask, PIC_SLAVE_IMR);
-	else
-		outb(cached_master_mask, PIC_MASTER_IMR);
-	spin_unlock_irqrestore(&i8259A_lock, flags);
-}
-
-int i8259A_irq_pending(unsigned int irq)
-{
-	unsigned int mask = 1<<irq;
-	unsigned long flags;
-	int ret;
-
-	spin_lock_irqsave(&i8259A_lock, flags);
-	if (irq < 8)
-		ret = inb(PIC_MASTER_CMD) & mask;
-	else
-		ret = inb(PIC_SLAVE_CMD) & (mask >> 8);
-	spin_unlock_irqrestore(&i8259A_lock, flags);
-
-	return ret;
-}
-
-void make_8259A_irq(unsigned int irq)
-{
-	disable_irq_nosync(irq);
-	io_apic_irqs &= ~(1<<irq);
-	set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq,
-				      "XT");
-	enable_irq(irq);
-}
-
-/*
- * This function assumes to be called rarely. Switching between
- * 8259A registers is slow.
- * This has to be protected by the irq controller spinlock
- * before being called.
- */
-static inline int i8259A_irq_real(unsigned int irq)
-{
-	int value;
-	int irqmask = 1<<irq;
-
-	if (irq < 8) {
-		outb(0x0B,PIC_MASTER_CMD);	/* ISR register */
-		value = inb(PIC_MASTER_CMD) & irqmask;
-		outb(0x0A,PIC_MASTER_CMD);	/* back to the IRR register */
-		return value;
-	}
-	outb(0x0B,PIC_SLAVE_CMD);	/* ISR register */
-	value = inb(PIC_SLAVE_CMD) & (irqmask >> 8);
-	outb(0x0A,PIC_SLAVE_CMD);	/* back to the IRR register */
-	return value;
-}
-
-/*
- * Careful! The 8259A is a fragile beast, it pretty
- * much _has_ to be done exactly like this (mask it
- * first, _then_ send the EOI, and the order of EOI
- * to the two 8259s is important!
- */
-static void mask_and_ack_8259A(unsigned int irq)
-{
-	unsigned int irqmask = 1 << irq;
-	unsigned long flags;
-
-	spin_lock_irqsave(&i8259A_lock, flags);
-	/*
-	 * Lightweight spurious IRQ detection. We do not want
-	 * to overdo spurious IRQ handling - it's usually a sign
-	 * of hardware problems, so we only do the checks we can
-	 * do without slowing down good hardware unnecessarily.
-	 *
-	 * Note that IRQ7 and IRQ15 (the two spurious IRQs
-	 * usually resulting from the 8259A-1|2 PICs) occur
-	 * even if the IRQ is masked in the 8259A. Thus we
-	 * can check spurious 8259A IRQs without doing the
-	 * quite slow i8259A_irq_real() call for every IRQ.
-	 * This does not cover 100% of spurious interrupts,
-	 * but should be enough to warn the user that there
-	 * is something bad going on ...
-	 */
-	if (cached_irq_mask & irqmask)
-		goto spurious_8259A_irq;
-	cached_irq_mask |= irqmask;
-
-handle_real_irq:
-	if (irq & 8) {
-		inb(PIC_SLAVE_IMR);	/* DUMMY - (do we need this?) */
-		outb(cached_slave_mask, PIC_SLAVE_IMR);
-		/* 'Specific EOI' to slave */
-		outb(0x60+(irq&7),PIC_SLAVE_CMD);
-		 /* 'Specific EOI' to master-IRQ2 */
-		outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD);
-	} else {
-		inb(PIC_MASTER_IMR);	/* DUMMY - (do we need this?) */
-		outb(cached_master_mask, PIC_MASTER_IMR);
-		/* 'Specific EOI' to master */
-		outb(0x60+irq,PIC_MASTER_CMD);
-	}
-	spin_unlock_irqrestore(&i8259A_lock, flags);
-	return;
-
-spurious_8259A_irq:
-	/*
-	 * this is the slow path - should happen rarely.
-	 */
-	if (i8259A_irq_real(irq))
-		/*
-		 * oops, the IRQ _is_ in service according to the
-		 * 8259A - not spurious, go handle it.
-		 */
-		goto handle_real_irq;
-
-	{
-		static int spurious_irq_mask;
-		/*
-		 * At this point we can be sure the IRQ is spurious,
-		 * lets ACK and report it. [once per IRQ]
-		 */
-		if (!(spurious_irq_mask & irqmask)) {
-			printk(KERN_DEBUG
-			       "spurious 8259A interrupt: IRQ%d.\n", irq);
-			spurious_irq_mask |= irqmask;
-		}
-		atomic_inc(&irq_err_count);
-		/*
-		 * Theoretically we do not have to handle this IRQ,
-		 * but in Linux this does not cause problems and is
-		 * simpler for us.
-		 */
-		goto handle_real_irq;
-	}
-}
-
-static char irq_trigger[2];
-/**
- * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ
- */
-static void restore_ELCR(char *trigger)
-{
-	outb(trigger[0], 0x4d0);
-	outb(trigger[1], 0x4d1);
-}
-
-static void save_ELCR(char *trigger)
-{
-	/* IRQ 0,1,2,8,13 are marked as reserved */
-	trigger[0] = inb(0x4d0) & 0xF8;
-	trigger[1] = inb(0x4d1) & 0xDE;
-}
-
-static int i8259A_resume(struct sys_device *dev)
-{
-	init_8259A(i8259A_auto_eoi);
-	restore_ELCR(irq_trigger);
-	return 0;
-}
-
-static int i8259A_suspend(struct sys_device *dev, pm_message_t state)
-{
-	save_ELCR(irq_trigger);
-	return 0;
-}
-
-static int i8259A_shutdown(struct sys_device *dev)
-{
-	/* Put the i8259A into a quiescent state that
-	 * the kernel initialization code can get it
-	 * out of.
-	 */
-	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
-	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-1 */
-	return 0;
-}
-
-static struct sysdev_class i8259_sysdev_class = {
-	.name = "i8259",
-	.suspend = i8259A_suspend,
-	.resume = i8259A_resume,
-	.shutdown = i8259A_shutdown,
-};
-
-static struct sys_device device_i8259A = {
-	.id	= 0,
-	.cls	= &i8259_sysdev_class,
-};
-
-static int __init i8259A_init_sysfs(void)
-{
-	int error = sysdev_class_register(&i8259_sysdev_class);
-	if (!error)
-		error = sysdev_register(&device_i8259A);
-	return error;
-}
-
-device_initcall(i8259A_init_sysfs);
-
-void init_8259A(int auto_eoi)
-{
-	unsigned long flags;
-
-	i8259A_auto_eoi = auto_eoi;
-
-	spin_lock_irqsave(&i8259A_lock, flags);
-
-	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
-	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-2 */
-
-	/*
-	 * outb_pic - this has to work on a wide range of PC hardware.
-	 */
-	outb_pic(0x11, PIC_MASTER_CMD);	/* ICW1: select 8259A-1 init */
-	/* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */
-	outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR);
-	/* 8259A-1 (the master) has a slave on IR2 */
-	outb_pic(0x04, PIC_MASTER_IMR);
-	if (auto_eoi)	/* master does Auto EOI */
-		outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR);
-	else		/* master expects normal EOI */
-		outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR);
-
-	outb_pic(0x11, PIC_SLAVE_CMD);	/* ICW1: select 8259A-2 init */
-	/* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */
-	outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR);
-	/* 8259A-2 is a slave on master's IR2 */
-	outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR);
-	/* (slave's support for AEOI in flat mode is to be investigated) */
-	outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR);
-
-	if (auto_eoi)
-		/*
-		 * In AEOI mode we just have to mask the interrupt
-		 * when acking.
-		 */
-		i8259A_chip.mask_ack = disable_8259A_irq;
-	else
-		i8259A_chip.mask_ack = mask_and_ack_8259A;
-
-	udelay(100);		/* wait for 8259A to initialize */
-
-	outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
-	outb(cached_slave_mask, PIC_SLAVE_IMR);	  /* restore slave IRQ mask */
-
-	spin_unlock_irqrestore(&i8259A_lock, flags);
-}
-
 
 
diff --git a/include/asm-x86/i8259.h b/include/asm-x86/i8259.h
index 45d4df3e51e..2f98df91f1f 100644
--- a/include/asm-x86/i8259.h
+++ b/include/asm-x86/i8259.h
@@ -55,4 +55,6 @@ static inline void outb_pic(unsigned char value, unsigned int port)
 	udelay(2);
 }
 
+extern struct irq_chip i8259A_chip;
+
 #endif	/* __ASM_I8259_H__ */
-- 
cgit v1.2.3-70-g09d2


From 3711ccb07b7f0a13f4f1aa16a8fdca9c930f21ca Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 24 May 2008 17:24:34 +0200
Subject: x86: fixup the fallout of the bitops changes

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/thread_info.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index 25d71053256..895339d2bc0 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -245,7 +245,7 @@ static inline void set_restore_sigmask(void)
 {
 	struct thread_info *ti = current_thread_info();
 	ti->status |= TS_RESTORE_SIGMASK;
-	set_bit(TIF_SIGPENDING, &ti->flags);
+	set_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags);
 }
 #endif	/* !__ASSEMBLY__ */
 
-- 
cgit v1.2.3-70-g09d2


From 63687a528c39a67c1a213cdffa09feb0e6af9dbe Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Mon, 12 May 2008 15:44:41 +0200
Subject: x86: move tracedata to RODATA

.. allowing it to be write-protected just as other read-only data
under CONFIG_DEBUG_RODATA.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/vmlinux_32.lds.S  |  7 -------
 arch/x86/kernel/vmlinux_64.lds.S  |  7 -------
 drivers/base/power/trace.c        |  2 +-
 include/asm-generic/vmlinux.lds.h | 14 ++++++++++++++
 include/asm-x86/resume-trace.h    |  2 +-
 include/linux/resume-trace.h      |  2 +-
 6 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index ce5ed083a1e..2674f579627 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -60,13 +60,6 @@ SECTIONS
 
   BUG_TABLE :text
 
-  . = ALIGN(4);
-  .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) {
-  	__tracedata_start = .;
-	*(.tracedata)
-  	__tracedata_end = .;
-  }
-
   RODATA
 
   /* writeable */
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index fad3674b06a..687041bfbae 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -53,13 +53,6 @@ SECTIONS
 
   RODATA
 
-  . = ALIGN(4);
-  .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) {
-  	__tracedata_start = .;
-	*(.tracedata)
-  	__tracedata_end = .;
-  }
-
   . = ALIGN(PAGE_SIZE);		/* Align data segment to page size boundary */
 				/* Data */
   .data : AT(ADDR(.data) - LOAD_OFFSET) {
diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c
index 2b4b392dcbc..87a7f1d0257 100644
--- a/drivers/base/power/trace.c
+++ b/drivers/base/power/trace.c
@@ -153,7 +153,7 @@ EXPORT_SYMBOL(set_trace_device);
  * it's not any guarantee, but it's a high _likelihood_ that
  * the match is valid).
  */
-void generate_resume_trace(void *tracedata, unsigned int user)
+void generate_resume_trace(const void *tracedata, unsigned int user)
 {
 	unsigned short lineno = *(unsigned short *)tracedata;
 	const char *file = *(const char **)(tracedata + 2);
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index f054778e916..f1992dc5c42 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -93,6 +93,8 @@
 		VMLINUX_SYMBOL(__end_rio_route_ops) = .;		\
 	}								\
 									\
+	TRACEDATA							\
+									\
 	/* Kernel symbol table: Normal symbols */			\
 	__ksymtab         : AT(ADDR(__ksymtab) - LOAD_OFFSET) {		\
 		VMLINUX_SYMBOL(__start___ksymtab) = .;			\
@@ -318,6 +320,18 @@
 		__stop___bug_table = .;					\
 	}
 
+#ifdef CONFIG_PM_TRACE
+#define TRACEDATA							\
+	. = ALIGN(4);							\
+	.tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) {		\
+	  	__tracedata_start = .;					\
+		*(.tracedata)						\
+	  	__tracedata_end = .;					\
+	}
+#else
+#define TRACEDATA
+#endif
+
 #define NOTES								\
 	.notes : AT(ADDR(.notes) - LOAD_OFFSET) {			\
 		VMLINUX_SYMBOL(__start_notes) = .;			\
diff --git a/include/asm-x86/resume-trace.h b/include/asm-x86/resume-trace.h
index 2557514d7ef..8d9f0b41ee8 100644
--- a/include/asm-x86/resume-trace.h
+++ b/include/asm-x86/resume-trace.h
@@ -6,7 +6,7 @@
 #define TRACE_RESUME(user)					\
 do {								\
 	if (pm_trace_enabled) {					\
-		void *tracedata;				\
+		const void *tracedata;				\
 		asm volatile(_ASM_MOV_UL " $1f,%0\n"		\
 			     ".section .tracedata,\"a\"\n"	\
 			     "1:\t.word %c1\n\t"		\
diff --git a/include/linux/resume-trace.h b/include/linux/resume-trace.h
index f3f4f28c696..c9ba2fdf807 100644
--- a/include/linux/resume-trace.h
+++ b/include/linux/resume-trace.h
@@ -8,7 +8,7 @@ extern int pm_trace_enabled;
 
 struct device;
 extern void set_trace_device(struct device *);
-extern void generate_resume_trace(void *tracedata, unsigned int user);
+extern void generate_resume_trace(const void *tracedata, unsigned int user);
 
 #define TRACE_DEVICE(dev) do { \
 	if (pm_trace_enabled) \
-- 
cgit v1.2.3-70-g09d2


From a2eddfa95919a730e0e5ed17e9c303fe5ba249cd Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Mon, 12 May 2008 15:44:41 +0200
Subject: x86: make /proc/stat account for all interrupts

LAPIC interrupts, which don't go through the generic interrupt handling
code, aren't accounted for in /proc/stat. Hence this patch adds a
mechanism architectures can use to accordingly adjust the statistics.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/irq_32.c  | 38 ++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/irq_64.c  | 28 ++++++++++++++++++++++++++++
 fs/proc/proc_misc.c       |  9 +++++++++
 include/asm-x86/hardirq.h |  6 ++++++
 4 files changed, 81 insertions(+)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 147352df28b..468acd04aa2 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -313,16 +313,20 @@ skip:
 				per_cpu(irq_stat,j).irq_tlb_count);
 		seq_printf(p, "  TLB shootdowns\n");
 #endif
+#ifdef CONFIG_X86_MCE
 		seq_printf(p, "TRM: ");
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ",
 				per_cpu(irq_stat,j).irq_thermal_count);
 		seq_printf(p, "  Thermal event interrupts\n");
+#endif
+#ifdef CONFIG_X86_LOCAL_APIC
 		seq_printf(p, "SPU: ");
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ",
 				per_cpu(irq_stat,j).irq_spurious_count);
 		seq_printf(p, "  Spurious interrupts\n");
+#endif
 		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
 #if defined(CONFIG_X86_IO_APIC)
 		seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
@@ -331,6 +335,40 @@ skip:
 	return 0;
 }
 
+/*
+ * /proc/stat helpers
+ */
+u64 arch_irq_stat_cpu(unsigned int cpu)
+{
+	u64 sum = nmi_count(cpu);
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	sum += per_cpu(irq_stat, cpu).apic_timer_irqs;
+#endif
+#ifdef CONFIG_SMP
+	sum += per_cpu(irq_stat, cpu).irq_resched_count;
+	sum += per_cpu(irq_stat, cpu).irq_call_count;
+	sum += per_cpu(irq_stat, cpu).irq_tlb_count;
+#endif
+#ifdef CONFIG_X86_MCE
+	sum += per_cpu(irq_stat, cpu).irq_thermal_count;
+#endif
+#ifdef CONFIG_X86_LOCAL_APIC
+	sum += per_cpu(irq_stat, cpu).irq_spurious_count;
+#endif
+	return sum;
+}
+
+u64 arch_irq_stat(void)
+{
+	u64 sum = atomic_read(&irq_err_count);
+
+#ifdef CONFIG_X86_IO_APIC
+	sum += atomic_read(&irq_mis_count);
+#endif
+	return sum;
+}
+
 #ifdef CONFIG_HOTPLUG_CPU
 #include <mach_apic.h>
 
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 3aac15466a9..1f78b238d8d 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -135,6 +135,7 @@ skip:
 			seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count);
 		seq_printf(p, "  TLB shootdowns\n");
 #endif
+#ifdef CONFIG_X86_MCE
 		seq_printf(p, "TRM: ");
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count);
@@ -143,6 +144,7 @@ skip:
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count);
 		seq_printf(p, "  Threshold APIC interrupts\n");
+#endif
 		seq_printf(p, "SPU: ");
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count);
@@ -152,6 +154,32 @@ skip:
 	return 0;
 }
 
+/*
+ * /proc/stat helpers
+ */
+u64 arch_irq_stat_cpu(unsigned int cpu)
+{
+	u64 sum = cpu_pda(cpu)->__nmi_count;
+
+	sum += cpu_pda(cpu)->apic_timer_irqs;
+#ifdef CONFIG_SMP
+	sum += cpu_pda(cpu)->irq_resched_count;
+	sum += cpu_pda(cpu)->irq_call_count;
+	sum += cpu_pda(cpu)->irq_tlb_count;
+#endif
+#ifdef CONFIG_X86_MCE
+	sum += cpu_pda(cpu)->irq_thermal_count;
+	sum += cpu_pda(cpu)->irq_threshold_count;
+#endif
+	sum += cpu_pda(cpu)->irq_spurious_count;
+	return sum;
+}
+
+u64 arch_irq_stat(void)
+{
+	return atomic_read(&irq_err_count);
+}
+
 /*
  * do_IRQ handles all normal device IRQ's (the special
  * SMP cross-CPU interrupts have their own specific
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 74a323d2b85..903e617bec5 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -472,6 +472,13 @@ static const struct file_operations proc_vmalloc_operations = {
 };
 #endif
 
+#ifndef arch_irq_stat_cpu
+#define arch_irq_stat_cpu(cpu) 0
+#endif
+#ifndef arch_irq_stat
+#define arch_irq_stat() 0
+#endif
+
 static int show_stat(struct seq_file *p, void *v)
 {
 	int i;
@@ -509,7 +516,9 @@ static int show_stat(struct seq_file *p, void *v)
 			sum += temp;
 			per_irq_sum[j] += temp;
 		}
+		sum += arch_irq_stat_cpu(i);
 	}
+	sum += arch_irq_stat();
 
 	seq_printf(p, "cpu  %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
 		(unsigned long long)cputime64_to_clock_t(user),
diff --git a/include/asm-x86/hardirq.h b/include/asm-x86/hardirq.h
index 314434d664e..000787df66e 100644
--- a/include/asm-x86/hardirq.h
+++ b/include/asm-x86/hardirq.h
@@ -3,3 +3,9 @@
 #else
 # include "hardirq_64.h"
 #endif
+
+extern u64 arch_irq_stat_cpu(unsigned int cpu);
+#define arch_irq_stat_cpu	arch_irq_stat_cpu
+
+extern u64 arch_irq_stat(void);
+#define arch_irq_stat		arch_irq_stat
-- 
cgit v1.2.3-70-g09d2


From 5136dea5734cfddbc6d7ccb7ead85a3ac7ce3de2 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 14 May 2008 16:10:41 -0700
Subject: x86: bitops take an unsigned long *

All (or most) other architectures do this.  So should x86.  Fix.

Cc: Andrea Arcangeli <andrea@qumranet.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/bitops.h | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h
index ee4b3ead6a4..7d2494bdc66 100644
--- a/include/asm-x86/bitops.h
+++ b/include/asm-x86/bitops.h
@@ -43,7 +43,7 @@
  * Note that @nr may be almost arbitrarily large; this function is not
  * restricted to acting on a single-word quantity.
  */
-static inline void set_bit(int nr, volatile void *addr)
+static inline void set_bit(int nr, volatile unsigned long *addr)
 {
 	asm volatile(LOCK_PREFIX "bts %1,%0" : ADDR : "Ir" (nr) : "memory");
 }
@@ -57,7 +57,7 @@ static inline void set_bit(int nr, volatile void *addr)
  * If it's called on the same region of memory simultaneously, the effect
  * may be that only one operation succeeds.
  */
-static inline void __set_bit(int nr, volatile void *addr)
+static inline void __set_bit(int nr, volatile unsigned long *addr)
 {
 	asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
 }
@@ -72,7 +72,7 @@ static inline void __set_bit(int nr, volatile void *addr)
  * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
  * in order to ensure changes are visible on other processors.
  */
-static inline void clear_bit(int nr, volatile void *addr)
+static inline void clear_bit(int nr, volatile unsigned long *addr)
 {
 	asm volatile(LOCK_PREFIX "btr %1,%0" : ADDR : "Ir" (nr));
 }
@@ -85,13 +85,13 @@ static inline void clear_bit(int nr, volatile void *addr)
  * clear_bit() is atomic and implies release semantics before the memory
  * operation. It can be used for an unlock.
  */
-static inline void clear_bit_unlock(unsigned nr, volatile void *addr)
+static inline void clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
 {
 	barrier();
 	clear_bit(nr, addr);
 }
 
-static inline void __clear_bit(int nr, volatile void *addr)
+static inline void __clear_bit(int nr, volatile unsigned long *addr)
 {
 	asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
 }
@@ -108,7 +108,7 @@ static inline void __clear_bit(int nr, volatile void *addr)
  * No memory barrier is required here, because x86 cannot reorder stores past
  * older loads. Same principle as spin_unlock.
  */
-static inline void __clear_bit_unlock(unsigned nr, volatile void *addr)
+static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
 {
 	barrier();
 	__clear_bit(nr, addr);
@@ -126,7 +126,7 @@ static inline void __clear_bit_unlock(unsigned nr, volatile void *addr)
  * If it's called on the same region of memory simultaneously, the effect
  * may be that only one operation succeeds.
  */
-static inline void __change_bit(int nr, volatile void *addr)
+static inline void __change_bit(int nr, volatile unsigned long *addr)
 {
 	asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
 }
@@ -140,7 +140,7 @@ static inline void __change_bit(int nr, volatile void *addr)
  * Note that @nr may be almost arbitrarily large; this function is not
  * restricted to acting on a single-word quantity.
  */
-static inline void change_bit(int nr, volatile void *addr)
+static inline void change_bit(int nr, volatile unsigned long *addr)
 {
 	asm volatile(LOCK_PREFIX "btc %1,%0" : ADDR : "Ir" (nr));
 }
@@ -153,7 +153,7 @@ static inline void change_bit(int nr, volatile void *addr)
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
  */
-static inline int test_and_set_bit(int nr, volatile void *addr)
+static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
@@ -170,7 +170,7 @@ static inline int test_and_set_bit(int nr, volatile void *addr)
  *
  * This is the same as test_and_set_bit on x86.
  */
-static inline int test_and_set_bit_lock(int nr, volatile void *addr)
+static inline int test_and_set_bit_lock(int nr, volatile unsigned long *addr)
 {
 	return test_and_set_bit(nr, addr);
 }
@@ -184,7 +184,7 @@ static inline int test_and_set_bit_lock(int nr, volatile void *addr)
  * If two examples of this operation race, one can appear to succeed
  * but actually fail.  You must protect multiple accesses with a lock.
  */
-static inline int __test_and_set_bit(int nr, volatile void *addr)
+static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
@@ -203,7 +203,7 @@ static inline int __test_and_set_bit(int nr, volatile void *addr)
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
  */
-static inline int test_and_clear_bit(int nr, volatile void *addr)
+static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
@@ -223,7 +223,7 @@ static inline int test_and_clear_bit(int nr, volatile void *addr)
  * If two examples of this operation race, one can appear to succeed
  * but actually fail.  You must protect multiple accesses with a lock.
  */
-static inline int __test_and_clear_bit(int nr, volatile void *addr)
+static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
@@ -235,7 +235,7 @@ static inline int __test_and_clear_bit(int nr, volatile void *addr)
 }
 
 /* WARNING: non atomic and it can be reordered! */
-static inline int __test_and_change_bit(int nr, volatile void *addr)
+static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
@@ -255,7 +255,7 @@ static inline int __test_and_change_bit(int nr, volatile void *addr)
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
  */
-static inline int test_and_change_bit(int nr, volatile void *addr)
+static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
@@ -266,13 +266,13 @@ static inline int test_and_change_bit(int nr, volatile void *addr)
 	return oldbit;
 }
 
-static inline int constant_test_bit(int nr, const volatile void *addr)
+static inline int constant_test_bit(int nr, const volatile unsigned long *addr)
 {
 	return ((1UL << (nr % BITS_PER_LONG)) &
 		(((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0;
 }
 
-static inline int variable_test_bit(int nr, volatile const void *addr)
+static inline int variable_test_bit(int nr, volatile const unsigned long *addr)
 {
 	int oldbit;
 
-- 
cgit v1.2.3-70-g09d2


From 635ee418381566f03819408e1303ef21fcf2d41c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 12 May 2008 15:43:35 +0200
Subject: x86: create prototype for (un)map_devmem

Global functions need a prototype. Add it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/page.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index dc936dddf16..ed165097520 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -51,8 +51,15 @@
 
 #ifndef __ASSEMBLY__
 
+typedef struct { pgdval_t pgd; } pgd_t;
+typedef struct { pgprotval_t pgprot; } pgprot_t;
+
 extern int page_is_ram(unsigned long pagenr);
 extern int devmem_is_allowed(unsigned long pagenr);
+extern void map_devmem(unsigned long pfn, unsigned long size,
+		       pgprot_t vma_prot);
+extern void unmap_devmem(unsigned long pfn, unsigned long size,
+			 pgprot_t vma_prot);
 
 extern unsigned long max_pfn_mapped;
 
@@ -74,9 +81,6 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
 	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
 
-typedef struct { pgdval_t pgd; } pgd_t;
-typedef struct { pgprotval_t pgprot; } pgprot_t;
-
 static inline pgd_t native_make_pgd(pgdval_t val)
 {
 	return (pgd_t) { val };
-- 
cgit v1.2.3-70-g09d2


From d1097635deed7196c2a859f285c29267779ca45a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 2 May 2008 23:42:01 +0200
Subject: x86: move mmconfig declarations to header

arch/x86/kernel/mmconf-fam10h_64.c is missing the prototypes, which
are decalred in arch/x86/kernel/setup_64.c. Move the prototypes and
the inline stubs to the appropriate header file.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/mmconf-fam10h_64.c |  1 +
 arch/x86/kernel/setup_64.c         | 13 +------------
 include/asm-x86/mmconfig.h         | 12 ++++++++++++
 3 files changed, 14 insertions(+), 12 deletions(-)
 create mode 100644 include/asm-x86/mmconfig.h

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index edc5fbfe85c..fdfdc550b36 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -12,6 +12,7 @@
 #include <asm/io.h>
 #include <asm/msr.h>
 #include <asm/acpi.h>
+#include <asm/mmconfig.h>
 
 #include "../pci/pci.h"
 
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 6dff1286ad8..341230db74e 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -71,6 +71,7 @@
 #include <asm/topology.h>
 #include <asm/trampoline.h>
 #include <asm/pat.h>
+#include <asm/mmconfig.h>
 
 #include <mach_apic.h>
 #ifdef CONFIG_PARAVIRT
@@ -293,18 +294,6 @@ static void __init parse_setup_data(void)
 	}
 }
 
-#ifdef CONFIG_PCI_MMCONFIG
-extern void __cpuinit fam10h_check_enable_mmcfg(void);
-extern void __init check_enable_amd_mmconf_dmi(void);
-#else
-void __cpuinit fam10h_check_enable_mmcfg(void)
-{
-}
-void __init check_enable_amd_mmconf_dmi(void)
-{
-}
-#endif
-
 /*
  * setup_arch - architecture-specific boot-time initializations
  *
diff --git a/include/asm-x86/mmconfig.h b/include/asm-x86/mmconfig.h
new file mode 100644
index 00000000000..95beda07c6f
--- /dev/null
+++ b/include/asm-x86/mmconfig.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_MMCONFIG_H
+#define _ASM_MMCONFIG_H
+
+#ifdef CONFIG_PCI_MMCONFIG
+extern void __cpuinit fam10h_check_enable_mmcfg(void);
+extern void __init check_enable_amd_mmconf_dmi(void);
+#else
+static inline void fam10h_check_enable_mmcfg(void) { }
+static inline void check_enable_amd_mmconf_dmi(void) { }
+#endif
+
+#endif
-- 
cgit v1.2.3-70-g09d2


From 4c8ab98249fa3cead320ec0ee4cde9960b951989 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Mon, 12 May 2008 15:43:38 +0200
Subject: i386: move FIX_ACPI_* into non-permanent range

.. as they are used at early boot time only.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/fixmap_32.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/fixmap_32.h b/include/asm-x86/fixmap_32.h
index 4b96148e90c..f0df7ee9681 100644
--- a/include/asm-x86/fixmap_32.h
+++ b/include/asm-x86/fixmap_32.h
@@ -79,10 +79,6 @@ enum fixed_addresses {
 	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings */
 	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
 #endif
-#ifdef CONFIG_ACPI
-	FIX_ACPI_BEGIN,
-	FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
-#endif
 #ifdef CONFIG_PCI_MMCONFIG
 	FIX_PCIE_MCFG,
 #endif
@@ -103,6 +99,10 @@ enum fixed_addresses {
 			(__end_of_permanent_fixed_addresses & 511),
 	FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1,
 	FIX_WP_TEST,
+#ifdef CONFIG_ACPI
+	FIX_ACPI_BEGIN,
+	FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
+#endif
 #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
 	FIX_OHCI1394_BASE,
 #endif
-- 
cgit v1.2.3-70-g09d2


From ebdd561a19d7917ac49fff9c355aa4833c504bf1 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Mon, 12 May 2008 15:43:38 +0200
Subject: x86: constify data in reboot.c

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/reboot.c           | 18 +++++++++---------
 arch/x86/kernel/reboot_fixups_32.c |  4 ++--
 include/asm-x86/reboot.h           |  2 +-
 3 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index f6be7d5f82f..f8a62160e15 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -27,7 +27,7 @@
 void (*pm_power_off)(void);
 EXPORT_SYMBOL(pm_power_off);
 
-static long no_idt[3];
+static const struct desc_ptr no_idt = {};
 static int reboot_mode;
 enum reboot_type reboot_type = BOOT_KBD;
 int reboot_force;
@@ -201,15 +201,15 @@ core_initcall(reboot_init);
    controller to pulse the CPU reset line, which is more thorough, but
    doesn't work with at least one type of 486 motherboard.  It is easy
    to stop this code working; hence the copious comments. */
-static unsigned long long
+static const unsigned long long
 real_mode_gdt_entries [3] =
 {
 	0x0000000000000000ULL,	/* Null descriptor */
-	0x00009a000000ffffULL,	/* 16-bit real-mode 64k code at 0x00000000 */
-	0x000092000100ffffULL	/* 16-bit real-mode 64k data at 0x00000100 */
+	0x00009b000000ffffULL,	/* 16-bit real-mode 64k code at 0x00000000 */
+	0x000093000100ffffULL	/* 16-bit real-mode 64k data at 0x00000100 */
 };
 
-static struct desc_ptr
+static const struct desc_ptr
 real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries },
 real_mode_idt = { 0x3ff, 0 };
 
@@ -231,7 +231,7 @@ real_mode_idt = { 0x3ff, 0 };
 
    More could be done here to set up the registers as if a CPU reset had
    occurred; hopefully real BIOSs don't assume much. */
-static unsigned char real_mode_switch [] =
+static const unsigned char real_mode_switch [] =
 {
 	0x66, 0x0f, 0x20, 0xc0,			/*    movl  %cr0,%eax        */
 	0x66, 0x83, 0xe0, 0x11,			/*    andl  $0x00000011,%eax */
@@ -245,7 +245,7 @@ static unsigned char real_mode_switch [] =
 	0x24, 0x10,				/* f: andb  $0x10,al         */
 	0x66, 0x0f, 0x22, 0xc0			/*    movl  %eax,%cr0        */
 };
-static unsigned char jump_to_bios [] =
+static const unsigned char jump_to_bios [] =
 {
 	0xea, 0x00, 0x00, 0xff, 0xff		/*    ljmp  $0xffff,$0x0000  */
 };
@@ -255,7 +255,7 @@ static unsigned char jump_to_bios [] =
  * specified by the code and length parameters.
  * We assume that length will aways be less that 100!
  */
-void machine_real_restart(unsigned char *code, int length)
+void machine_real_restart(const unsigned char *code, int length)
 {
 	local_irq_disable();
 
@@ -368,7 +368,7 @@ static void native_machine_emergency_restart(void)
 			}
 
 		case BOOT_TRIPLE:
-			load_idt((const struct desc_ptr *)&no_idt);
+			load_idt(&no_idt);
 			__asm__ __volatile__("int3");
 
 			reboot_type = BOOT_KBD;
diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c
index dec0b5ec25c..61a837743fe 100644
--- a/arch/x86/kernel/reboot_fixups_32.c
+++ b/arch/x86/kernel/reboot_fixups_32.c
@@ -49,7 +49,7 @@ struct device_fixup {
 	void (*reboot_fixup)(struct pci_dev *);
 };
 
-static struct device_fixup fixups_table[] = {
+static const struct device_fixup fixups_table[] = {
 { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset },
 { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset },
 { PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset },
@@ -64,7 +64,7 @@ static struct device_fixup fixups_table[] = {
  */
 void mach_reboot_fixups(void)
 {
-	struct device_fixup *cur;
+	const struct device_fixup *cur;
 	struct pci_dev *dev;
 	int i;
 
diff --git a/include/asm-x86/reboot.h b/include/asm-x86/reboot.h
index e63741f1939..206f355786d 100644
--- a/include/asm-x86/reboot.h
+++ b/include/asm-x86/reboot.h
@@ -14,8 +14,8 @@ struct machine_ops {
 
 extern struct machine_ops machine_ops;
 
-void machine_real_restart(unsigned char *code, int length);
 void native_machine_crash_shutdown(struct pt_regs *regs);
 void native_machine_shutdown(void);
+void machine_real_restart(const unsigned char *code, int length);
 
 #endif	/* _ASM_REBOOT_H */
-- 
cgit v1.2.3-70-g09d2


From 9831bfb201a8e00415d0f9e5b5a50d902a90b2db Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Mon, 12 May 2008 15:43:38 +0200
Subject: x86 - hide X86_VM_MASK from userland programs v3

X86_VM_MASK is kernel specific flags so hide it from userland programs.

It should be defined *before* ptrace.h inclusion because of circular
link between these files

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/vm86.h | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/vm86.h b/include/asm-x86/vm86.h
index 074b357146d..cbf4a0effa7 100644
--- a/include/asm-x86/vm86.h
+++ b/include/asm-x86/vm86.h
@@ -14,12 +14,6 @@
 
 #include <asm/processor-flags.h>
 
-#ifdef CONFIG_VM86
-#define X86_VM_MASK	X86_EFLAGS_VM
-#else
-#define X86_VM_MASK	0 /* No VM86 support */
-#endif
-
 #define BIOSSEG		0x0f000
 
 #define CPU_086		0
@@ -133,6 +127,13 @@ struct vm86plus_struct {
 };
 
 #ifdef __KERNEL__
+
+#ifdef CONFIG_VM86
+#define X86_VM_MASK	X86_EFLAGS_VM
+#else
+#define X86_VM_MASK	0 /* No VM86 support */
+#endif
+
 /*
  * This is the (kernel) stack-layout when we have done a "SAVE_ALL" from vm86
  * mode - the main change is that the old segment descriptors aren't
-- 
cgit v1.2.3-70-g09d2


From 2237ce2057b82561f7ea27ed30153571f404112d Mon Sep 17 00:00:00 2001
From: Fernando Luis Vazquez Cao <fernando@oss.ntt.co.jp>
Date: Fri, 16 May 2008 11:01:26 +0900
Subject: x86: cleanup, remove duplicate declaration of unknown_nmi_panic

Signed-off-by: Fernando Luis Vazquez Cao <fernando@oss.ntt.co.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/nmi.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h
index 1e363021e72..8455bf36d8d 100644
--- a/include/asm-x86/nmi.h
+++ b/include/asm-x86/nmi.h
@@ -46,7 +46,6 @@ extern void nmi_watchdog_default(void);
 
 extern int check_nmi_watchdog(void);
 extern int nmi_watchdog_enabled;
-extern int unknown_nmi_panic;
 extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
 extern int avail_to_resrv_perfctr_nmi(unsigned int);
 extern int reserve_perfctr_nmi(unsigned int);
-- 
cgit v1.2.3-70-g09d2


From 23eb271b9186531e1eb704dda9ab37abcfd0ca4a Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 14 May 2008 16:10:39 -0700
Subject: x86: setup_force_cpu_cap(): don't do clear_bit(non-unsigned-long)

Another hack to make proper prototyping of x86 bitops viable.

Cc: Andrea Arcangeli <andrea@qumranet.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/cpufeature.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index 0d609c837a4..78b47e7404e 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -142,11 +142,11 @@ extern const char * const x86_power_flags[32];
 #define clear_cpu_cap(c, bit)	clear_bit(bit, (unsigned long *)((c)->x86_capability))
 #define setup_clear_cpu_cap(bit) do { \
 	clear_cpu_cap(&boot_cpu_data, bit);	\
-	set_bit(bit, cleared_cpu_caps); 	\
+	set_bit(bit, (unsigned long *)cleared_cpu_caps); \
 } while (0)
 #define setup_force_cpu_cap(bit) do { \
 	set_cpu_cap(&boot_cpu_data, bit);	\
-	clear_bit(bit, cleared_cpu_caps); 	\
+	clear_bit(bit, (unsigned long *)cleared_cpu_caps); 	\
 } while (0)
 
 #define cpu_has_fpu		boot_cpu_has(X86_FEATURE_FPU)
-- 
cgit v1.2.3-70-g09d2


From f0766440dda7ace8a43b030f75e2dcb82449fb85 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Fri, 9 May 2008 19:09:48 -0700
Subject: x86: unify current.h

Simply stitch these together. There are just two definitions that are shared
but the file is resonably small and putting these things together shows that
further unifications requires a unification of the per cpu / pda handling
between both arches.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/current.h    | 42 ++++++++++++++++++++++++++++++++++++++----
 include/asm-x86/current_32.h | 17 -----------------
 include/asm-x86/current_64.h | 27 ---------------------------
 3 files changed, 38 insertions(+), 48 deletions(-)
 delete mode 100644 include/asm-x86/current_32.h
 delete mode 100644 include/asm-x86/current_64.h

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/current.h b/include/asm-x86/current.h
index d2526d3f734..7515c19d498 100644
--- a/include/asm-x86/current.h
+++ b/include/asm-x86/current.h
@@ -1,5 +1,39 @@
+#ifndef _X86_CURRENT_H
+#define _X86_CURRENT_H
+
 #ifdef CONFIG_X86_32
-# include "current_32.h"
-#else
-# include "current_64.h"
-#endif
+#include <linux/compiler.h>
+#include <asm/percpu.h>
+
+struct task_struct;
+
+DECLARE_PER_CPU(struct task_struct *, current_task);
+static __always_inline struct task_struct *get_current(void)
+{
+	return x86_read_percpu(current_task);
+}
+
+#else /* X86_32 */
+
+#ifndef __ASSEMBLY__
+#include <asm/pda.h>
+
+struct task_struct;
+
+static __always_inline struct task_struct *get_current(void)
+{
+	return read_pda(pcurrent);
+}
+
+#else /* __ASSEMBLY__ */
+
+#include <asm/asm-offsets.h>
+#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* X86_32 */
+
+#define current get_current()
+
+#endif /* X86_CURRENT_H */
diff --git a/include/asm-x86/current_32.h b/include/asm-x86/current_32.h
deleted file mode 100644
index 5af9bdb97a1..00000000000
--- a/include/asm-x86/current_32.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef _I386_CURRENT_H
-#define _I386_CURRENT_H
-
-#include <linux/compiler.h>
-#include <asm/percpu.h>
-
-struct task_struct;
-
-DECLARE_PER_CPU(struct task_struct *, current_task);
-static __always_inline struct task_struct *get_current(void)
-{
-	return x86_read_percpu(current_task);
-}
-
-#define current get_current()
-
-#endif /* !(_I386_CURRENT_H) */
diff --git a/include/asm-x86/current_64.h b/include/asm-x86/current_64.h
deleted file mode 100644
index 2d368ede2fc..00000000000
--- a/include/asm-x86/current_64.h
+++ /dev/null
@@ -1,27 +0,0 @@
-#ifndef _X86_64_CURRENT_H
-#define _X86_64_CURRENT_H
-
-#if !defined(__ASSEMBLY__)
-struct task_struct;
-
-#include <asm/pda.h>
-
-static inline struct task_struct *get_current(void)
-{
-	struct task_struct *t = read_pda(pcurrent);
-	return t;
-}
-
-#define current get_current()
-
-#else
-
-#ifndef ASM_OFFSET_H
-#include <asm/asm-offsets.h>
-#endif
-
-#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg
-
-#endif
-
-#endif /* !(_X86_64_CURRENT_H) */
-- 
cgit v1.2.3-70-g09d2


From 78d64fc21d2aa425c65de49765cddecc84d595a4 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 12 May 2008 15:44:39 +0200
Subject: x86: include/asm-x86/string_32.h - style only

Looked at this file because of __memcpy warnings.
Thought it could use a style/checkpatch cleanup.

No change in vmlinux.

[tglx: fixed the remaining issues ]

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/string_32.h | 316 +++++++++++++++++++++++++-------------------
 1 file changed, 181 insertions(+), 135 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/string_32.h b/include/asm-x86/string_32.h
index b49369ad9a6..8d0c593c441 100644
--- a/include/asm-x86/string_32.h
+++ b/include/asm-x86/string_32.h
@@ -29,81 +29,116 @@ extern char *strchr(const char *s, int c);
 #define __HAVE_ARCH_STRLEN
 extern size_t strlen(const char *s);
 
-static __always_inline void * __memcpy(void * to, const void * from, size_t n)
+static __always_inline void *__memcpy(void *to, const void *from, size_t n)
 {
-int d0, d1, d2;
-__asm__ __volatile__(
-	"rep ; movsl\n\t"
-	"movl %4,%%ecx\n\t"
-	"andl $3,%%ecx\n\t"
-	"jz 1f\n\t"
-	"rep ; movsb\n\t"
-	"1:"
-	: "=&c" (d0), "=&D" (d1), "=&S" (d2)
-	: "0" (n/4), "g" (n), "1" ((long) to), "2" ((long) from)
-	: "memory");
-return (to);
+	int d0, d1, d2;
+	asm volatile("rep ; movsl\n\t"
+		     "movl %4,%%ecx\n\t"
+		     "andl $3,%%ecx\n\t"
+		     "jz 1f\n\t"
+		     "rep ; movsb\n\t"
+		     "1:"
+		     : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+		     : "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from)
+		     : "memory");
+	return to;
 }
 
 /*
  * This looks ugly, but the compiler can optimize it totally,
  * as the count is constant.
  */
-static __always_inline void * __constant_memcpy(void * to, const void * from, size_t n)
+static __always_inline void *__constant_memcpy(void *to, const void *from,
+					       size_t n)
 {
 	long esi, edi;
-	if (!n) return to;
-#if 1	/* want to do small copies with non-string ops? */
+	if (!n)
+		return to;
+
 	switch (n) {
-		case 1: *(char*)to = *(char*)from; return to;
-		case 2: *(short*)to = *(short*)from; return to;
-		case 4: *(int*)to = *(int*)from; return to;
-#if 1	/* including those doable with two moves? */
-		case 3: *(short*)to = *(short*)from;
-			*((char*)to+2) = *((char*)from+2); return to;
-		case 5: *(int*)to = *(int*)from;
-			*((char*)to+4) = *((char*)from+4); return to;
-		case 6: *(int*)to = *(int*)from;
-			*((short*)to+2) = *((short*)from+2); return to;
-		case 8: *(int*)to = *(int*)from;
-			*((int*)to+1) = *((int*)from+1); return to;
-#endif
+	case 1:
+		*(char *)to = *(char *)from;
+		return to;
+	case 2:
+		*(short *)to = *(short *)from;
+		return to;
+	case 4:
+		*(int *)to = *(int *)from;
+		return to;
+
+	case 3:
+		*(short *)to = *(short *)from;
+		*((char *)to + 2) = *((char *)from + 2);
+		return to;
+	case 5:
+		*(int *)to = *(int *)from;
+		*((char *)to + 4) = *((char *)from + 4);
+		return to;
+	case 6:
+		*(int *)to = *(int *)from;
+		*((short *)to + 2) = *((short *)from + 2);
+		return to;
+	case 8:
+		*(int *)to = *(int *)from;
+		*((int *)to + 1) = *((int *)from + 1);
+		return to;
 	}
-#endif
-	esi = (long) from;
-	edi = (long) to;
-	if (n >= 5*4) {
+
+	esi = (long)from;
+	edi = (long)to;
+	if (n >= 5 * 4) {
 		/* large block: use rep prefix */
 		int ecx;
-		__asm__ __volatile__(
-			"rep ; movsl"
-			: "=&c" (ecx), "=&D" (edi), "=&S" (esi)
-			: "0" (n/4), "1" (edi),"2" (esi)
-			: "memory"
+		asm volatile("rep ; movsl"
+			     : "=&c" (ecx), "=&D" (edi), "=&S" (esi)
+			     : "0" (n / 4), "1" (edi), "2" (esi)
+			     : "memory"
 		);
 	} else {
 		/* small block: don't clobber ecx + smaller code */
-		if (n >= 4*4) __asm__ __volatile__("movsl"
-			:"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory");
-		if (n >= 3*4) __asm__ __volatile__("movsl"
-			:"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory");
-		if (n >= 2*4) __asm__ __volatile__("movsl"
-			:"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory");
-		if (n >= 1*4) __asm__ __volatile__("movsl"
-			:"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory");
+		if (n >= 4 * 4)
+			asm volatile("movsl"
+				     : "=&D"(edi), "=&S"(esi)
+				     : "0"(edi), "1"(esi)
+				     : "memory");
+		if (n >= 3 * 4)
+			asm volatile("movsl"
+				     : "=&D"(edi), "=&S"(esi)
+				     : "0"(edi), "1"(esi)
+				     : "memory");
+		if (n >= 2 * 4)
+			asm volatile("movsl"
+				     : "=&D"(edi), "=&S"(esi)
+				     : "0"(edi), "1"(esi)
+				     : "memory");
+		if (n >= 1 * 4)
+			asm volatile("movsl"
+				     : "=&D"(edi), "=&S"(esi)
+				     : "0"(edi), "1"(esi)
+				     : "memory");
 	}
 	switch (n % 4) {
 		/* tail */
-		case 0: return to;
-		case 1: __asm__ __volatile__("movsb"
-			:"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory");
-			return to;
-		case 2: __asm__ __volatile__("movsw"
-			:"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory");
-			return to;
-		default: __asm__ __volatile__("movsw\n\tmovsb"
-			:"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory");
-			return to;
+	case 0:
+		return to;
+	case 1:
+		asm volatile("movsb"
+			     : "=&D"(edi), "=&S"(esi)
+			     : "0"(edi), "1"(esi)
+			     : "memory");
+		return to;
+	case 2:
+		asm volatile("movsw"
+			     : "=&D"(edi), "=&S"(esi)
+			     : "0"(edi), "1"(esi)
+			     : "memory");
+		return to;
+	default:
+		asm volatile("movsw\n\tmovsb"
+			     : "=&D"(edi), "=&S"(esi)
+			     : "0"(edi), "1"(esi)
+			     : "memory");
+		return to;
 	}
 }
 
@@ -117,87 +152,86 @@ static __always_inline void * __constant_memcpy(void * to, const void * from, si
  *	This CPU favours 3DNow strongly (eg AMD Athlon)
  */
 
-static inline void * __constant_memcpy3d(void * to, const void * from, size_t len)
+static inline void *__constant_memcpy3d(void *to, const void *from, size_t len)
 {
 	if (len < 512)
 		return __constant_memcpy(to, from, len);
 	return _mmx_memcpy(to, from, len);
 }
 
-static __inline__ void *__memcpy3d(void *to, const void *from, size_t len)
+static inline void *__memcpy3d(void *to, const void *from, size_t len)
 {
 	if (len < 512)
 		return __memcpy(to, from, len);
 	return _mmx_memcpy(to, from, len);
 }
 
-#define memcpy(t, f, n) \
-(__builtin_constant_p(n) ? \
- __constant_memcpy3d((t),(f),(n)) : \
- __memcpy3d((t),(f),(n)))
+#define memcpy(t, f, n)				\
+	(__builtin_constant_p((n))		\
+	 ? __constant_memcpy3d((t), (f), (n))	\
+	 : __memcpy3d((t), (f), (n)))
 
 #else
 
 /*
  *	No 3D Now!
  */
- 
-#define memcpy(t, f, n) \
-(__builtin_constant_p(n) ? \
- __constant_memcpy((t),(f),(n)) : \
- __memcpy((t),(f),(n)))
+
+#define memcpy(t, f, n)				\
+	(__builtin_constant_p((n))		\
+	 ? __constant_memcpy((t), (f), (n))	\
+	 : __memcpy((t), (f), (n)))
 
 #endif
 
 #define __HAVE_ARCH_MEMMOVE
-void *memmove(void * dest,const void * src, size_t n);
+void *memmove(void *dest, const void *src, size_t n);
 
 #define memcmp __builtin_memcmp
 
 #define __HAVE_ARCH_MEMCHR
-extern void *memchr(const void * cs,int c,size_t count);
+extern void *memchr(const void *cs, int c, size_t count);
 
-static inline void * __memset_generic(void * s, char c,size_t count)
+static inline void *__memset_generic(void *s, char c, size_t count)
 {
-int d0, d1;
-__asm__ __volatile__(
-	"rep\n\t"
-	"stosb"
-	: "=&c" (d0), "=&D" (d1)
-	:"a" (c),"1" (s),"0" (count)
-	:"memory");
-return s;
+	int d0, d1;
+	asm volatile("rep\n\t"
+		     "stosb"
+		     : "=&c" (d0), "=&D" (d1)
+		     : "a" (c), "1" (s), "0" (count)
+		     : "memory");
+	return s;
 }
 
 /* we might want to write optimized versions of these later */
-#define __constant_count_memset(s,c,count) __memset_generic((s),(c),(count))
+#define __constant_count_memset(s, c, count) __memset_generic((s), (c), (count))
 
 /*
- * memset(x,0,y) is a reasonably common thing to do, so we want to fill
+ * memset(x, 0, y) is a reasonably common thing to do, so we want to fill
  * things 32 bits at a time even when we don't know the size of the
  * area at compile-time..
  */
-static __always_inline void * __constant_c_memset(void * s, unsigned long c, size_t count)
+static __always_inline
+void *__constant_c_memset(void *s, unsigned long c, size_t count)
 {
-int d0, d1;
-__asm__ __volatile__(
-	"rep ; stosl\n\t"
-	"testb $2,%b3\n\t"
-	"je 1f\n\t"
-	"stosw\n"
-	"1:\ttestb $1,%b3\n\t"
-	"je 2f\n\t"
-	"stosb\n"
-	"2:"
-	:"=&c" (d0), "=&D" (d1)
-	:"a" (c), "q" (count), "0" (count/4), "1" ((long) s)
-	:"memory");
-return (s);	
+	int d0, d1;
+	asm volatile("rep ; stosl\n\t"
+		     "testb $2,%b3\n\t"
+		     "je 1f\n\t"
+		     "stosw\n"
+		     "1:\ttestb $1,%b3\n\t"
+		     "je 2f\n\t"
+		     "stosb\n"
+		     "2:"
+		     : "=&c" (d0), "=&D" (d1)
+		     : "a" (c), "q" (count), "0" (count/4), "1" ((long)s)
+		     : "memory");
+	return s;
 }
 
 /* Added by Gertjan van Wingerde to make minix and sysv module work */
 #define __HAVE_ARCH_STRNLEN
-extern size_t strnlen(const char * s, size_t count);
+extern size_t strnlen(const char *s, size_t count);
 /* end of additional stuff */
 
 #define __HAVE_ARCH_STRSTR
@@ -207,66 +241,78 @@ extern char *strstr(const char *cs, const char *ct);
  * This looks horribly ugly, but the compiler can optimize it totally,
  * as we by now know that both pattern and count is constant..
  */
-static __always_inline void * __constant_c_and_count_memset(void * s, unsigned long pattern, size_t count)
+static __always_inline
+void *__constant_c_and_count_memset(void *s, unsigned long pattern,
+				    size_t count)
 {
 	switch (count) {
+	case 0:
+		return s;
+	case 1:
+		*(unsigned char *)s = pattern & 0xff;
+		return s;
+	case 2:
+		*(unsigned short *)s = pattern & 0xffff;
+		return s;
+	case 3:
+		*(unsigned short *)s = pattern & 0xffff;
+		*((unsigned char *)s + 2) = pattern & 0xff;
+		return s;
+	case 4:
+		*(unsigned long *)s = pattern;
+		return s;
+	}
+
+#define COMMON(x)							\
+	asm volatile("rep ; stosl"					\
+		     x							\
+		     : "=&c" (d0), "=&D" (d1)				\
+		     : "a" (pattern), "0" (count/4), "1" ((long)s)	\
+		     : "memory")
+
+	{
+		int d0, d1;
+		switch (count % 4) {
 		case 0:
+			COMMON("");
 			return s;
 		case 1:
-			*(unsigned char *)s = pattern & 0xff;
+			COMMON("\n\tstosb");
 			return s;
 		case 2:
-			*(unsigned short *)s = pattern & 0xffff;
+			COMMON("\n\tstosw");
 			return s;
-		case 3:
-			*(unsigned short *)s = pattern & 0xffff;
-			*(2+(unsigned char *)s) = pattern & 0xff;
+		default:
+			COMMON("\n\tstosw\n\tstosb");
 			return s;
-		case 4:
-			*(unsigned long *)s = pattern;
-			return s;
-	}
-#define COMMON(x) \
-__asm__  __volatile__( \
-	"rep ; stosl" \
-	x \
-	: "=&c" (d0), "=&D" (d1) \
-	: "a" (pattern),"0" (count/4),"1" ((long) s) \
-	: "memory")
-{
-	int d0, d1;
-	switch (count % 4) {
-		case 0: COMMON(""); return s;
-		case 1: COMMON("\n\tstosb"); return s;
-		case 2: COMMON("\n\tstosw"); return s;
-		default: COMMON("\n\tstosw\n\tstosb"); return s;
+		}
 	}
-}
-  
+
 #undef COMMON
 }
 
-#define __constant_c_x_memset(s, c, count) \
-(__builtin_constant_p(count) ? \
- __constant_c_and_count_memset((s),(c),(count)) : \
- __constant_c_memset((s),(c),(count)))
+#define __constant_c_x_memset(s, c, count)			\
+	(__builtin_constant_p(count)				\
+	 ? __constant_c_and_count_memset((s), (c), (count))	\
+	 : __constant_c_memset((s), (c), (count)))
 
-#define __memset(s, c, count) \
-(__builtin_constant_p(count) ? \
- __constant_count_memset((s),(c),(count)) : \
- __memset_generic((s),(c),(count)))
+#define __memset(s, c, count)				\
+	(__builtin_constant_p(count)			\
+	 ? __constant_count_memset((s), (c), (count))	\
+	 : __memset_generic((s), (c), (count)))
 
 #define __HAVE_ARCH_MEMSET
-#define memset(s, c, count) \
-(__builtin_constant_p(c) ? \
- __constant_c_x_memset((s),(0x01010101UL*(unsigned char)(c)),(count)) : \
- __memset((s),(c),(count)))
+#define memset(s, c, count)						\
+	(__builtin_constant_p(c)					\
+	 ? __constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \
+				 (count))				\
+	 : __memset((s), (c), (count)))
 
 /*
  * find the first occurrence of byte 'c', or 1 past the area if none
  */
 #define __HAVE_ARCH_MEMSCAN
-extern void *memscan(void * addr, int c, size_t size);
+extern void *memscan(void *addr, int c, size_t size);
 
 #endif /* __KERNEL__ */
 
-- 
cgit v1.2.3-70-g09d2


From 0dbfafa5fcd4dd189e2adc7b6ed9e0405e846d79 Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@mailshack.com>
Date: Wed, 23 Apr 2008 15:09:05 +0200
Subject: x86: move i386 memory setup code to e820_32.c

The x86_64 code has centralized the memory setup code in
e820_64.c. This patch copies that approach to i386:

- early_param("mem", ...) parsing is moved from
setup_32.c to e820_32.c.

- setup_memory_map() and finish_e820_parsing() are
factored out from setup_arch(), and declarations
are added to e820_32.h.

- print_memory_map() is made static and removed from
e820_32.h.

- user_defined_memmap is marked as __initdata.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/e820_32.c  | 60 ++++++++++++++++++++++++++++++++++++++++++++--
 arch/x86/kernel/setup_32.c | 50 ++------------------------------------
 include/asm-x86/e820_32.h  |  4 +++-
 3 files changed, 63 insertions(+), 51 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
index ed733e7cf4e..31ea2bb8c91 100644
--- a/arch/x86/kernel/e820_32.c
+++ b/arch/x86/kernel/e820_32.c
@@ -30,7 +30,6 @@ unsigned long pci_mem_start = 0x10000000;
 #ifdef CONFIG_PCI
 EXPORT_SYMBOL(pci_mem_start);
 #endif
-extern int user_defined_memmap;
 
 static struct resource system_rom_resource = {
 	.name	= "System ROM",
@@ -584,7 +583,7 @@ void __init e820_register_memory(void)
 		pci_mem_start, gapstart, gapsize);
 }
 
-void __init print_memory_map(char *who)
+static void __init print_memory_map(char *who)
 {
 	int i;
 
@@ -692,6 +691,54 @@ e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
 	return 0;
 }
 
+/* Overridden in paravirt.c if CONFIG_PARAVIRT */
+char * __init __attribute__((weak)) memory_setup(void)
+{
+	return machine_specific_memory_setup();
+}
+
+void __init setup_memory_map(void)
+{
+	printk(KERN_INFO "BIOS-provided physical RAM map:\n");
+	print_memory_map(memory_setup());
+}
+
+static int __initdata user_defined_memmap;
+
+/*
+ * "mem=nopentium" disables the 4MB page tables.
+ * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
+ * to <mem>, overriding the bios size.
+ * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
+ * <start> to <start>+<mem>, overriding the bios size.
+ *
+ * HPA tells me bootloaders need to parse mem=, so no new
+ * option should be mem=  [also see Documentation/i386/boot.txt]
+ */
+static int __init parse_mem(char *arg)
+{
+	if (!arg)
+		return -EINVAL;
+
+	if (strcmp(arg, "nopentium") == 0) {
+		setup_clear_cpu_cap(X86_FEATURE_PSE);
+	} else {
+		/* If the user specifies memory size, we
+		 * limit the BIOS-provided memory map to
+		 * that size. exactmap can be used to specify
+		 * the exact map. mem=number can be used to
+		 * trim the existing memory map.
+		 */
+		unsigned long long mem_size;
+
+		mem_size = memparse(arg, &arg);
+		limit_regions(mem_size);
+		user_defined_memmap = 1;
+	}
+	return 0;
+}
+early_param("mem", parse_mem);
+
 static int __init parse_memmap(char *arg)
 {
 	if (!arg)
@@ -762,6 +809,15 @@ void __init update_memory_range(u64 start, u64 size, unsigned old_type,
 					 new_type);
 	}
 }
+
+void __init finish_e820_parsing(void)
+{
+	if (user_defined_memmap) {
+		printk(KERN_INFO "user-defined physical RAM map:\n");
+		print_memory_map("user");
+	}
+}
+
 void __init update_e820(void)
 {
 	u8 nr_map;
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 2c5f8b213e8..b54c79c91ef 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -237,42 +237,6 @@ static inline void copy_edd(void)
 }
 #endif
 
-int __initdata user_defined_memmap;
-
-/*
- * "mem=nopentium" disables the 4MB page tables.
- * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
- * to <mem>, overriding the bios size.
- * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
- * <start> to <start>+<mem>, overriding the bios size.
- *
- * HPA tells me bootloaders need to parse mem=, so no new
- * option should be mem=  [also see Documentation/i386/boot.txt]
- */
-static int __init parse_mem(char *arg)
-{
-	if (!arg)
-		return -EINVAL;
-
-	if (strcmp(arg, "nopentium") == 0) {
-		setup_clear_cpu_cap(X86_FEATURE_PSE);
-	} else {
-		/* If the user specifies memory size, we
-		 * limit the BIOS-provided memory map to
-		 * that size. exactmap can be used to specify
-		 * the exact map. mem=number can be used to
-		 * trim the existing memory map.
-		 */
-		unsigned long long mem_size;
-
-		mem_size = memparse(arg, &arg);
-		limit_regions(mem_size);
-		user_defined_memmap = 1;
-	}
-	return 0;
-}
-early_param("mem", parse_mem);
-
 #ifdef CONFIG_PROC_VMCORE
 /* elfcorehdr= specifies the location of elf core header
  * stored by the crashed kernel.
@@ -725,12 +689,6 @@ static void set_mca_bus(int x)
 static void set_mca_bus(int x) { }
 #endif
 
-/* Overridden in paravirt.c if CONFIG_PARAVIRT */
-char * __init __attribute__((weak)) memory_setup(void)
-{
-	return machine_specific_memory_setup();
-}
-
 #ifdef CONFIG_NUMA
 /*
  * In the golden day, when everything among i386 and x86_64 will be
@@ -786,8 +744,7 @@ void __init setup_arch(char **cmdline_p)
 #endif
 	ARCH_SETUP
 
-	printk(KERN_INFO "BIOS-provided physical RAM map:\n");
-	print_memory_map(memory_setup());
+	setup_memory_map();
 
 	copy_edd();
 
@@ -807,10 +764,7 @@ void __init setup_arch(char **cmdline_p)
 
 	parse_early_param();
 
-	if (user_defined_memmap) {
-		printk(KERN_INFO "user-defined physical RAM map:\n");
-		print_memory_map("user");
-	}
+	finish_e820_parsing();
 
 	strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
 	*cmdline_p = command_line;
diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h
index a9f7c6ec32b..e1f10c60901 100644
--- a/include/asm-x86/e820_32.h
+++ b/include/asm-x86/e820_32.h
@@ -18,6 +18,9 @@
 
 #ifndef __ASSEMBLY__
 
+extern void setup_memory_map(void);
+extern void finish_e820_parsing(void);
+
 extern struct e820map e820;
 extern void update_e820(void);
 
@@ -32,7 +35,6 @@ extern void update_memory_range(u64 start, u64 size, unsigned old_type,
 				unsigned new_type);
 extern void e820_register_memory(void);
 extern void limit_regions(unsigned long long size);
-extern void print_memory_map(char *who);
 extern void init_iomem_resources(struct resource *code_resource,
 				 struct resource *data_resource,
 				 struct resource *bss_resource);
-- 
cgit v1.2.3-70-g09d2


From 42651f15824d003e8357693ab72c4dbb3e280836 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel.send@gmail.com>
Date: Tue, 29 Apr 2008 01:59:49 -0700
Subject: x86: fix trimming e820 with MTRR holes.

converting MTRR layout from continous to discrete, some time could run out of
MTRRs. So add gran_sizek to prevent that by dumpping small RAM piece less than
gran_sizek.

previous trimming only can handle highest_pfn from mtrr to end_pfn from e820.
when have more than 4g RAM installed, there will be holes below 4g. so need to
check ram below 4g is coverred well.

need to be applied after
	[PATCH] x86: mtrr cleanup for converting continuous to discrete layout v7

Signed-off-by: Yinghai Lu <yinghai.lu@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/mtrr/main.c | 101 +++++++++++++++++++++++++++++++++-------
 arch/x86/kernel/e820_32.c       |   7 ++-
 arch/x86/kernel/e820_64.c       |   6 ++-
 include/asm-x86/e820_32.h       |   2 +-
 include/asm-x86/e820_64.h       |   2 +-
 5 files changed, 97 insertions(+), 21 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 8a6f68b45e3..9ab5c16b0d5 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -1095,6 +1095,17 @@ int __init amd_special_default_mtrr(void)
 	return 0;
 }
 
+static u64 __init real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn)
+{
+	u64 trim_start, trim_size;
+	trim_start =  start_pfn;
+	trim_start <<= PAGE_SHIFT;
+	trim_size = limit_pfn;
+	trim_size <<= PAGE_SHIFT;
+	trim_size -= trim_start;
+	return update_memory_range(trim_start, trim_size, E820_RAM,
+				E820_RESERVED);
+}
 /**
  * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
  * @end_pfn: ending page frame number
@@ -1110,8 +1121,13 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
 {
 	unsigned long i, base, size, highest_pfn = 0, def, dummy;
 	mtrr_type type;
-	u64 trim_start, trim_size;
+	struct res_range range[RANGE_NUM];
+	int nr_range;
+	u64 total_real_trim_size;
+	int changed;
 
+	/* extra one for all 0 */
+	int num[MTRR_NUM_TYPES + 1];
 	/*
 	 * Make sure we only trim uncachable memory on machines that
 	 * support the Intel MTRR architecture:
@@ -1123,9 +1139,6 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
 	if (def != MTRR_TYPE_UNCACHABLE)
 		return 0;
 
-	if (amd_special_default_mtrr())
-		return 0;
-
 	/* Find highest cached pfn */
 	for (i = 0; i < num_var_ranges; i++) {
 		mtrr_if->get(i, &base, &size, &type);
@@ -1145,26 +1158,80 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
 		return 0;
 	}
 
-	if (highest_pfn < end_pfn) {
+	/* check entries number */
+	memset(num, 0, sizeof(num));
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		if (type >= MTRR_NUM_TYPES)
+			continue;
+		if (!size)
+			type = MTRR_NUM_TYPES;
+		num[type]++;
+	}
+
+	/* no entry for WB? */
+	if (!num[MTRR_TYPE_WRBACK])
+		return 0;
+
+	/* check if we only had WB and UC */
+	if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
+		num_var_ranges - num[MTRR_NUM_TYPES])
+		return 0;
+
+	memset(range, 0, sizeof(range));
+	nr_range = 0;
+	if (mtrr_tom2) {
+		range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
+		range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1;
+		if (highest_pfn < range[nr_range].end + 1)
+			highest_pfn = range[nr_range].end + 1;
+		nr_range++;
+	}
+	nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
+
+	changed = 0;
+	total_real_trim_size = 0;
+
+	/* check the top at first */
+	i = nr_range - 1;
+	if (range[i].end + 1 < end_pfn) {
+			total_real_trim_size += real_trim_memory(range[i].end + 1, end_pfn);
+	}
+
+	if (total_real_trim_size) {
 		printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
-			" all of memory, losing %luMB of RAM.\n",
-			(end_pfn - highest_pfn) >> (20 - PAGE_SHIFT));
+			" all of memory, losing %lluMB of RAM.\n",
+			total_real_trim_size >> 20);
 
 		WARN_ON(1);
 
-		printk(KERN_INFO "update e820 for mtrr\n");
-		trim_start = highest_pfn;
-		trim_start <<= PAGE_SHIFT;
-		trim_size = end_pfn;
-		trim_size <<= PAGE_SHIFT;
-		trim_size -= trim_start;
-		update_memory_range(trim_start, trim_size, E820_RAM,
-					E820_RESERVED);
+		printk(KERN_INFO "update e820 for mtrr -- end_pfn\n");
 		update_e820();
-		return 1;
+		changed = 1;
 	}
 
-	return 0;
+	total_real_trim_size = 0;
+	if (range[0].start)
+		total_real_trim_size += real_trim_memory(0, range[0].start);
+
+	for (i = 0; i < nr_range - 1; i--) {
+		if (range[i].end + 1 < range[i+1].start)
+			total_real_trim_size += real_trim_memory(range[i].end + 1, range[i+1].start);
+	}
+
+	if (total_real_trim_size) {
+		printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
+			" all of memory, losing %lluMB of RAM.\n",
+			total_real_trim_size >> 20);
+
+		WARN_ON(1);
+
+		printk(KERN_INFO "update e820 for mtrr -- holes\n");
+		update_e820();
+		changed = 1;
+	}
+
+	return changed;
 }
 
 /**
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
index 31ea2bb8c91..857f706273a 100644
--- a/arch/x86/kernel/e820_32.c
+++ b/arch/x86/kernel/e820_32.c
@@ -783,10 +783,11 @@ static int __init parse_memmap(char *arg)
 	return 0;
 }
 early_param("memmap", parse_memmap);
-void __init update_memory_range(u64 start, u64 size, unsigned old_type,
+u64 __init update_memory_range(u64 start, u64 size, unsigned old_type,
 				unsigned new_type)
 {
 	int i;
+	u64 real_updated_size = 0;
 
 	BUG_ON(old_type == new_type);
 
@@ -798,6 +799,7 @@ void __init update_memory_range(u64 start, u64 size, unsigned old_type,
 		/* totally covered? */
 		if (ei->addr >= start && ei->size <= size) {
 			ei->type = new_type;
+			real_updated_size += ei->size;
 			continue;
 		}
 		/* partially covered */
@@ -807,7 +809,10 @@ void __init update_memory_range(u64 start, u64 size, unsigned old_type,
 			continue;
 		add_memory_region(final_start, final_end - final_start,
 					 new_type);
+		real_updated_size += final_end - final_start;
 	}
+
+	return real_updated_size;
 }
 
 void __init finish_e820_parsing(void)
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index 124480c0008..848b2cd2d1d 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -829,10 +829,11 @@ void __init finish_e820_parsing(void)
 	}
 }
 
-void __init update_memory_range(u64 start, u64 size, unsigned old_type,
+u64 __init update_memory_range(u64 start, u64 size, unsigned old_type,
 				unsigned new_type)
 {
 	int i;
+	u64 real_updated_size = 0;
 
 	BUG_ON(old_type == new_type);
 
@@ -844,6 +845,7 @@ void __init update_memory_range(u64 start, u64 size, unsigned old_type,
 		/* totally covered? */
 		if (ei->addr >= start && ei->size <= size) {
 			ei->type = new_type;
+			real_updated_size += ei->size;
 			continue;
 		}
 		/* partially covered */
@@ -853,7 +855,9 @@ void __init update_memory_range(u64 start, u64 size, unsigned old_type,
 			continue;
 		add_memory_region(final_start, final_end - final_start,
 					 new_type);
+		real_updated_size += final_end - final_start;
 	}
+	return real_updated_size;
 }
 
 void __init update_e820(void)
diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h
index e1f10c60901..af0711b220d 100644
--- a/include/asm-x86/e820_32.h
+++ b/include/asm-x86/e820_32.h
@@ -31,7 +31,7 @@ extern void propagate_e820_map(void);
 extern void register_bootmem_low_pages(unsigned long max_low_pfn);
 extern void add_memory_region(unsigned long long start,
 			      unsigned long long size, int type);
-extern void update_memory_range(u64 start, u64 size, unsigned old_type,
+extern u64 update_memory_range(u64 start, u64 size, unsigned old_type,
 				unsigned new_type);
 extern void e820_register_memory(void);
 extern void limit_regions(unsigned long long size);
diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h
index 71c4d685d30..6ae3e280328 100644
--- a/include/asm-x86/e820_64.h
+++ b/include/asm-x86/e820_64.h
@@ -21,7 +21,7 @@ extern unsigned long find_e820_area_size(unsigned long start,
 					 unsigned long align);
 extern void add_memory_region(unsigned long start, unsigned long size,
 			      int type);
-extern void update_memory_range(u64 start, u64 size, unsigned old_type,
+extern u64 update_memory_range(u64 start, u64 size, unsigned old_type,
 				unsigned new_type);
 extern void setup_memory_region(void);
 extern void contig_e820_setup(void);
-- 
cgit v1.2.3-70-g09d2


From b79cd8f1268bab57ff85b19d131f7f23deab2dee Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sun, 11 May 2008 00:30:15 -0700
Subject: x86: make e820.c to have common functions

remove the duplicated copy of these functions.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/Makefile   |   2 +-
 arch/x86/kernel/e820.c     | 475 +++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/e820_32.c  | 411 +--------------------------------------
 arch/x86/kernel/e820_64.c  | 444 ------------------------------------------
 arch/x86/kernel/setup_32.c |   2 +-
 include/asm-x86/e820.h     |  14 ++
 include/asm-x86/e820_32.h  |  12 --
 include/asm-x86/e820_64.h  |  12 --
 8 files changed, 496 insertions(+), 876 deletions(-)
 create mode 100644 arch/x86/kernel/e820.c

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5e618c3b472..5369a4e36f1 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -22,7 +22,7 @@ obj-y			+= setup_$(BITS).o i8259_$(BITS).o setup.o
 obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)	+= syscall_64.o vsyscall_64.o setup64.o
-obj-y			+= bootflag.o e820_$(BITS).o
+obj-y			+= bootflag.o e820_$(BITS).o e820.o
 obj-y			+= pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
 obj-y			+= alternative.o i8253.o pci-nommu.o
 obj-$(CONFIG_X86_64)	+= bugs_64.o
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
new file mode 100644
index 00000000000..2cb686f60d0
--- /dev/null
+++ b/arch/x86/kernel/e820.c
@@ -0,0 +1,475 @@
+/*
+ * Handle the memory map.
+ * The functions here do the job until bootmem takes over.
+ *
+ *  Getting sanitize_e820_map() in sync with i386 version by applying change:
+ *  -  Provisions for empty E820 memory regions (reported by certain BIOSes).
+ *     Alex Achenbach <xela@slit.de>, December 2002.
+ *  Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/ioport.h>
+#include <linux/string.h>
+#include <linux/kexec.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/pfn.h>
+
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/e820.h>
+#include <asm/setup.h>
+
+struct e820map e820;
+
+/* For PCI or other memory-mapped resources */
+unsigned long pci_mem_start = 0xaeedbabe;
+#ifdef CONFIG_PCI
+EXPORT_SYMBOL(pci_mem_start);
+#endif
+
+/*
+ * This function checks if any part of the range <start,end> is mapped
+ * with type.
+ */
+int
+e820_any_mapped(u64 start, u64 end, unsigned type)
+{
+	int i;
+
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+
+		if (type && ei->type != type)
+			continue;
+		if (ei->addr >= end || ei->addr + ei->size <= start)
+			continue;
+		return 1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(e820_any_mapped);
+
+/*
+ * This function checks if the entire range <start,end> is mapped with type.
+ *
+ * Note: this function only works correct if the e820 table is sorted and
+ * not-overlapping, which is the case
+ */
+int __init e820_all_mapped(u64 start, u64 end, unsigned type)
+{
+	int i;
+
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+
+		if (type && ei->type != type)
+			continue;
+		/* is the region (part) in overlap with the current region ?*/
+		if (ei->addr >= end || ei->addr + ei->size <= start)
+			continue;
+
+		/* if the region is at the beginning of <start,end> we move
+		 * start to the end of the region since it's ok until there
+		 */
+		if (ei->addr <= start)
+			start = ei->addr + ei->size;
+		/*
+		 * if start is now at or beyond end, we're done, full
+		 * coverage
+		 */
+		if (start >= end)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Add a memory region to the kernel e820 map.
+ */
+void __init add_memory_region(u64 start, u64 size, int type)
+{
+	int x = e820.nr_map;
+
+	if (x == E820MAX) {
+		printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
+		return;
+	}
+
+	e820.map[x].addr = start;
+	e820.map[x].size = size;
+	e820.map[x].type = type;
+	e820.nr_map++;
+}
+
+void __init e820_print_map(char *who)
+{
+	int i;
+
+	for (i = 0; i < e820.nr_map; i++) {
+		printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
+		       (unsigned long long) e820.map[i].addr,
+		       (unsigned long long)
+		       (e820.map[i].addr + e820.map[i].size));
+		switch (e820.map[i].type) {
+		case E820_RAM:
+			printk(KERN_CONT "(usable)\n");
+			break;
+		case E820_RESERVED:
+			printk(KERN_CONT "(reserved)\n");
+			break;
+		case E820_ACPI:
+			printk(KERN_CONT "(ACPI data)\n");
+			break;
+		case E820_NVS:
+			printk(KERN_CONT "(ACPI NVS)\n");
+			break;
+		default:
+			printk(KERN_CONT "type %u\n", e820.map[i].type);
+			break;
+		}
+	}
+}
+
+/*
+ * Sanitize the BIOS e820 map.
+ *
+ * Some e820 responses include overlapping entries. The following
+ * replaces the original e820 map with a new one, removing overlaps.
+ *
+ */
+int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map)
+{
+	struct change_member {
+		struct e820entry *pbios; /* pointer to original bios entry */
+		unsigned long long addr; /* address for this change point */
+	};
+	static struct change_member change_point_list[2*E820MAX] __initdata;
+	static struct change_member *change_point[2*E820MAX] __initdata;
+	static struct e820entry *overlap_list[E820MAX] __initdata;
+	static struct e820entry new_bios[E820MAX] __initdata;
+	struct change_member *change_tmp;
+	unsigned long current_type, last_type;
+	unsigned long long last_addr;
+	int chgidx, still_changing;
+	int overlap_entries;
+	int new_bios_entry;
+	int old_nr, new_nr, chg_nr;
+	int i;
+
+	/*
+		Visually we're performing the following
+		(1,2,3,4 = memory types)...
+
+		Sample memory map (w/overlaps):
+		   ____22__________________
+		   ______________________4_
+		   ____1111________________
+		   _44_____________________
+		   11111111________________
+		   ____________________33__
+		   ___________44___________
+		   __________33333_________
+		   ______________22________
+		   ___________________2222_
+		   _________111111111______
+		   _____________________11_
+		   _________________4______
+
+		Sanitized equivalent (no overlap):
+		   1_______________________
+		   _44_____________________
+		   ___1____________________
+		   ____22__________________
+		   ______11________________
+		   _________1______________
+		   __________3_____________
+		   ___________44___________
+		   _____________33_________
+		   _______________2________
+		   ________________1_______
+		   _________________4______
+		   ___________________2____
+		   ____________________33__
+		   ______________________4_
+	*/
+
+	/* if there's only one memory region, don't bother */
+	if (*pnr_map < 2)
+		return -1;
+
+	old_nr = *pnr_map;
+
+	/* bail out if we find any unreasonable addresses in bios map */
+	for (i = 0; i < old_nr; i++)
+		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
+			return -1;
+
+	/* create pointers for initial change-point information (for sorting) */
+	for (i = 0; i < 2 * old_nr; i++)
+		change_point[i] = &change_point_list[i];
+
+	/* record all known change-points (starting and ending addresses),
+	   omitting those that are for empty memory regions */
+	chgidx = 0;
+	for (i = 0; i < old_nr; i++)	{
+		if (biosmap[i].size != 0) {
+			change_point[chgidx]->addr = biosmap[i].addr;
+			change_point[chgidx++]->pbios = &biosmap[i];
+			change_point[chgidx]->addr = biosmap[i].addr +
+				biosmap[i].size;
+			change_point[chgidx++]->pbios = &biosmap[i];
+		}
+	}
+	chg_nr = chgidx;
+
+	/* sort change-point list by memory addresses (low -> high) */
+	still_changing = 1;
+	while (still_changing)	{
+		still_changing = 0;
+		for (i = 1; i < chg_nr; i++)  {
+			unsigned long long curaddr, lastaddr;
+			unsigned long long curpbaddr, lastpbaddr;
+
+			curaddr = change_point[i]->addr;
+			lastaddr = change_point[i - 1]->addr;
+			curpbaddr = change_point[i]->pbios->addr;
+			lastpbaddr = change_point[i - 1]->pbios->addr;
+
+			/*
+			 * swap entries, when:
+			 *
+			 * curaddr > lastaddr or
+			 * curaddr == lastaddr and curaddr == curpbaddr and
+			 * lastaddr != lastpbaddr
+			 */
+			if (curaddr < lastaddr ||
+			    (curaddr == lastaddr && curaddr == curpbaddr &&
+			     lastaddr != lastpbaddr)) {
+				change_tmp = change_point[i];
+				change_point[i] = change_point[i-1];
+				change_point[i-1] = change_tmp;
+				still_changing = 1;
+			}
+		}
+	}
+
+	/* create a new bios memory map, removing overlaps */
+	overlap_entries = 0;	 /* number of entries in the overlap table */
+	new_bios_entry = 0;	 /* index for creating new bios map entries */
+	last_type = 0;		 /* start with undefined memory type */
+	last_addr = 0;		 /* start with 0 as last starting address */
+
+	/* loop through change-points, determining affect on the new bios map */
+	for (chgidx = 0; chgidx < chg_nr; chgidx++) {
+		/* keep track of all overlapping bios entries */
+		if (change_point[chgidx]->addr ==
+		    change_point[chgidx]->pbios->addr) {
+			/*
+			 * add map entry to overlap list (> 1 entry
+			 * implies an overlap)
+			 */
+			overlap_list[overlap_entries++] =
+				change_point[chgidx]->pbios;
+		} else {
+			/*
+			 * remove entry from list (order independent,
+			 * so swap with last)
+			 */
+			for (i = 0; i < overlap_entries; i++) {
+				if (overlap_list[i] ==
+				    change_point[chgidx]->pbios)
+					overlap_list[i] =
+						overlap_list[overlap_entries-1];
+			}
+			overlap_entries--;
+		}
+		/*
+		 * if there are overlapping entries, decide which
+		 * "type" to use (larger value takes precedence --
+		 * 1=usable, 2,3,4,4+=unusable)
+		 */
+		current_type = 0;
+		for (i = 0; i < overlap_entries; i++)
+			if (overlap_list[i]->type > current_type)
+				current_type = overlap_list[i]->type;
+		/*
+		 * continue building up new bios map based on this
+		 * information
+		 */
+		if (current_type != last_type)	{
+			if (last_type != 0)	 {
+				new_bios[new_bios_entry].size =
+					change_point[chgidx]->addr - last_addr;
+				/*
+				 * move forward only if the new size
+				 * was non-zero
+				 */
+				if (new_bios[new_bios_entry].size != 0)
+					/*
+					 * no more space left for new
+					 * bios entries ?
+					 */
+					if (++new_bios_entry >= E820MAX)
+						break;
+			}
+			if (current_type != 0)	{
+				new_bios[new_bios_entry].addr =
+					change_point[chgidx]->addr;
+				new_bios[new_bios_entry].type = current_type;
+				last_addr = change_point[chgidx]->addr;
+			}
+			last_type = current_type;
+		}
+	}
+	/* retain count for new bios entries */
+	new_nr = new_bios_entry;
+
+	/* copy new bios mapping into original location */
+	memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
+	*pnr_map = new_nr;
+
+	return 0;
+}
+
+/*
+ * Copy the BIOS e820 map into a safe place.
+ *
+ * Sanity-check it while we're at it..
+ *
+ * If we're lucky and live on a modern system, the setup code
+ * will have given us a memory map that we can use to properly
+ * set up memory.  If we aren't, we'll fake a memory map.
+ */
+int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
+{
+	/* Only one memory region (or negative)? Ignore it */
+	if (nr_map < 2)
+		return -1;
+
+	do {
+		u64 start = biosmap->addr;
+		u64 size = biosmap->size;
+		u64 end = start + size;
+		u32 type = biosmap->type;
+
+		/* Overflow in 64 bits? Ignore the memory map. */
+		if (start > end)
+			return -1;
+
+		add_memory_region(start, size, type);
+	} while (biosmap++, --nr_map);
+	return 0;
+}
+
+u64 __init update_memory_range(u64 start, u64 size, unsigned old_type,
+				unsigned new_type)
+{
+	int i;
+	u64 real_updated_size = 0;
+
+	BUG_ON(old_type == new_type);
+
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+		u64 final_start, final_end;
+		if (ei->type != old_type)
+			continue;
+		/* totally covered? */
+		if (ei->addr >= start &&
+		    (ei->addr + ei->size) <= (start + size)) {
+			ei->type = new_type;
+			real_updated_size += ei->size;
+			continue;
+		}
+		/* partially covered */
+		final_start = max(start, ei->addr);
+		final_end = min(start + size, ei->addr + ei->size);
+		if (final_start >= final_end)
+			continue;
+		add_memory_region(final_start, final_end - final_start,
+					 new_type);
+		real_updated_size += final_end - final_start;
+	}
+	return real_updated_size;
+}
+
+void __init update_e820(void)
+{
+	u8 nr_map;
+
+	nr_map = e820.nr_map;
+	if (sanitize_e820_map(e820.map, &nr_map))
+		return;
+	e820.nr_map = nr_map;
+	printk(KERN_INFO "modified physical RAM map:\n");
+	e820_print_map("modified");
+}
+
+/*
+ * Search for the biggest gap in the low 32 bits of the e820
+ * memory space.  We pass this space to PCI to assign MMIO resources
+ * for hotplug or unconfigured devices in.
+ * Hopefully the BIOS let enough space left.
+ */
+__init void e820_setup_gap(void)
+{
+	unsigned long gapstart, gapsize, round;
+	unsigned long long last;
+	int i;
+	int found = 0;
+
+	last = 0x100000000ull;
+	gapstart = 0x10000000;
+	gapsize = 0x400000;
+	i = e820.nr_map;
+	while (--i >= 0) {
+		unsigned long long start = e820.map[i].addr;
+		unsigned long long end = start + e820.map[i].size;
+
+		/*
+		 * Since "last" is at most 4GB, we know we'll
+		 * fit in 32 bits if this condition is true
+		 */
+		if (last > end) {
+			unsigned long gap = last - end;
+
+			if (gap > gapsize) {
+				gapsize = gap;
+				gapstart = end;
+				found = 1;
+			}
+		}
+		if (start < last)
+			last = start;
+	}
+
+#ifdef CONFIG_X86_64
+	if (!found) {
+		gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024;
+		printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit "
+		       "address range\n"
+		       KERN_ERR "PCI: Unassigned devices with 32bit resource "
+		       "registers may break!\n");
+	}
+#endif
+
+	/*
+	 * See how much we want to round up: start off with
+	 * rounding to the next 1MB area.
+	 */
+	round = 0x100000;
+	while ((gapsize >> 4) > round)
+		round += round;
+	/* Fun with two's complement */
+	pci_mem_start = (gapstart + round) & -round;
+
+	printk(KERN_INFO
+	       "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
+	       pci_mem_start, gapstart, gapsize);
+}
+
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
index 751d517d802..dfe25751038 100644
--- a/arch/x86/kernel/e820_32.c
+++ b/arch/x86/kernel/e820_32.c
@@ -16,21 +16,6 @@
 #include <asm/e820.h>
 #include <asm/setup.h>
 
-struct e820map e820;
-struct change_member {
-	struct e820entry *pbios; /* pointer to original bios entry */
-	unsigned long long addr; /* address for this change point */
-};
-static struct change_member change_point_list[2*E820MAX] __initdata;
-static struct change_member *change_point[2*E820MAX] __initdata;
-static struct e820entry *overlap_list[E820MAX] __initdata;
-static struct e820entry new_bios[E820MAX] __initdata;
-/* For PCI or other memory-mapped resources */
-unsigned long pci_mem_start = 0x10000000;
-#ifdef CONFIG_PCI
-EXPORT_SYMBOL(pci_mem_start);
-#endif
-
 static struct resource system_rom_resource = {
 	.name	= "System ROM",
 	.start	= 0xf0000,
@@ -254,223 +239,6 @@ void __init e820_mark_nosave_regions(void)
 }
 #endif
 
-void __init add_memory_region(unsigned long long start,
-			      unsigned long long size, int type)
-{
-	int x;
-
-	x = e820.nr_map;
-
-	if (x == E820MAX) {
-		printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
-		return;
-	}
-
-	e820.map[x].addr = start;
-	e820.map[x].size = size;
-	e820.map[x].type = type;
-	e820.nr_map++;
-} /* add_memory_region */
-
-/*
- * Sanitize the BIOS e820 map.
- *
- * Some e820 responses include overlapping entries.  The following
- * replaces the original e820 map with a new one, removing overlaps.
- *
- */
-int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
-{
-	struct change_member *change_tmp;
-	unsigned long current_type, last_type;
-	unsigned long long last_addr;
-	int chgidx, still_changing;
-	int overlap_entries;
-	int new_bios_entry;
-	int old_nr, new_nr, chg_nr;
-	int i;
-
-	/*
-		Visually we're performing the following (1,2,3,4 = memory types)...
-
-		Sample memory map (w/overlaps):
-		   ____22__________________
-		   ______________________4_
-		   ____1111________________
-		   _44_____________________
-		   11111111________________
-		   ____________________33__
-		   ___________44___________
-		   __________33333_________
-		   ______________22________
-		   ___________________2222_
-		   _________111111111______
-		   _____________________11_
-		   _________________4______
-
-		Sanitized equivalent (no overlap):
-		   1_______________________
-		   _44_____________________
-		   ___1____________________
-		   ____22__________________
-		   ______11________________
-		   _________1______________
-		   __________3_____________
-		   ___________44___________
-		   _____________33_________
-		   _______________2________
-		   ________________1_______
-		   _________________4______
-		   ___________________2____
-		   ____________________33__
-		   ______________________4_
-	*/
-	/* if there's only one memory region, don't bother */
-	if (*pnr_map < 2) {
-		return -1;
-	}
-
-	old_nr = *pnr_map;
-
-	/* bail out if we find any unreasonable addresses in bios map */
-	for (i=0; i<old_nr; i++)
-		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) {
-			return -1;
-		}
-
-	/* create pointers for initial change-point information (for sorting) */
-	for (i=0; i < 2*old_nr; i++)
-		change_point[i] = &change_point_list[i];
-
-	/* record all known change-points (starting and ending addresses),
-	   omitting those that are for empty memory regions */
-	chgidx = 0;
-	for (i=0; i < old_nr; i++)	{
-		if (biosmap[i].size != 0) {
-			change_point[chgidx]->addr = biosmap[i].addr;
-			change_point[chgidx++]->pbios = &biosmap[i];
-			change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
-			change_point[chgidx++]->pbios = &biosmap[i];
-		}
-	}
-	chg_nr = chgidx;    	/* true number of change-points */
-
-	/* sort change-point list by memory addresses (low -> high) */
-	still_changing = 1;
-	while (still_changing)	{
-		still_changing = 0;
-		for (i=1; i < chg_nr; i++)  {
-			/* if <current_addr> > <last_addr>, swap */
-			/* or, if current=<start_addr> & last=<end_addr>, swap */
-			if ((change_point[i]->addr < change_point[i-1]->addr) ||
-				((change_point[i]->addr == change_point[i-1]->addr) &&
-				 (change_point[i]->addr == change_point[i]->pbios->addr) &&
-				 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
-			   )
-			{
-				change_tmp = change_point[i];
-				change_point[i] = change_point[i-1];
-				change_point[i-1] = change_tmp;
-				still_changing=1;
-			}
-		}
-	}
-
-	/* create a new bios memory map, removing overlaps */
-	overlap_entries=0;	 /* number of entries in the overlap table */
-	new_bios_entry=0;	 /* index for creating new bios map entries */
-	last_type = 0;		 /* start with undefined memory type */
-	last_addr = 0;		 /* start with 0 as last starting address */
-	/* loop through change-points, determining affect on the new bios map */
-	for (chgidx=0; chgidx < chg_nr; chgidx++)
-	{
-		/* keep track of all overlapping bios entries */
-		if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
-		{
-			/* add map entry to overlap list (> 1 entry implies an overlap) */
-			overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
-		}
-		else
-		{
-			/* remove entry from list (order independent, so swap with last) */
-			for (i=0; i<overlap_entries; i++)
-			{
-				if (overlap_list[i] == change_point[chgidx]->pbios)
-					overlap_list[i] = overlap_list[overlap_entries-1];
-			}
-			overlap_entries--;
-		}
-		/* if there are overlapping entries, decide which "type" to use */
-		/* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
-		current_type = 0;
-		for (i=0; i<overlap_entries; i++)
-			if (overlap_list[i]->type > current_type)
-				current_type = overlap_list[i]->type;
-		/* continue building up new bios map based on this information */
-		if (current_type != last_type)	{
-			if (last_type != 0)	 {
-				new_bios[new_bios_entry].size =
-					change_point[chgidx]->addr - last_addr;
-				/* move forward only if the new size was non-zero */
-				if (new_bios[new_bios_entry].size != 0)
-					if (++new_bios_entry >= E820MAX)
-						break; 	/* no more space left for new bios entries */
-			}
-			if (current_type != 0)	{
-				new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
-				new_bios[new_bios_entry].type = current_type;
-				last_addr=change_point[chgidx]->addr;
-			}
-			last_type = current_type;
-		}
-	}
-	new_nr = new_bios_entry;   /* retain count for new bios entries */
-
-	/* copy new bios mapping into original location */
-	memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
-	*pnr_map = new_nr;
-
-	return 0;
-}
-
-/*
- * Copy the BIOS e820 map into a safe place.
- *
- * Sanity-check it while we're at it..
- *
- * If we're lucky and live on a modern system, the setup code
- * will have given us a memory map that we can use to properly
- * set up memory.  If we aren't, we'll fake a memory map.
- *
- * We check to see that the memory map contains at least 2 elements
- * before we'll use it, because the detection code in setup.S may
- * not be perfect and most every PC known to man has two memory
- * regions: one from 0 to 640k, and one from 1mb up.  (The IBM
- * thinkpad 560x, for example, does not cooperate with the memory
- * detection code.)
- */
-int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
-{
-	/* Only one memory region (or negative)? Ignore it */
-	if (nr_map < 2)
-		return -1;
-
-	do {
-		u64 start = biosmap->addr;
-		u64 size = biosmap->size;
-		u64 end = start + size;
-		u32 type = biosmap->type;
-
-		/* Overflow in 64 bits? Ignore the memory map. */
-		if (start > end)
-			return -1;
-
-		add_memory_region(start, size, type);
-	} while (biosmap++, --nr_map);
-
-	return 0;
-}
-
 /*
  * Find the highest page frame number we have available
  */
@@ -535,86 +303,12 @@ void __init register_bootmem_low_pages(unsigned long max_low_pfn)
 	}
 }
 
-void __init e820_register_memory(void)
-{
-	unsigned long gapstart, gapsize, round;
-	unsigned long long last;
-	int i;
-
-	/*
-	 * Search for the biggest gap in the low 32 bits of the e820
-	 * memory space.
-	 */
-	last = 0x100000000ull;
-	gapstart = 0x10000000;
-	gapsize = 0x400000;
-	i = e820.nr_map;
-	while (--i >= 0) {
-		unsigned long long start = e820.map[i].addr;
-		unsigned long long end = start + e820.map[i].size;
-
-		/*
-		 * Since "last" is at most 4GB, we know we'll
-		 * fit in 32 bits if this condition is true
-		 */
-		if (last > end) {
-			unsigned long gap = last - end;
-
-			if (gap > gapsize) {
-				gapsize = gap;
-				gapstart = end;
-			}
-		}
-		if (start < last)
-			last = start;
-	}
-
-	/*
-	 * See how much we want to round up: start off with
-	 * rounding to the next 1MB area.
-	 */
-	round = 0x100000;
-	while ((gapsize >> 4) > round)
-		round += round;
-	/* Fun with two's complement */
-	pci_mem_start = (gapstart + round) & -round;
-
-	printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
-		pci_mem_start, gapstart, gapsize);
-}
-
-static void __init print_memory_map(char *who)
-{
-	int i;
-
-	for (i = 0; i < e820.nr_map; i++) {
-		printk(" %s: %016Lx - %016Lx ", who,
-			e820.map[i].addr,
-			e820.map[i].addr + e820.map[i].size);
-		switch (e820.map[i].type) {
-		case E820_RAM:	printk("(usable)\n");
-				break;
-		case E820_RESERVED:
-				printk("(reserved)\n");
-				break;
-		case E820_ACPI:
-				printk("(ACPI data)\n");
-				break;
-		case E820_NVS:
-				printk("(ACPI NVS)\n");
-				break;
-		default:	printk("type %u\n", e820.map[i].type);
-				break;
-		}
-	}
-}
-
 void __init limit_regions(unsigned long long size)
 {
 	unsigned long long current_addr;
 	int i;
 
-	print_memory_map("limit_regions start");
+	e820_print_map("limit_regions start");
 	for (i = 0; i < e820.nr_map; i++) {
 		current_addr = e820.map[i].addr + e820.map[i].size;
 		if (current_addr < size)
@@ -633,62 +327,10 @@ void __init limit_regions(unsigned long long size)
 			e820.nr_map = i + 1;
 			e820.map[i].size -= current_addr - size;
 		}
-		print_memory_map("limit_regions endfor");
+		e820_print_map("limit_regions endfor");
 		return;
 	}
-	print_memory_map("limit_regions endfunc");
-}
-
-/*
- * This function checks if any part of the range <start,end> is mapped
- * with type.
- */
-int
-e820_any_mapped(u64 start, u64 end, unsigned type)
-{
-	int i;
-	for (i = 0; i < e820.nr_map; i++) {
-		const struct e820entry *ei = &e820.map[i];
-		if (type && ei->type != type)
-			continue;
-		if (ei->addr >= end || ei->addr + ei->size <= start)
-			continue;
-		return 1;
-	}
-	return 0;
-}
-EXPORT_SYMBOL_GPL(e820_any_mapped);
-
- /*
-  * This function checks if the entire range <start,end> is mapped with type.
-  *
-  * Note: this function only works correct if the e820 table is sorted and
-  * not-overlapping, which is the case
-  */
-int __init
-e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
-{
-	u64 start = s;
-	u64 end = e;
-	int i;
-	for (i = 0; i < e820.nr_map; i++) {
-		struct e820entry *ei = &e820.map[i];
-		if (type && ei->type != type)
-			continue;
-		/* is the region (part) in overlap with the current region ?*/
-		if (ei->addr >= end || ei->addr + ei->size <= start)
-			continue;
-		/* if the region is at the beginning of <start,end> we move
-		 * start to the end of the region since it's ok until there
-		 */
-		if (ei->addr <= start)
-			start = ei->addr + ei->size;
-		/* if start is now at or beyond end, we're done, full
-		 * coverage */
-		if (start >= end)
-			return 1; /* we're done */
-	}
-	return 0;
+	e820_print_map("limit_regions endfunc");
 }
 
 /* Overridden in paravirt.c if CONFIG_PARAVIRT */
@@ -700,7 +342,7 @@ char * __init __attribute__((weak)) memory_setup(void)
 void __init setup_memory_map(void)
 {
 	printk(KERN_INFO "BIOS-provided physical RAM map:\n");
-	print_memory_map(memory_setup());
+	e820_print_map(memory_setup());
 }
 
 static int __initdata user_defined_memmap;
@@ -783,55 +425,12 @@ static int __init parse_memmap(char *arg)
 	return 0;
 }
 early_param("memmap", parse_memmap);
-u64 __init update_memory_range(u64 start, u64 size, unsigned old_type,
-				unsigned new_type)
-{
-	int i;
-	u64 real_updated_size = 0;
-
-	BUG_ON(old_type == new_type);
-
-	for (i = 0; i < e820.nr_map; i++) {
-		struct e820entry *ei = &e820.map[i];
-		u64 final_start, final_end;
-		if (ei->type != old_type)
-			continue;
-		/* totally covered? */
-		if (ei->addr >= start &&
-		    (ei->addr + ei->size) <= (start + size)) {
-			ei->type = new_type;
-			real_updated_size += ei->size;
-			continue;
-		}
-		/* partially covered */
-		final_start = max(start, ei->addr);
-		final_end = min(start + size, ei->addr + ei->size);
-		if (final_start >= final_end)
-			continue;
-		add_memory_region(final_start, final_end - final_start,
-					 new_type);
-		real_updated_size += final_end - final_start;
-	}
-
-	return real_updated_size;
-}
 
 void __init finish_e820_parsing(void)
 {
 	if (user_defined_memmap) {
 		printk(KERN_INFO "user-defined physical RAM map:\n");
-		print_memory_map("user");
+		e820_print_map("user");
 	}
 }
 
-void __init update_e820(void)
-{
-	u8 nr_map;
-
-	nr_map = e820.nr_map;
-	if (sanitize_e820_map(e820.map, &nr_map))
-		return;
-	e820.nr_map = nr_map;
-	printk(KERN_INFO "modified physical RAM map:\n");
-	print_memory_map("modified");
-}
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index c45b4dea405..28a14eb65d3 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -29,8 +29,6 @@
 #include <asm/kdebug.h>
 #include <asm/trampoline.h>
 
-struct e820map e820;
-
 /*
  * PFN of last memory page.
  */
@@ -176,62 +174,6 @@ again:
 	}
 	return changed;
 }
-/*
- * This function checks if any part of the range <start,end> is mapped
- * with type.
- */
-int
-e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
-{
-	int i;
-
-	for (i = 0; i < e820.nr_map; i++) {
-		struct e820entry *ei = &e820.map[i];
-
-		if (type && ei->type != type)
-			continue;
-		if (ei->addr >= end || ei->addr + ei->size <= start)
-			continue;
-		return 1;
-	}
-	return 0;
-}
-EXPORT_SYMBOL_GPL(e820_any_mapped);
-
-/*
- * This function checks if the entire range <start,end> is mapped with type.
- *
- * Note: this function only works correct if the e820 table is sorted and
- * not-overlapping, which is the case
- */
-int __init e820_all_mapped(unsigned long start, unsigned long end,
-			   unsigned type)
-{
-	int i;
-
-	for (i = 0; i < e820.nr_map; i++) {
-		struct e820entry *ei = &e820.map[i];
-
-		if (type && ei->type != type)
-			continue;
-		/* is the region (part) in overlap with the current region ?*/
-		if (ei->addr >= end || ei->addr + ei->size <= start)
-			continue;
-
-		/* if the region is at the beginning of <start,end> we move
-		 * start to the end of the region since it's ok until there
-		 */
-		if (ei->addr <= start)
-			start = ei->addr + ei->size;
-		/*
-		 * if start is now at or beyond end, we're done, full
-		 * coverage
-		 */
-		if (start >= end)
-			return 1;
-	}
-	return 0;
-}
 
 /*
  * Find a free area with specified alignment in a specific range.
@@ -434,24 +376,6 @@ e820_register_active_regions(int nid, unsigned long start_pfn,
 			add_active_range(nid, ei_startpfn, ei_endpfn);
 }
 
-/*
- * Add a memory region to the kernel e820 map.
- */
-void __init add_memory_region(unsigned long start, unsigned long size, int type)
-{
-	int x = e820.nr_map;
-
-	if (x == E820MAX) {
-		printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
-		return;
-	}
-
-	e820.map[x].addr = start;
-	e820.map[x].size = size;
-	e820.map[x].type = type;
-	e820.nr_map++;
-}
-
 /*
  * Find the hole size (in bytes) in the memory range.
  * @start: starting address of the memory range to scan
@@ -473,266 +397,6 @@ unsigned long __init e820_hole_size(unsigned long start, unsigned long end)
 	return end - start - (ram << PAGE_SHIFT);
 }
 
-static void __init e820_print_map(char *who)
-{
-	int i;
-
-	for (i = 0; i < e820.nr_map; i++) {
-		printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
-		       (unsigned long long) e820.map[i].addr,
-		       (unsigned long long)
-		       (e820.map[i].addr + e820.map[i].size));
-		switch (e820.map[i].type) {
-		case E820_RAM:
-			printk(KERN_CONT "(usable)\n");
-			break;
-		case E820_RESERVED:
-			printk(KERN_CONT "(reserved)\n");
-			break;
-		case E820_ACPI:
-			printk(KERN_CONT "(ACPI data)\n");
-			break;
-		case E820_NVS:
-			printk(KERN_CONT "(ACPI NVS)\n");
-			break;
-		default:
-			printk(KERN_CONT "type %u\n", e820.map[i].type);
-			break;
-		}
-	}
-}
-
-/*
- * Sanitize the BIOS e820 map.
- *
- * Some e820 responses include overlapping entries. The following
- * replaces the original e820 map with a new one, removing overlaps.
- *
- */
-static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map)
-{
-	struct change_member {
-		struct e820entry *pbios; /* pointer to original bios entry */
-		unsigned long long addr; /* address for this change point */
-	};
-	static struct change_member change_point_list[2*E820MAX] __initdata;
-	static struct change_member *change_point[2*E820MAX] __initdata;
-	static struct e820entry *overlap_list[E820MAX] __initdata;
-	static struct e820entry new_bios[E820MAX] __initdata;
-	struct change_member *change_tmp;
-	unsigned long current_type, last_type;
-	unsigned long long last_addr;
-	int chgidx, still_changing;
-	int overlap_entries;
-	int new_bios_entry;
-	int old_nr, new_nr, chg_nr;
-	int i;
-
-	/*
-		Visually we're performing the following
-		(1,2,3,4 = memory types)...
-
-		Sample memory map (w/overlaps):
-		   ____22__________________
-		   ______________________4_
-		   ____1111________________
-		   _44_____________________
-		   11111111________________
-		   ____________________33__
-		   ___________44___________
-		   __________33333_________
-		   ______________22________
-		   ___________________2222_
-		   _________111111111______
-		   _____________________11_
-		   _________________4______
-
-		Sanitized equivalent (no overlap):
-		   1_______________________
-		   _44_____________________
-		   ___1____________________
-		   ____22__________________
-		   ______11________________
-		   _________1______________
-		   __________3_____________
-		   ___________44___________
-		   _____________33_________
-		   _______________2________
-		   ________________1_______
-		   _________________4______
-		   ___________________2____
-		   ____________________33__
-		   ______________________4_
-	*/
-
-	/* if there's only one memory region, don't bother */
-	if (*pnr_map < 2)
-		return -1;
-
-	old_nr = *pnr_map;
-
-	/* bail out if we find any unreasonable addresses in bios map */
-	for (i = 0; i < old_nr; i++)
-		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
-			return -1;
-
-	/* create pointers for initial change-point information (for sorting) */
-	for (i = 0; i < 2 * old_nr; i++)
-		change_point[i] = &change_point_list[i];
-
-	/* record all known change-points (starting and ending addresses),
-	   omitting those that are for empty memory regions */
-	chgidx = 0;
-	for (i = 0; i < old_nr; i++)	{
-		if (biosmap[i].size != 0) {
-			change_point[chgidx]->addr = biosmap[i].addr;
-			change_point[chgidx++]->pbios = &biosmap[i];
-			change_point[chgidx]->addr = biosmap[i].addr +
-				biosmap[i].size;
-			change_point[chgidx++]->pbios = &biosmap[i];
-		}
-	}
-	chg_nr = chgidx;
-
-	/* sort change-point list by memory addresses (low -> high) */
-	still_changing = 1;
-	while (still_changing)	{
-		still_changing = 0;
-		for (i = 1; i < chg_nr; i++)  {
-			unsigned long long curaddr, lastaddr;
-			unsigned long long curpbaddr, lastpbaddr;
-
-			curaddr = change_point[i]->addr;
-			lastaddr = change_point[i - 1]->addr;
-			curpbaddr = change_point[i]->pbios->addr;
-			lastpbaddr = change_point[i - 1]->pbios->addr;
-
-			/*
-			 * swap entries, when:
-			 *
-			 * curaddr > lastaddr or
-			 * curaddr == lastaddr and curaddr == curpbaddr and
-			 * lastaddr != lastpbaddr
-			 */
-			if (curaddr < lastaddr ||
-			    (curaddr == lastaddr && curaddr == curpbaddr &&
-			     lastaddr != lastpbaddr)) {
-				change_tmp = change_point[i];
-				change_point[i] = change_point[i-1];
-				change_point[i-1] = change_tmp;
-				still_changing = 1;
-			}
-		}
-	}
-
-	/* create a new bios memory map, removing overlaps */
-	overlap_entries = 0;	 /* number of entries in the overlap table */
-	new_bios_entry = 0;	 /* index for creating new bios map entries */
-	last_type = 0;		 /* start with undefined memory type */
-	last_addr = 0;		 /* start with 0 as last starting address */
-
-	/* loop through change-points, determining affect on the new bios map */
-	for (chgidx = 0; chgidx < chg_nr; chgidx++) {
-		/* keep track of all overlapping bios entries */
-		if (change_point[chgidx]->addr ==
-		    change_point[chgidx]->pbios->addr) {
-			/*
-			 * add map entry to overlap list (> 1 entry
-			 * implies an overlap)
-			 */
-			overlap_list[overlap_entries++] =
-				change_point[chgidx]->pbios;
-		} else {
-			/*
-			 * remove entry from list (order independent,
-			 * so swap with last)
-			 */
-			for (i = 0; i < overlap_entries; i++) {
-				if (overlap_list[i] ==
-				    change_point[chgidx]->pbios)
-					overlap_list[i] =
-						overlap_list[overlap_entries-1];
-			}
-			overlap_entries--;
-		}
-		/*
-		 * if there are overlapping entries, decide which
-		 * "type" to use (larger value takes precedence --
-		 * 1=usable, 2,3,4,4+=unusable)
-		 */
-		current_type = 0;
-		for (i = 0; i < overlap_entries; i++)
-			if (overlap_list[i]->type > current_type)
-				current_type = overlap_list[i]->type;
-		/*
-		 * continue building up new bios map based on this
-		 * information
-		 */
-		if (current_type != last_type)	{
-			if (last_type != 0)	 {
-				new_bios[new_bios_entry].size =
-					change_point[chgidx]->addr - last_addr;
-				/*
-				 * move forward only if the new size
-				 * was non-zero
-				 */
-				if (new_bios[new_bios_entry].size != 0)
-					/*
-					 * no more space left for new
-					 * bios entries ?
-					 */
-					if (++new_bios_entry >= E820MAX)
-						break;
-			}
-			if (current_type != 0)	{
-				new_bios[new_bios_entry].addr =
-					change_point[chgidx]->addr;
-				new_bios[new_bios_entry].type = current_type;
-				last_addr = change_point[chgidx]->addr;
-			}
-			last_type = current_type;
-		}
-	}
-	/* retain count for new bios entries */
-	new_nr = new_bios_entry;
-
-	/* copy new bios mapping into original location */
-	memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
-	*pnr_map = new_nr;
-
-	return 0;
-}
-
-/*
- * Copy the BIOS e820 map into a safe place.
- *
- * Sanity-check it while we're at it..
- *
- * If we're lucky and live on a modern system, the setup code
- * will have given us a memory map that we can use to properly
- * set up memory.  If we aren't, we'll fake a memory map.
- */
-static int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
-{
-	/* Only one memory region (or negative)? Ignore it */
-	if (nr_map < 2)
-		return -1;
-
-	do {
-		u64 start = biosmap->addr;
-		u64 size = biosmap->size;
-		u64 end = start + size;
-		u32 type = biosmap->type;
-
-		/* Overflow in 64 bits? Ignore the memory map. */
-		if (start > end)
-			return -1;
-
-		add_memory_region(start, size, type);
-	} while (biosmap++, --nr_map);
-	return 0;
-}
-
 static void early_panic(char *msg)
 {
 	early_printk(msg);
@@ -829,114 +493,6 @@ void __init finish_e820_parsing(void)
 	}
 }
 
-u64 __init update_memory_range(u64 start, u64 size, unsigned old_type,
-				unsigned new_type)
-{
-	int i;
-	u64 real_updated_size = 0;
-
-	BUG_ON(old_type == new_type);
-
-	for (i = 0; i < e820.nr_map; i++) {
-		struct e820entry *ei = &e820.map[i];
-		u64 final_start, final_end;
-		if (ei->type != old_type)
-			continue;
-		/* totally covered? */
-		if (ei->addr >= start &&
-		    (ei->addr + ei->size) <= (start + size)) {
-			ei->type = new_type;
-			real_updated_size += ei->size;
-			continue;
-		}
-		/* partially covered */
-		final_start = max(start, ei->addr);
-		final_end = min(start + size, ei->addr + ei->size);
-		if (final_start >= final_end)
-			continue;
-		add_memory_region(final_start, final_end - final_start,
-					 new_type);
-		real_updated_size += final_end - final_start;
-	}
-	return real_updated_size;
-}
-
-void __init update_e820(void)
-{
-	u8 nr_map;
-
-	nr_map = e820.nr_map;
-	if (sanitize_e820_map(e820.map, &nr_map))
-		return;
-	e820.nr_map = nr_map;
-	printk(KERN_INFO "modified physical RAM map:\n");
-	e820_print_map("modified");
-}
-
-unsigned long pci_mem_start = 0xaeedbabe;
-EXPORT_SYMBOL(pci_mem_start);
-
-/*
- * Search for the biggest gap in the low 32 bits of the e820
- * memory space.  We pass this space to PCI to assign MMIO resources
- * for hotplug or unconfigured devices in.
- * Hopefully the BIOS let enough space left.
- */
-__init void e820_setup_gap(void)
-{
-	unsigned long gapstart, gapsize, round;
-	unsigned long last;
-	int i;
-	int found = 0;
-
-	last = 0x100000000ull;
-	gapstart = 0x10000000;
-	gapsize = 0x400000;
-	i = e820.nr_map;
-	while (--i >= 0) {
-		unsigned long long start = e820.map[i].addr;
-		unsigned long long end = start + e820.map[i].size;
-
-		/*
-		 * Since "last" is at most 4GB, we know we'll
-		 * fit in 32 bits if this condition is true
-		 */
-		if (last > end) {
-			unsigned long gap = last - end;
-
-			if (gap > gapsize) {
-				gapsize = gap;
-				gapstart = end;
-				found = 1;
-			}
-		}
-		if (start < last)
-			last = start;
-	}
-
-	if (!found) {
-		gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024;
-		printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit "
-		       "address range\n"
-		       KERN_ERR "PCI: Unassigned devices with 32bit resource "
-		       "registers may break!\n");
-	}
-
-	/*
-	 * See how much we want to round up: start off with
-	 * rounding to the next 1MB area.
-	 */
-	round = 0x100000;
-	while ((gapsize >> 4) > round)
-		round += round;
-	/* Fun with two's complement */
-	pci_mem_start = (gapstart + round) & -round;
-
-	printk(KERN_INFO
-	       "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
-	       pci_mem_start, gapstart, gapsize);
-}
-
 int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)
 {
 	int i;
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index b54c79c91ef..5faeab69edd 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -875,7 +875,7 @@ void __init setup_arch(char **cmdline_p)
 		get_smp_config();
 #endif
 
-	e820_register_memory();
+	e820_setup_gap();
 	e820_mark_nosave_regions();
 
 #ifdef CONFIG_VT
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index 7004251fc66..b5b519feba1 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -20,6 +20,20 @@ struct e820map {
 	__u32 nr_map;
 	struct e820entry map[E820MAX];
 };
+
+extern struct e820map e820;
+
+extern int e820_any_mapped(u64 start, u64 end, unsigned type);
+extern int e820_all_mapped(u64 start, u64 end, unsigned type);
+extern void add_memory_region(u64 start, u64 size, int type);
+extern void e820_print_map(char *who);
+extern int sanitize_e820_map(struct e820entry *biosmap, char *pnr_map);
+extern int copy_e820_map(struct e820entry *biosmap, int nr_map);
+extern u64 update_memory_range(u64 start, u64 size, unsigned old_type,
+			       unsigned new_type);
+extern void update_e820(void);
+extern void e820_setup_gap(void);
+
 #endif /* __ASSEMBLY__ */
 
 #define ISA_START_ADDRESS	0xa0000
diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h
index af0711b220d..9576b438fbd 100644
--- a/include/asm-x86/e820_32.h
+++ b/include/asm-x86/e820_32.h
@@ -21,19 +21,8 @@
 extern void setup_memory_map(void);
 extern void finish_e820_parsing(void);
 
-extern struct e820map e820;
-extern void update_e820(void);
-
-extern int e820_all_mapped(unsigned long start, unsigned long end,
-			   unsigned type);
-extern int e820_any_mapped(u64 start, u64 end, unsigned type);
 extern void propagate_e820_map(void);
 extern void register_bootmem_low_pages(unsigned long max_low_pfn);
-extern void add_memory_region(unsigned long long start,
-			      unsigned long long size, int type);
-extern u64 update_memory_range(u64 start, u64 size, unsigned old_type,
-				unsigned new_type);
-extern void e820_register_memory(void);
 extern void limit_regions(unsigned long long size);
 extern void init_iomem_resources(struct resource *code_resource,
 				 struct resource *data_resource,
@@ -47,6 +36,5 @@ static inline void e820_mark_nosave_regions(void)
 }
 #endif
 
-
 #endif/*!__ASSEMBLY__*/
 #endif/*__E820_HEADER*/
diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h
index 6ae3e280328..9fac77e0144 100644
--- a/include/asm-x86/e820_64.h
+++ b/include/asm-x86/e820_64.h
@@ -19,34 +19,22 @@ extern unsigned long find_e820_area(unsigned long start, unsigned long end,
 extern unsigned long find_e820_area_size(unsigned long start,
 					 unsigned long *sizep,
 					 unsigned long align);
-extern void add_memory_region(unsigned long start, unsigned long size,
-			      int type);
-extern u64 update_memory_range(u64 start, u64 size, unsigned old_type,
-				unsigned new_type);
 extern void setup_memory_region(void);
 extern void contig_e820_setup(void);
 extern unsigned long e820_end_of_ram(void);
 extern void e820_reserve_resources(void);
 extern void e820_mark_nosave_regions(void);
-extern int e820_any_mapped(unsigned long start, unsigned long end,
-			   unsigned type);
-extern int e820_all_mapped(unsigned long start, unsigned long end,
-			   unsigned type);
 extern int e820_any_non_reserved(unsigned long start, unsigned long end);
 extern int is_memory_any_valid(unsigned long start, unsigned long end);
 extern int e820_all_non_reserved(unsigned long start, unsigned long end);
 extern int is_memory_all_valid(unsigned long start, unsigned long end);
 extern unsigned long e820_hole_size(unsigned long start, unsigned long end);
 
-extern void e820_setup_gap(void);
 extern void e820_register_active_regions(int nid, unsigned long start_pfn,
 					 unsigned long end_pfn);
 
 extern void finish_e820_parsing(void);
 
-extern struct e820map e820;
-extern void update_e820(void);
-
 extern void reserve_early(unsigned long start, unsigned long end, char *name);
 extern void free_early(unsigned long start, unsigned long end);
 extern void early_res_to_bootmem(unsigned long start, unsigned long end);
-- 
cgit v1.2.3-70-g09d2


From e3f8ba81fdce852abe36a33eed7b243c4a0acfac Mon Sep 17 00:00:00 2001
From: Paul Jackson <pj@sgi.com>
Date: Wed, 14 May 2008 08:15:04 -0700
Subject: x86 boot: include missing smp.h header

The patch:
    x86: convert cpu_to_apicid to be a per cpu variable
introduced a dependency of ipi.h on smp.h in x86
builds with an allnoconfig.  Including smp.h in ipi.h
fixes the build error:
    In file included from arch/x86/kernel/traps_64.c:48:
    include/asm/ipi.h: In function 'send_IPI_mask_sequence':
    include/asm/ipi.h:114: error: 'per_cpu__x86_cpu_to_apicid' undeclared (first use in this function)

Signed-off-by: Paul Jackson <pj@sgi.com>
Cc: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/ipi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/ipi.h b/include/asm-x86/ipi.h
index ecc80f341f3..196d63c28aa 100644
--- a/include/asm-x86/ipi.h
+++ b/include/asm-x86/ipi.h
@@ -20,6 +20,7 @@
 
 #include <asm/hw_irq.h>
 #include <asm/apic.h>
+#include <asm/smp.h>
 
 /*
  * the following functions deal with sending IPIs between CPUs.
-- 
cgit v1.2.3-70-g09d2


From c3965bd15118742d72b4bc1a290d37b3f081eb98 Mon Sep 17 00:00:00 2001
From: Paul Jackson <pj@sgi.com>
Date: Wed, 14 May 2008 08:15:34 -0700
Subject: x86 boot: proper use of ARRAY_SIZE instead of repeated E820MAX
 constant

This patch is motivated by a subsequent patch which will allow for more
memory map entries on EFI supported systems than can be passed via the x86
legacy BIOS E820 interface.  The legacy interface is limited to E820MAX ==
128 memory entries, and that "E820MAX" manifest constant was used as the
size for several arrays and loops over those arrays.

The primary change in this patch is to change code loop sizes over those
arrays from using the constant E820MAX, to using the ARRAY_SIZE() macro
evaluated for the array being looped.  That way, a subsequent patch can
change the size of some of these arrays, without breaking this code.

This patch also adds a parameter to the sanitize_e820_map() routine,
which had an implicit size for the array passed it of E820MAX entries.
This new parameter explicitly passes the size of said array.  Once again,
this will allow a subsequent patch to change that array size for some
calls to sanitize_e820_map() without breaking the code.

As part of enhancing the sanitize_e820_map() interface this way, I further
combined the unnecessarily distinct x86_32 and x86_64 declarations for
this routine into a single, commonly used, declaration.

This patch in itself should make no difference to the resulting kernel
binary.

[ mingo@elte.hu: merged to -tip ]

Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/boot/memory.c        | 3 ++-
 arch/x86/kernel/e820.c        | 9 +++++----
 arch/x86/kernel/e820_64.c     | 6 ++++--
 arch/x86/mach-default/setup.c | 4 +++-
 arch/x86/mach-voyager/setup.c | 4 +++-
 include/asm-x86/e820.h        | 3 ++-
 include/asm-x86/setup.h       | 1 -
 7 files changed, 19 insertions(+), 11 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index acad32eb429..53165c97336 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -13,6 +13,7 @@
  */
 
 #include "boot.h"
+#include <linux/kernel.h>
 
 #define SMAP	0x534d4150	/* ASCII "SMAP" */
 
@@ -53,7 +54,7 @@ static int detect_memory_e820(void)
 
 		count++;
 		desc++;
-	} while (next && count < E820MAX);
+	} while (next && count < ARRAY_SIZE(boot_params.e820_map));
 
 	return boot_params.e820_entries = count;
 }
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 2cb686f60d0..2396b9da802 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -95,7 +95,7 @@ void __init add_memory_region(u64 start, u64 size, int type)
 {
 	int x = e820.nr_map;
 
-	if (x == E820MAX) {
+	if (x == ARRAY_SIZE(e820.map)) {
 		printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
 		return;
 	}
@@ -142,7 +142,8 @@ void __init e820_print_map(char *who)
  * replaces the original e820 map with a new one, removing overlaps.
  *
  */
-int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map)
+int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
+				char *pnr_map)
 {
 	struct change_member {
 		struct e820entry *pbios; /* pointer to original bios entry */
@@ -314,7 +315,7 @@ int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map)
 					 * no more space left for new
 					 * bios entries ?
 					 */
-					if (++new_bios_entry >= E820MAX)
+					if (++new_bios_entry >= max_nr_map)
 						break;
 			}
 			if (current_type != 0)	{
@@ -403,7 +404,7 @@ void __init update_e820(void)
 	u8 nr_map;
 
 	nr_map = e820.nr_map;
-	if (sanitize_e820_map(e820.map, &nr_map))
+	if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map))
 		return;
 	e820.nr_map = nr_map;
 	printk(KERN_INFO "modified physical RAM map:\n");
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index c10b4aece5e..58dc6eee4d4 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -413,7 +413,9 @@ char *__init machine_specific_memory_setup(void)
 	 * Otherwise fake a memory map; one section from 0k->640k,
 	 * the next section from 1mb->appropriate_mem_k
 	 */
-	sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries);
+	sanitize_e820_map(boot_params.e820_map,
+			ARRAY_SIZE(boot_params.e820_map),
+			&boot_params.e820_entries);
 	if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0)
 		early_panic("Cannot find a valid memory map");
 	printk(KERN_INFO "BIOS-provided physical RAM map:\n");
@@ -484,7 +486,7 @@ void __init finish_e820_parsing(void)
 	if (userdef) {
 		char nr = e820.nr_map;
 
-		if (sanitize_e820_map(e820.map, &nr) < 0)
+		if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0)
 			early_panic("Invalid user supplied memory map");
 		e820.nr_map = nr;
 
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c
index 0c28a071824..38a856c4fc0 100644
--- a/arch/x86/mach-default/setup.c
+++ b/arch/x86/mach-default/setup.c
@@ -163,7 +163,9 @@ char * __init machine_specific_memory_setup(void)
 	 * Otherwise fake a memory map; one section from 0k->640k,
 	 * the next section from 1mb->appropriate_mem_k
 	 */
-	sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries);
+	sanitize_e820_map(boot_params.e820_map,
+			ARRAY_SIZE(boot_params.e820_map),
+			&boot_params.e820_entries);
 	if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries)
 	    < 0) {
 		unsigned long mem_size;
diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c
index 5ae5466b9eb..662b5c0a77d 100644
--- a/arch/x86/mach-voyager/setup.c
+++ b/arch/x86/mach-voyager/setup.c
@@ -111,7 +111,9 @@ char *__init machine_specific_memory_setup(void)
 	 * Otherwise fake a memory map; one section from 0k->640k,
 	 * the next section from 1mb->appropriate_mem_k
 	 */
-	sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries);
+	sanitize_e820_map(boot_params.e820_map,
+			ARRAY_SIZE(boot_params.e820_map),
+			&boot_params.e820_entries);
 	if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries)
 	    < 0) {
 		unsigned long mem_size;
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index b5b519feba1..65c891d2a4c 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -27,7 +27,8 @@ extern int e820_any_mapped(u64 start, u64 end, unsigned type);
 extern int e820_all_mapped(u64 start, u64 end, unsigned type);
 extern void add_memory_region(u64 start, u64 size, int type);
 extern void e820_print_map(char *who);
-extern int sanitize_e820_map(struct e820entry *biosmap, char *pnr_map);
+extern int
+sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, char *pnr_map);
 extern int copy_e820_map(struct e820entry *biosmap, int nr_map);
 extern u64 update_memory_range(u64 start, u64 size, unsigned old_type,
 			       unsigned new_type);
diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index fa6763af8d2..ffa0f540fc7 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -56,7 +56,6 @@ char * __init machine_specific_memory_setup(void);
 char *memory_setup(void);
 
 int __init copy_e820_map(struct e820entry *biosmap, int nr_map);
-int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map);
 void __init add_memory_region(unsigned long long start,
 			      unsigned long long size, int type);
 
-- 
cgit v1.2.3-70-g09d2


From 028b785888c523baccdf27af0cdbf1deb92edec0 Mon Sep 17 00:00:00 2001
From: Paul Jackson <pj@sgi.com>
Date: Wed, 14 May 2008 08:15:40 -0700
Subject: x86 boot: extend some internal memory map arrays to handle larger EFI
 input

Extend internal boot time memory tables to allow for up to
three entries per node, which may be larger than the 128 E820MAX
entries handled by the legacy BIOS E820 interface.  The EFI
interface, if present, is capable of passing memory map
entries for these larger node counts.

This patch requires an earlier patch that rewrote code depending
on these array sizes from using E820MAX explicitly to size loops,
to instead using ARRAY_SIZE() of the applicable array.

Another patch following this one will provide the code to pick
up additional memory entries passed via the EFI interface from
the BIOS and insert them in the following, now enlarged, arrays.

Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/e820.c |  8 ++++----
 include/asm-x86/e820.h | 37 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 40 insertions(+), 5 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 2396b9da802..3f7777b255a 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -149,10 +149,10 @@ int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
 		struct e820entry *pbios; /* pointer to original bios entry */
 		unsigned long long addr; /* address for this change point */
 	};
-	static struct change_member change_point_list[2*E820MAX] __initdata;
-	static struct change_member *change_point[2*E820MAX] __initdata;
-	static struct e820entry *overlap_list[E820MAX] __initdata;
-	static struct e820entry new_bios[E820MAX] __initdata;
+static struct change_member change_point_list[2*E820_X_MAX] __initdata;
+static struct change_member *change_point[2*E820_X_MAX] __initdata;
+static struct e820entry *overlap_list[E820_X_MAX] __initdata;
+static struct e820entry new_bios[E820_X_MAX] __initdata;
 	struct change_member *change_tmp;
 	unsigned long current_type, last_type;
 	unsigned long long last_addr;
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index 65c891d2a4c..ab18457a95c 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -2,6 +2,41 @@
 #define __ASM_E820_H
 #define E820MAP	0x2d0		/* our map */
 #define E820MAX	128		/* number of entries in E820MAP */
+
+/*
+ * Legacy E820 BIOS limits us to 128 (E820MAX) nodes due to the
+ * constrained space in the zeropage.  If we have more nodes than
+ * that, and if we've booted off EFI firmware, then the EFI tables
+ * passed us from the EFI firmware can list more nodes.  Size our
+ * internal memory map tables to have room for these additional
+ * nodes, based on up to three entries per node for which the
+ * kernel was built: MAX_NUMNODES == (1 << CONFIG_NODES_SHIFT),
+ * plus E820MAX, allowing space for the possible duplicate E820
+ * entries that might need room in the same arrays, prior to the
+ * call to sanitize_e820_map() to remove duplicates.  The allowance
+ * of three memory map entries per node is "enough" entries for
+ * the initial hardware platform motivating this mechanism to make
+ * use of additional EFI map entries.  Future platforms may want
+ * to allow more than three entries per node or otherwise refine
+ * this size.
+ */
+
+/*
+ * Odd: 'make headers_check' complains about numa.h if I try
+ * to collapse the next two #ifdef lines to a single line:
+ *	#if defined(__KERNEL__) && defined(CONFIG_EFI)
+ */
+#ifdef __KERNEL__
+#ifdef CONFIG_EFI
+#include <linux/numa.h>
+#define E820_X_MAX (E820MAX + 3 * MAX_NUMNODES)
+#else	/* ! CONFIG_EFI */
+#define E820_X_MAX E820MAX
+#endif
+#else	/* ! __KERNEL__ */
+#define E820_X_MAX E820MAX
+#endif
+
 #define E820NR	0x1e8		/* # entries in E820MAP */
 
 #define E820_RAM	1
@@ -18,7 +53,7 @@ struct e820entry {
 
 struct e820map {
 	__u32 nr_map;
-	struct e820entry map[E820MAX];
+	struct e820entry map[E820_X_MAX];
 };
 
 extern struct e820map e820;
-- 
cgit v1.2.3-70-g09d2


From 6e9bcc796b120d17b08dde7ab958b82ddb899889 Mon Sep 17 00:00:00 2001
From: Paul Jackson <pj@sgi.com>
Date: Wed, 14 May 2008 08:15:46 -0700
Subject: x86 boot: change sanitize_e820_map parameter from byte to int to
 allow bigger memory maps

The map size counter passed into, and back out of, sanitize_e820_map(),
was an eight bit type (char or u8), as derived from its origins in
legacy BIOS E820 structures.  This patch changes that type to an 'int',
to allow this sanitize routine to also be used on larger maps (larger
than the 256 count that fits in a char).  The legacy BIOS E820 interface
of course does not change; that remains at 8 bits for this count, holding
up to E820MAX == 128 entries.  But the kernel internals can handle more
when those additional memory map entries are passed from the BIOS via
EFI interfaces.

Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/e820.c        | 5 +++--
 arch/x86/kernel/e820_64.c     | 7 +++++--
 arch/x86/mach-default/setup.c | 5 ++++-
 arch/x86/mach-voyager/setup.c | 5 ++++-
 include/asm-x86/e820.h        | 2 +-
 5 files changed, 17 insertions(+), 7 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 3f7777b255a..91abf5b2fb9 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -143,7 +143,7 @@ void __init e820_print_map(char *who)
  *
  */
 int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
-				char *pnr_map)
+				int *pnr_map)
 {
 	struct change_member {
 		struct e820entry *pbios; /* pointer to original bios entry */
@@ -204,6 +204,7 @@ static struct e820entry new_bios[E820_X_MAX] __initdata;
 		return -1;
 
 	old_nr = *pnr_map;
+	BUG_ON(old_nr > max_nr_map);
 
 	/* bail out if we find any unreasonable addresses in bios map */
 	for (i = 0; i < old_nr; i++)
@@ -401,7 +402,7 @@ u64 __init update_memory_range(u64 start, u64 size, unsigned old_type,
 
 void __init update_e820(void)
 {
-	u8 nr_map;
+	int nr_map;
 
 	nr_map = e820.nr_map;
 	if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map))
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index 58dc6eee4d4..354fbb22170 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -407,15 +407,18 @@ static void early_panic(char *msg)
 char *__init machine_specific_memory_setup(void)
 {
 	char *who = "BIOS-e820";
+	int new_nr;
 	/*
 	 * Try to copy the BIOS-supplied E820-map.
 	 *
 	 * Otherwise fake a memory map; one section from 0k->640k,
 	 * the next section from 1mb->appropriate_mem_k
 	 */
+	new_nr = boot_params.e820_entries;
 	sanitize_e820_map(boot_params.e820_map,
 			ARRAY_SIZE(boot_params.e820_map),
-			&boot_params.e820_entries);
+			&new_nr);
+	boot_params.e820_entries = new_nr;
 	if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0)
 		early_panic("Cannot find a valid memory map");
 	printk(KERN_INFO "BIOS-provided physical RAM map:\n");
@@ -484,7 +487,7 @@ early_param("memmap", parse_memmap_opt);
 void __init finish_e820_parsing(void)
 {
 	if (userdef) {
-		char nr = e820.nr_map;
+		int nr = e820.nr_map;
 
 		if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0)
 			early_panic("Invalid user supplied memory map");
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c
index 38a856c4fc0..56b4c39cb7f 100644
--- a/arch/x86/mach-default/setup.c
+++ b/arch/x86/mach-default/setup.c
@@ -153,6 +153,7 @@ late_initcall(print_ipi_mode);
 char * __init machine_specific_memory_setup(void)
 {
 	char *who;
+	int new_nr;
 
 
 	who = "BIOS-e820";
@@ -163,9 +164,11 @@ char * __init machine_specific_memory_setup(void)
 	 * Otherwise fake a memory map; one section from 0k->640k,
 	 * the next section from 1mb->appropriate_mem_k
 	 */
+	new_nr = boot_params.e820_entries;
 	sanitize_e820_map(boot_params.e820_map,
 			ARRAY_SIZE(boot_params.e820_map),
-			&boot_params.e820_entries);
+			&new_nr);
+	boot_params.e820_entries = new_nr;
 	if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries)
 	    < 0) {
 		unsigned long mem_size;
diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c
index 662b5c0a77d..f4aca9fa954 100644
--- a/arch/x86/mach-voyager/setup.c
+++ b/arch/x86/mach-voyager/setup.c
@@ -62,6 +62,7 @@ void __init time_init_hook(void)
 char *__init machine_specific_memory_setup(void)
 {
 	char *who;
+	int new_nr;
 
 	who = "NOT VOYAGER";
 
@@ -111,9 +112,11 @@ char *__init machine_specific_memory_setup(void)
 	 * Otherwise fake a memory map; one section from 0k->640k,
 	 * the next section from 1mb->appropriate_mem_k
 	 */
+	new_nr = boot_params.e820_entries;
 	sanitize_e820_map(boot_params.e820_map,
 			ARRAY_SIZE(boot_params.e820_map),
-			&boot_params.e820_entries);
+			&new_nr);
+	boot_params.e820_entries = new_nr;
 	if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries)
 	    < 0) {
 		unsigned long mem_size;
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index ab18457a95c..1eb13b88143 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -63,7 +63,7 @@ extern int e820_all_mapped(u64 start, u64 end, unsigned type);
 extern void add_memory_region(u64 start, u64 size, int type);
 extern void e820_print_map(char *who);
 extern int
-sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, char *pnr_map);
+sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, int *pnr_map);
 extern int copy_e820_map(struct e820entry *biosmap, int nr_map);
 extern u64 update_memory_range(u64 start, u64 size, unsigned old_type,
 			       unsigned new_type);
-- 
cgit v1.2.3-70-g09d2


From a4c81cf684350797939416c99effb9d3ae46bca6 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sun, 18 May 2008 01:18:57 -0700
Subject: x86: extend e820 ealy_res support 32bit

move early_res related from e820_64.c to e820.c
make edba detection to be done in head32.c
remove smp_alloc_memory, because we have fixed trampoline address now.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

 arch/x86/kernel/e820.c              |  214 ++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/e820_64.c           |  196 --------------------------------
 arch/x86/kernel/head32.c            |   76 ++++++++++++
 arch/x86/kernel/setup_32.c          |  109 +++---------------
 arch/x86/kernel/smpboot.c           |   17 --
 arch/x86/kernel/trampoline.c        |    2
 arch/x86/mach-voyager/voyager_smp.c |    9 -
 include/asm-x86/e820.h              |    6 +
 include/asm-x86/e820_64.h           |    9 -
 include/asm-x86/smp.h               |    1
 arch/x86/kernel/e820.c              |  214 ++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/e820_64.c           |  196 --------------------------------
 arch/x86/kernel/head32.c            |   76 ++++++++++++
 arch/x86/kernel/setup_32.c          |  109 +++---------------
 arch/x86/kernel/smpboot.c           |   17 --
 arch/x86/kernel/trampoline.c        |    2
 arch/x86/mach-voyager/voyager_smp.c |    9 -
 include/asm-x86/e820.h              |    6 +
 include/asm-x86/e820_64.h           |    9 -
 include/asm-x86/smp.h               |    1
 arch/x86/kernel/e820.c              |  214 ++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/e820_64.c           |  196 --------------------------------
 arch/x86/kernel/head32.c            |   76 ++++++++++++
 arch/x86/kernel/setup_32.c          |  109 +++---------------
 arch/x86/kernel/smpboot.c           |   17 --
 arch/x86/kernel/trampoline.c        |    2
 arch/x86/mach-voyager/voyager_smp.c |    9 -
 include/asm-x86/e820.h              |    6 +
 include/asm-x86/e820_64.h           |    9 -
 include/asm-x86/smp.h               |    1
 10 files changed, 320 insertions(+), 319 deletions(-)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/e820.c              | 214 ++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/e820_64.c           | 196 ---------------------------------
 arch/x86/kernel/head32.c            |  76 +++++++++++++
 arch/x86/kernel/setup_32.c          | 109 ++++--------------
 arch/x86/kernel/smpboot.c           |  17 ---
 arch/x86/kernel/trampoline.c        |   2 +-
 arch/x86/mach-voyager/voyager_smp.c |   9 --
 include/asm-x86/e820.h              |   6 +
 include/asm-x86/e820_64.h           |   9 --
 include/asm-x86/smp.h               |   1 -
 10 files changed, 320 insertions(+), 319 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 41c480ae47d..35da8cdbe5e 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -22,7 +22,9 @@
 #include <asm/pgtable.h>
 #include <asm/page.h>
 #include <asm/e820.h>
+#include <asm/proto.h>
 #include <asm/setup.h>
+#include <asm/trampoline.h>
 
 struct e820map e820;
 
@@ -493,3 +495,215 @@ __init void e820_setup_gap(void)
 	       pci_mem_start, gapstart, gapsize);
 }
 
+
+/*
+ * Early reserved memory areas.
+ */
+#define MAX_EARLY_RES 20
+
+struct early_res {
+	u64 start, end;
+	char name[16];
+};
+static struct early_res early_res[MAX_EARLY_RES] __initdata = {
+	{ 0, PAGE_SIZE, "BIOS data page" },	/* BIOS data page */
+#if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE)
+	{ TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
+#endif
+#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
+	/*
+	 * But first pinch a few for the stack/trampoline stuff
+	 * FIXME: Don't need the extra page at 4K, but need to fix
+	 * trampoline before removing it. (see the GDT stuff)
+	 */
+	{ PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" },
+	/*
+	 * Has to be in very low memory so we can execute
+	 * real-mode AP code.
+	 */
+	{ TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" },
+#endif
+	{}
+};
+
+void __init reserve_early(u64 start, u64 end, char *name)
+{
+	int i;
+	struct early_res *r;
+	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
+		r = &early_res[i];
+		if (end > r->start && start < r->end)
+			panic("Overlapping early reservations %llx-%llx %s to %llx-%llx %s\n",
+			      start, end - 1, name?name:"", r->start,
+			      r->end - 1, r->name);
+	}
+	if (i >= MAX_EARLY_RES)
+		panic("Too many early reservations");
+	r = &early_res[i];
+	r->start = start;
+	r->end = end;
+	if (name)
+		strncpy(r->name, name, sizeof(r->name) - 1);
+}
+
+void __init free_early(u64 start, u64 end)
+{
+	struct early_res *r;
+	int i, j;
+
+	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
+		r = &early_res[i];
+		if (start == r->start && end == r->end)
+			break;
+	}
+	if (i >= MAX_EARLY_RES || !early_res[i].end)
+		panic("free_early on not reserved area: %llx-%llx!",
+			 start, end);
+
+	for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
+		;
+
+	memmove(&early_res[i], &early_res[i + 1],
+	       (j - 1 - i) * sizeof(struct early_res));
+
+	early_res[j - 1].end = 0;
+}
+
+void __init early_res_to_bootmem(u64 start, u64 end)
+{
+	int i;
+	u64 final_start, final_end;
+	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
+		struct early_res *r = &early_res[i];
+		final_start = max(start, r->start);
+		final_end = min(end, r->end);
+		if (final_start >= final_end)
+			continue;
+		printk(KERN_INFO "  early res: %d [%llx-%llx] %s\n", i,
+			final_start, final_end - 1, r->name);
+#ifdef CONFIG_X86_64
+		reserve_bootmem_generic(final_start, final_end - final_start);
+#else
+		reserve_bootmem(final_start, final_end - final_start,
+				BOOTMEM_DEFAULT);
+#endif
+	}
+}
+
+/* Check for already reserved areas */
+static inline int __init bad_addr(u64 *addrp, u64 size, u64 align)
+{
+	int i;
+	u64 addr = *addrp, last;
+	int changed = 0;
+again:
+	last = addr + size;
+	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
+		struct early_res *r = &early_res[i];
+		if (last >= r->start && addr < r->end) {
+			*addrp = addr = round_up(r->end, align);
+			changed = 1;
+			goto again;
+		}
+	}
+	return changed;
+}
+
+/* Check for already reserved areas */
+static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align)
+{
+	int i;
+	u64 addr = *addrp, last;
+	u64 size = *sizep;
+	int changed = 0;
+again:
+	last = addr + size;
+	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
+		struct early_res *r = &early_res[i];
+		if (last > r->start && addr < r->start) {
+			size = r->start - addr;
+			changed = 1;
+			goto again;
+		}
+		if (last > r->end && addr < r->end) {
+			addr = round_up(r->end, align);
+			size = last - addr;
+			changed = 1;
+			goto again;
+		}
+		if (last <= r->end && addr >= r->start) {
+			(*sizep)++;
+			return 0;
+		}
+	}
+	if (changed) {
+		*addrp = addr;
+		*sizep = size;
+	}
+	return changed;
+}
+
+/*
+ * Find a free area with specified alignment in a specific range.
+ */
+u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
+{
+	int i;
+
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+		u64 addr, last;
+		u64 ei_last;
+
+		if (ei->type != E820_RAM)
+			continue;
+		addr = round_up(ei->addr, align);
+		ei_last = ei->addr + ei->size;
+		if (addr < start)
+			addr = round_up(start, align);
+		if (addr >= ei_last)
+			continue;
+		while (bad_addr(&addr, size, align) && addr+size <= ei_last)
+			;
+		last = addr + size;
+		if (last > ei_last)
+			continue;
+		if (last > end)
+			continue;
+		return addr;
+	}
+	return -1ULL;
+}
+
+/*
+ * Find next free range after *start
+ */
+u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
+{
+	int i;
+
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+		u64 addr, last;
+		u64 ei_last;
+
+		if (ei->type != E820_RAM)
+			continue;
+		addr = round_up(ei->addr, align);
+		ei_last = ei->addr + ei->size;
+		if (addr < start)
+			addr = round_up(start, align);
+		if (addr >= ei_last)
+			continue;
+		*sizep = ei_last - addr;
+		while (bad_addr_size(&addr, sizep, align) &&
+			addr + *sizep <= ei_last)
+			;
+		last = addr + *sizep;
+		if (last > ei_last)
+			continue;
+		return addr;
+	}
+	return -1UL;
+
+}
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index 354fbb22170..07941b55451 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -46,202 +46,6 @@ unsigned long max_pfn_mapped;
  */
 static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
 
-/*
- * Early reserved memory areas.
- */
-#define MAX_EARLY_RES 20
-
-struct early_res {
-	unsigned long start, end;
-	char name[16];
-};
-static struct early_res early_res[MAX_EARLY_RES] __initdata = {
-	{ 0, PAGE_SIZE, "BIOS data page" },			/* BIOS data page */
-#ifdef CONFIG_X86_TRAMPOLINE
-	{ TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
-#endif
-	{}
-};
-
-void __init reserve_early(unsigned long start, unsigned long end, char *name)
-{
-	int i;
-	struct early_res *r;
-	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
-		r = &early_res[i];
-		if (end > r->start && start < r->end)
-			panic("Overlapping early reservations %lx-%lx %s to %lx-%lx %s\n",
-			      start, end - 1, name?name:"", r->start, r->end - 1, r->name);
-	}
-	if (i >= MAX_EARLY_RES)
-		panic("Too many early reservations");
-	r = &early_res[i];
-	r->start = start;
-	r->end = end;
-	if (name)
-		strncpy(r->name, name, sizeof(r->name) - 1);
-}
-
-void __init free_early(unsigned long start, unsigned long end)
-{
-	struct early_res *r;
-	int i, j;
-
-	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
-		r = &early_res[i];
-		if (start == r->start && end == r->end)
-			break;
-	}
-	if (i >= MAX_EARLY_RES || !early_res[i].end)
-		panic("free_early on not reserved area: %lx-%lx!", start, end);
-
-	for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
-		;
-
-	memmove(&early_res[i], &early_res[i + 1],
-	       (j - 1 - i) * sizeof(struct early_res));
-
-	early_res[j - 1].end = 0;
-}
-
-void __init early_res_to_bootmem(unsigned long start, unsigned long end)
-{
-	int i;
-	unsigned long final_start, final_end;
-	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
-		struct early_res *r = &early_res[i];
-		final_start = max(start, r->start);
-		final_end = min(end, r->end);
-		if (final_start >= final_end)
-			continue;
-		printk(KERN_INFO "  early res: %d [%lx-%lx] %s\n", i,
-			final_start, final_end - 1, r->name);
-		reserve_bootmem_generic(final_start, final_end - final_start);
-	}
-}
-
-/* Check for already reserved areas */
-static inline int __init
-bad_addr(unsigned long *addrp, unsigned long size, unsigned long align)
-{
-	int i;
-	unsigned long addr = *addrp, last;
-	int changed = 0;
-again:
-	last = addr + size;
-	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
-		struct early_res *r = &early_res[i];
-		if (last >= r->start && addr < r->end) {
-			*addrp = addr = round_up(r->end, align);
-			changed = 1;
-			goto again;
-		}
-	}
-	return changed;
-}
-
-/* Check for already reserved areas */
-static inline int __init
-bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align)
-{
-	int i;
-	unsigned long addr = *addrp, last;
-	unsigned long size = *sizep;
-	int changed = 0;
-again:
-	last = addr + size;
-	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
-		struct early_res *r = &early_res[i];
-		if (last > r->start && addr < r->start) {
-			size = r->start - addr;
-			changed = 1;
-			goto again;
-		}
-		if (last > r->end && addr < r->end) {
-			addr = round_up(r->end, align);
-			size = last - addr;
-			changed = 1;
-			goto again;
-		}
-		if (last <= r->end && addr >= r->start) {
-			(*sizep)++;
-			return 0;
-		}
-	}
-	if (changed) {
-		*addrp = addr;
-		*sizep = size;
-	}
-	return changed;
-}
-
-/*
- * Find a free area with specified alignment in a specific range.
- */
-unsigned long __init find_e820_area(unsigned long start, unsigned long end,
-				    unsigned long size, unsigned long align)
-{
-	int i;
-
-	for (i = 0; i < e820.nr_map; i++) {
-		struct e820entry *ei = &e820.map[i];
-		unsigned long addr, last;
-		unsigned long ei_last;
-
-		if (ei->type != E820_RAM)
-			continue;
-		addr = round_up(ei->addr, align);
-		ei_last = ei->addr + ei->size;
-		if (addr < start)
-			addr = round_up(start, align);
-		if (addr >= ei_last)
-			continue;
-		while (bad_addr(&addr, size, align) && addr+size <= ei_last)
-			;
-		last = addr + size;
-		if (last > ei_last)
-			continue;
-		if (last > end)
-			continue;
-		return addr;
-	}
-	return -1UL;
-}
-
-/*
- * Find next free range after *start
- */
-unsigned long __init find_e820_area_size(unsigned long start,
-					 unsigned long *sizep,
-					 unsigned long align)
-{
-	int i;
-
-	for (i = 0; i < e820.nr_map; i++) {
-		struct e820entry *ei = &e820.map[i];
-		unsigned long addr, last;
-		unsigned long ei_last;
-
-		if (ei->type != E820_RAM)
-			continue;
-		addr = round_up(ei->addr, align);
-		ei_last = ei->addr + ei->size;
-		if (addr < start)
-			addr = round_up(start, align);
-		if (addr >= ei_last)
-			continue;
-		*sizep = ei_last - addr;
-		while (bad_addr_size(&addr, sizep, align) &&
-			addr + *sizep <= ei_last)
-			;
-		last = addr + *sizep;
-		if (last > ei_last)
-			continue;
-		return addr;
-	}
-	return -1UL;
-
-}
 /*
  * Find the highest page frame number we have available
  */
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 3db05905892..c216d3c2a99 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -8,7 +8,83 @@
 #include <linux/init.h>
 #include <linux/start_kernel.h>
 
+#include <asm/setup.h>
+#include <asm/sections.h>
+#include <asm/e820.h>
+#include <asm/bios_ebda.h>
+
+#define BIOS_LOWMEM_KILOBYTES 0x413
+
+/*
+ * The BIOS places the EBDA/XBDA at the top of conventional
+ * memory, and usually decreases the reported amount of
+ * conventional memory (int 0x12) too. This also contains a
+ * workaround for Dell systems that neglect to reserve EBDA.
+ * The same workaround also avoids a problem with the AMD768MPX
+ * chipset: reserve a page before VGA to prevent PCI prefetch
+ * into it (errata #56). Usually the page is reserved anyways,
+ * unless you have no PS/2 mouse plugged in.
+ */
+static void __init reserve_ebda_region(void)
+{
+	unsigned int lowmem, ebda_addr;
+
+	/* To determine the position of the EBDA and the */
+	/* end of conventional memory, we need to look at */
+	/* the BIOS data area. In a paravirtual environment */
+	/* that area is absent. We'll just have to assume */
+	/* that the paravirt case can handle memory setup */
+	/* correctly, without our help. */
+	if (paravirt_enabled())
+		return;
+
+	/* end of low (conventional) memory */
+	lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
+	lowmem <<= 10;
+
+	/* start of EBDA area */
+	ebda_addr = get_bios_ebda();
+
+	/* Fixup: bios puts an EBDA in the top 64K segment */
+	/* of conventional memory, but does not adjust lowmem. */
+	if ((lowmem - ebda_addr) <= 0x10000)
+		lowmem = ebda_addr;
+
+	/* Fixup: bios does not report an EBDA at all. */
+	/* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
+	if ((ebda_addr == 0) && (lowmem >= 0x9f000))
+		lowmem = 0x9f000;
+
+	/* Paranoia: should never happen, but... */
+	if ((lowmem == 0) || (lowmem >= 0x100000))
+		lowmem = 0x9f000;
+
+	/* reserve all memory between lowmem and the 1MB mark */
+	reserve_early(lowmem, 0x100000, "BIOS reserved");
+}
+
 void __init i386_start_kernel(void)
 {
+	reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
+
+#ifdef CONFIG_BLK_DEV_INITRD
+	/* Reserve INITRD */
+	if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
+		u64 ramdisk_image = boot_params.hdr.ramdisk_image;
+		u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
+		u64 ramdisk_end   = ramdisk_image + ramdisk_size;
+		reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
+	}
+#endif
+	reserve_early(__pa_symbol(&_end), init_pg_tables_end, "INIT_PG_TABLE");
+
+	reserve_ebda_region();
+
+	/*
+	 * At this point everything still needed from the boot loader
+	 * or BIOS or kernel text should be early reserved or marked not
+	 * RAM in e820. All other memory is free game.
+	 */
+
 	start_kernel();
 }
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 5faeab69edd..fed482c6245 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -359,56 +359,6 @@ unsigned long __init find_max_low_pfn(void)
 	return max_low_pfn;
 }
 
-#define BIOS_LOWMEM_KILOBYTES 0x413
-
-/*
- * The BIOS places the EBDA/XBDA at the top of conventional
- * memory, and usually decreases the reported amount of
- * conventional memory (int 0x12) too. This also contains a
- * workaround for Dell systems that neglect to reserve EBDA.
- * The same workaround also avoids a problem with the AMD768MPX
- * chipset: reserve a page before VGA to prevent PCI prefetch
- * into it (errata #56). Usually the page is reserved anyways,
- * unless you have no PS/2 mouse plugged in.
- */
-static void __init reserve_ebda_region(void)
-{
-	unsigned int lowmem, ebda_addr;
-
-	/* To determine the position of the EBDA and the */
-	/* end of conventional memory, we need to look at */
-	/* the BIOS data area. In a paravirtual environment */
-	/* that area is absent. We'll just have to assume */
-	/* that the paravirt case can handle memory setup */
-	/* correctly, without our help. */
-	if (paravirt_enabled())
-		return;
-
-	/* end of low (conventional) memory */
-	lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
-	lowmem <<= 10;
-
-	/* start of EBDA area */
-	ebda_addr = get_bios_ebda();
-
-	/* Fixup: bios puts an EBDA in the top 64K segment */
-	/* of conventional memory, but does not adjust lowmem. */
-	if ((lowmem - ebda_addr) <= 0x10000)
-		lowmem = ebda_addr;
-
-	/* Fixup: bios does not report an EBDA at all. */
-	/* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
-	if ((ebda_addr == 0) && (lowmem >= 0x9f000))
-		lowmem = 0x9f000;
-
-	/* Paranoia: should never happen, but... */
-	if ((lowmem == 0) || (lowmem >= 0x100000))
-		lowmem = 0x9f000;
-
-	/* reserve all memory between lowmem and the 1MB mark */
-	reserve_bootmem(lowmem, 0x100000 - lowmem, BOOTMEM_DEFAULT);
-}
-
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 static void __init setup_bootmem_allocator(void);
 static unsigned long __init setup_memory(void)
@@ -522,25 +472,32 @@ static void __init reserve_initrd(void)
 	unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
 	unsigned long ramdisk_here;
 
-	initrd_start = 0;
-
 	if (!boot_params.hdr.type_of_loader ||
 	    !ramdisk_image || !ramdisk_size)
 		return;		/* No initrd provided by bootloader */
 
+	initrd_start = 0;
+
 	if (ramdisk_end < ramdisk_image) {
+		free_bootmem(ramdisk_image, ramdisk_size);
 		printk(KERN_ERR "initrd wraps around end of memory, "
 		       "disabling initrd\n");
 		return;
 	}
 	if (ramdisk_size >= end_of_lowmem/2) {
+		free_bootmem(ramdisk_image, ramdisk_size);
 		printk(KERN_ERR "initrd too large to handle, "
 		       "disabling initrd\n");
 		return;
 	}
+
 	if (ramdisk_end <= end_of_lowmem) {
 		/* All in lowmem, easy case */
-		reserve_bootmem(ramdisk_image, ramdisk_size, BOOTMEM_DEFAULT);
+		/*
+		 * don't need to reserve again, already reserved early
+		 * in i386_start_kernel, and early_res_to_bootmem
+		 * convert that to reserved in bootmem
+		 */
 		initrd_start = ramdisk_image + PAGE_OFFSET;
 		initrd_end = initrd_start+ramdisk_size;
 		return;
@@ -582,6 +539,8 @@ static void __init relocate_initrd(void)
 		p = (char *)__va(ramdisk_image);
 		memcpy(q, p, clen);
 		q += clen;
+		/* need to free these low pages...*/
+		free_bootmem(ramdisk_image, clen);
 		ramdisk_image += clen;
 		ramdisk_size  -= clen;
 	}
@@ -600,47 +559,28 @@ static void __init relocate_initrd(void)
 		ramdisk_image += clen;
 		ramdisk_size  -= clen;
 	}
+	/* high pages is not converted by early_res_to_bootmem */
 }
 
 #endif /* CONFIG_BLK_DEV_INITRD */
 
 void __init setup_bootmem_allocator(void)
 {
-	unsigned long bootmap_size;
+	unsigned long bootmap_size, bootmap;
 	/*
 	 * Initialize the boot-time allocator (with low memory only):
 	 */
-	bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
-
+	bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT;
+	bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT,
+				 max_low_pfn<<PAGE_SHIFT, bootmap_size,
+				 PAGE_SIZE);
+	if (bootmap == -1L)
+		panic("Cannot find bootmem map of size %ld\n", bootmap_size);
+	bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, max_low_pfn);
 	register_bootmem_low_pages(max_low_pfn);
+	early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
+	reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
 
-	/*
-	 * Reserve the bootmem bitmap itself as well. We do this in two
-	 * steps (first step was init_bootmem()) because this catches
-	 * the (very unlikely) case of us accidentally initializing the
-	 * bootmem allocator with an invalid RAM area.
-	 */
-	reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) +
-			 bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text),
-			 BOOTMEM_DEFAULT);
-
-	/*
-	 * reserve physical page 0 - it's a special BIOS page on many boxes,
-	 * enabling clean reboots, SMP operation, laptop functions.
-	 */
-	reserve_bootmem(0, PAGE_SIZE, BOOTMEM_DEFAULT);
-
-	/* reserve EBDA region */
-	reserve_ebda_region();
-
-#ifdef CONFIG_SMP
-	/*
-	 * But first pinch a few for the stack/trampoline stuff
-	 * FIXME: Don't need the extra page at 4K, but need to fix
-	 * trampoline before removing it. (see the GDT stuff)
-	 */
-	reserve_bootmem(PAGE_SIZE, PAGE_SIZE, BOOTMEM_DEFAULT);
-#endif
 #ifdef CONFIG_ACPI_SLEEP
 	/*
 	 * Reserve low memory region for sleep support.
@@ -803,9 +743,6 @@ void __init setup_arch(char **cmdline_p)
 	 * not to exceed the 8Mb limit.
 	 */
 
-#ifdef CONFIG_SMP
-	smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
-#endif
 	paging_init();
 
 	/*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 38988491c62..843722e2b79 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -555,23 +555,6 @@ cpumask_t cpu_coregroup_map(int cpu)
 		return c->llc_shared_map;
 }
 
-#ifdef CONFIG_X86_32
-/*
- * We are called very early to get the low memory for the
- * SMP bootup trampoline page.
- */
-void __init smp_alloc_memory(void)
-{
-	trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE);
-	/*
-	 * Has to be in very low memory so we can execute
-	 * real-mode AP code.
-	 */
-	if (__pa(trampoline_base) >= 0x9F000)
-		BUG();
-}
-#endif
-
 static void impress_friends(void)
 {
 	int cpu;
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index abbf199adeb..1106fac6024 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -2,7 +2,7 @@
 
 #include <asm/trampoline.h>
 
-/* ready for x86_64, no harm for x86, since it will overwrite after alloc */
+/* ready for x86_64 and x86 */
 unsigned char *trampoline_base = __va(TRAMPOLINE_BASE);
 
 /*
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 8acbf0cdf1a..7bbebbfe8c4 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -1137,15 +1137,6 @@ void flush_tlb_all(void)
 	on_each_cpu(do_flush_tlb_all, 0, 1, 1);
 }
 
-/* used to set up the trampoline for other CPUs when the memory manager
- * is sorted out */
-void __init smp_alloc_memory(void)
-{
-	trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE);
-	if (__pa(trampoline_base) >= 0x93000)
-		BUG();
-}
-
 /* send a reschedule CPI to one CPU by physical CPU number*/
 static void voyager_smp_send_reschedule(int cpu)
 {
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index 1eb13b88143..5a58e2bb1d7 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -70,6 +70,12 @@ extern u64 update_memory_range(u64 start, u64 size, unsigned old_type,
 extern void update_e820(void);
 extern void e820_setup_gap(void);
 
+extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align);
+extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align);
+extern void reserve_early(u64 start, u64 end, char *name);
+extern void free_early(u64 start, u64 end);
+extern void early_res_to_bootmem(u64 start, u64 end);
+
 #endif /* __ASSEMBLY__ */
 
 #define ISA_START_ADDRESS	0xa0000
diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h
index 9fac77e0144..37f176a02bc 100644
--- a/include/asm-x86/e820_64.h
+++ b/include/asm-x86/e820_64.h
@@ -14,11 +14,6 @@
 #include <linux/ioport.h>
 
 #ifndef __ASSEMBLY__
-extern unsigned long find_e820_area(unsigned long start, unsigned long end,
-				    unsigned long size, unsigned long align);
-extern unsigned long find_e820_area_size(unsigned long start,
-					 unsigned long *sizep,
-					 unsigned long align);
 extern void setup_memory_region(void);
 extern void contig_e820_setup(void);
 extern unsigned long e820_end_of_ram(void);
@@ -35,10 +30,6 @@ extern void e820_register_active_regions(int nid, unsigned long start_pfn,
 
 extern void finish_e820_parsing(void);
 
-extern void reserve_early(unsigned long start, unsigned long end, char *name);
-extern void free_early(unsigned long start, unsigned long end);
-extern void early_res_to_bootmem(unsigned long start, unsigned long end);
-
 #endif/*!__ASSEMBLY__*/
 
 #endif/*__E820_HEADER*/
diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
index 1ebaa5cd311..514e52b95ce 100644
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@ -201,7 +201,6 @@ extern void cpu_exit_clear(void);
 extern void cpu_uninit(void);
 #endif
 
-extern void smp_alloc_memory(void);
 extern void lock_ipi_call_lock(void);
 extern void unlock_ipi_call_lock(void);
 #endif /* __ASSEMBLY__ */
-- 
cgit v1.2.3-70-g09d2


From 32c5061265caf201d6a2c0d02181e2b68769c22c Mon Sep 17 00:00:00 2001
From: Alexey Starikovskiy <astarikovskiy@suse.de>
Date: Wed, 14 May 2008 19:02:51 +0400
Subject: x86: move es7000_plat out of mpparse.c

Signed-off-by: Alexey Starikovskiy <astarikovskiy@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/mpparse.c         | 11 ++++++-----
 arch/x86/mach-es7000/es7000plat.c |  2 ++
 include/asm-x86/system.h          |  1 -
 3 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 5a18b2b9852..ff1342325ef 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -793,15 +793,14 @@ void __init find_smp_config(void)
                             ACPI-based MP Configuration
    -------------------------------------------------------------------------- */
 
-/*
- * Keep this outside and initialized to 0, for !CONFIG_ACPI builds:
- */
-int es7000_plat;
-
 #ifdef CONFIG_ACPI
 
 #ifdef	CONFIG_X86_IO_APIC
 
+#if defined(CONFIG_X86_ES7000) || defined(CONFIG_X86_GENERICARCH)
+extern int es7000_plat;
+#endif
+
 #define MP_ISA_BUS		0
 
 static struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS];
@@ -928,11 +927,13 @@ void __init mp_config_acpi_legacy_irqs(void)
 	set_bit(MP_ISA_BUS, mp_bus_not_pci);
 	Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
 
+#if defined(CONFIG_X86_ES7000) || defined(CONFIG_X86_GENERICARCH)
 	/*
 	 * Older generations of ES7000 have no legacy identity mappings
 	 */
 	if (es7000_plat == 1)
 		return;
+#endif
 
 	/*
 	 * Locate the IOAPIC that manages the ISA IRQs (0-15).
diff --git a/arch/x86/mach-es7000/es7000plat.c b/arch/x86/mach-es7000/es7000plat.c
index f5d6f7d8b86..a41c77a4722 100644
--- a/arch/x86/mach-es7000/es7000plat.c
+++ b/arch/x86/mach-es7000/es7000plat.c
@@ -52,6 +52,8 @@ static struct mip_reg		*host_reg;
 static int 			mip_port;
 static unsigned long		mip_addr, host_addr;
 
+int es7000_plat;
+
 /*
  * GSI override for ES7000 platforms.
  */
diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h
index a2f04cd79b2..9f7f63ba004 100644
--- a/include/asm-x86/system.h
+++ b/include/asm-x86/system.h
@@ -303,7 +303,6 @@ static inline void clflush(volatile void *__p)
 void disable_hlt(void);
 void enable_hlt(void);
 
-extern int es7000_plat;
 void cpu_idle_wait(void);
 
 extern unsigned long arch_align_stack(unsigned long sp);
-- 
cgit v1.2.3-70-g09d2


From 5f8951487ddbacbc949e9ffae574f94791f9b4dd Mon Sep 17 00:00:00 2001
From: Alexey Starikovskiy <astarikovskiy@suse.de>
Date: Wed, 14 May 2008 19:03:04 +0400
Subject: x86: make mp_ioapic_routing definition local

Signed-off-by: Alexey Starikovskiy <astarikovskiy@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/acpi/boot.c | 7 ++++++-
 include/asm-x86/io_apic.h   | 7 -------
 2 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index b2ad09c4c6a..f75c2030f4b 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -852,7 +852,12 @@ static int __init acpi_parse_madt_lapic_entries(void)
 extern int es7000_plat;
 #endif
 
-static struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS];
+static struct {
+	int apic_id;
+	int gsi_base;
+	int gsi_end;
+	DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
+} mp_ioapic_routing[MAX_IO_APICS];
 
 static int mp_find_ioapic(int gsi)
 {
diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h
index d593e14f034..2ef96f0596e 100644
--- a/include/asm-x86/io_apic.h
+++ b/include/asm-x86/io_apic.h
@@ -112,13 +112,6 @@ extern int nr_ioapic_registers[MAX_IO_APICS];
 
 #define MP_MAX_IOAPIC_PIN 127
 
-struct mp_ioapic_routing {
-	int apic_id;
-	int gsi_base;
-	int gsi_end;
-	DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
-};
-
 /* I/O APIC entries */
 extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
 
-- 
cgit v1.2.3-70-g09d2


From ec2cd0a22e2715f776a934e01c4f8ea098324fe1 Mon Sep 17 00:00:00 2001
From: Alexey Starikovskiy <astarikovskiy@suse.de>
Date: Wed, 14 May 2008 19:03:10 +0400
Subject: x86: make struct config_ioapic not MPspec specific

Signed-off-by: Alexey Starikovskiy <astarikovskiy@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/acpi/boot.c  | 28 ++++++++++++------------
 arch/x86/kernel/apic_32.c    |  2 +-
 arch/x86/kernel/io_apic_32.c | 52 ++++++++++++++++++++++----------------------
 arch/x86/kernel/io_apic_64.c | 26 +++++++++++-----------
 arch/x86/kernel/mpparse.c    |  8 +++++--
 include/asm-x86/io_apic.h    | 10 ++++++++-
 6 files changed, 69 insertions(+), 57 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index f75c2030f4b..a26dd91faf0 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -887,8 +887,8 @@ static u8 __init uniq_ioapic_id(u8 id)
 	DECLARE_BITMAP(used, 256);
 	bitmap_zero(used, 256);
 	for (i = 0; i < nr_ioapics; i++) {
-		struct mpc_config_ioapic *ia = &mp_ioapics[i];
-		__set_bit(ia->mpc_apicid, used);
+		struct mp_config_ioapic *ia = &mp_ioapics[i];
+		__set_bit(ia->mp_apicid, used);
 	}
 	if (!test_bit(id, used))
 		return id;
@@ -920,29 +920,29 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
 
 	idx = nr_ioapics;
 
-	mp_ioapics[idx].mpc_type = MP_IOAPIC;
-	mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
-	mp_ioapics[idx].mpc_apicaddr = address;
+	mp_ioapics[idx].mp_type = MP_IOAPIC;
+	mp_ioapics[idx].mp_flags = MPC_APIC_USABLE;
+	mp_ioapics[idx].mp_apicaddr = address;
 
 	set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
-	mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id);
+	mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id);
 #ifdef CONFIG_X86_32
-	mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
+	mp_ioapics[idx].mp_apicver = io_apic_get_version(idx);
 #else
-	mp_ioapics[idx].mpc_apicver = 0;
+	mp_ioapics[idx].mp_apicver = 0;
 #endif
 	/*
 	 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
 	 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
 	 */
-	mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
+	mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mp_apicid;
 	mp_ioapic_routing[idx].gsi_base = gsi_base;
 	mp_ioapic_routing[idx].gsi_end = gsi_base +
 	    io_apic_get_redir_entries(idx);
 
-	printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
-	       "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
-	       mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
+	printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, "
+	       "GSI %d-%d\n", idx, mp_ioapics[idx].mp_apicid,
+	       mp_ioapics[idx].mp_apicver, mp_ioapics[idx].mp_apicaddr,
 	       mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end);
 
 	nr_ioapics++;
@@ -975,7 +975,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
 	mp_irqs[mp_irq_entries].mpc_srcbus = MP_ISA_BUS;
 	mp_irqs[mp_irq_entries].mpc_srcbusirq = bus_irq;	/* IRQ */
 	mp_irqs[mp_irq_entries].mpc_dstapic =
-			mp_ioapics[ioapic].mpc_apicid;	/* APIC ID */
+			mp_ioapics[ioapic].mp_apicid;	/* APIC ID */
 	mp_irqs[mp_irq_entries].mpc_dstirq = pin;	/* INTIN# */
 
 	if (++mp_irq_entries == MAX_IRQ_SOURCES)
@@ -1016,7 +1016,7 @@ void __init mp_config_acpi_legacy_irqs(void)
 	mp_irqs[mp_irq_entries].mpc_irqflag = 0;	/* Conforming */
 	mp_irqs[mp_irq_entries].mpc_srcbus = MP_ISA_BUS;
 #ifdef CONFIG_X86_IO_APIC
-	mp_irqs[mp_irq_entries].mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
+	mp_irqs[mp_irq_entries].mpc_dstapic = mp_ioapics[ioapic].mp_apicid;
 #endif
 	/*
 	 * Use the default configuration for the IRQs 0-15.  Unless
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 4b99b1bdeb6..fa1be1d6291 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1202,7 +1202,7 @@ void __init init_apic_mappings(void)
 
 		for (i = 0; i < nr_ioapics; i++) {
 			if (smp_found_config) {
-				ioapic_phys = mp_ioapics[i].mpc_apicaddr;
+				ioapic_phys = mp_ioapics[i].mp_apicaddr;
 				if (!ioapic_phys) {
 					printk(KERN_ERR
 					       "WARNING: bogus zero IO-APIC "
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index a40d54fc1fd..5af1b717236 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -72,7 +72,7 @@ int sis_apic_bug = -1;
 int nr_ioapic_registers[MAX_IO_APICS];
 
 /* I/O APIC entries */
-struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
 int nr_ioapics;
 
 /* MP IRQ source entries */
@@ -110,7 +110,7 @@ struct io_apic {
 static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
 {
 	return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
-		+ (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
+		+ (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
 }
 
 static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
@@ -802,7 +802,7 @@ static int find_irq_entry(int apic, int pin, int type)
 
 	for (i = 0; i < mp_irq_entries; i++)
 		if (mp_irqs[i].mpc_irqtype == type &&
-		    (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
+		    (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mp_apicid ||
 		     mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
 		    mp_irqs[i].mpc_dstirq == pin)
 			return i;
@@ -844,7 +844,7 @@ static int __init find_isa_irq_apic(int irq, int type)
 	if (i < mp_irq_entries) {
 		int apic;
 		for(apic = 0; apic < nr_ioapics; apic++) {
-			if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic)
+			if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mpc_dstapic)
 				return apic;
 		}
 	}
@@ -872,7 +872,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 		int lbus = mp_irqs[i].mpc_srcbus;
 
 		for (apic = 0; apic < nr_ioapics; apic++)
-			if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
+			if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mpc_dstapic ||
 			    mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
 				break;
 
@@ -1250,12 +1250,12 @@ static void __init setup_IO_APIC_irqs(void)
 			if (first_notcon) {
 				apic_printk(APIC_VERBOSE, KERN_DEBUG
 						" IO-APIC (apicid-pin) %d-%d",
-						mp_ioapics[apic].mpc_apicid,
+						mp_ioapics[apic].mp_apicid,
 						pin);
 				first_notcon = 0;
 			} else
 				apic_printk(APIC_VERBOSE, ", %d-%d",
-					mp_ioapics[apic].mpc_apicid, pin);
+					mp_ioapics[apic].mp_apicid, pin);
 			continue;
 		}
 
@@ -1357,7 +1357,7 @@ void __init print_IO_APIC(void)
  	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
 	for (i = 0; i < nr_ioapics; i++)
 		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
-		       mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
+		       mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
 
 	/*
 	 * We are a bit conservative about what we expect.  We have to
@@ -1376,7 +1376,7 @@ void __init print_IO_APIC(void)
 		reg_03.raw = io_apic_read(apic, 3);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
-	printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
+	printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
 	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
 	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
 	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
@@ -1749,14 +1749,14 @@ static void __init setup_ioapic_ids_from_mpc(void)
 		reg_00.raw = io_apic_read(apic, 0);
 		spin_unlock_irqrestore(&ioapic_lock, flags);
 		
-		old_id = mp_ioapics[apic].mpc_apicid;
+		old_id = mp_ioapics[apic].mp_apicid;
 
-		if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) {
+		if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
 			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
-				apic, mp_ioapics[apic].mpc_apicid);
+				apic, mp_ioapics[apic].mp_apicid);
 			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
 				reg_00.bits.ID);
-			mp_ioapics[apic].mpc_apicid = reg_00.bits.ID;
+			mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
 		}
 
 		/*
@@ -1765,9 +1765,9 @@ static void __init setup_ioapic_ids_from_mpc(void)
 		 * 'stuck on smp_invalidate_needed IPI wait' messages.
 		 */
 		if (check_apicid_used(phys_id_present_map,
-					mp_ioapics[apic].mpc_apicid)) {
+					mp_ioapics[apic].mp_apicid)) {
 			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
-				apic, mp_ioapics[apic].mpc_apicid);
+				apic, mp_ioapics[apic].mp_apicid);
 			for (i = 0; i < get_physical_broadcast(); i++)
 				if (!physid_isset(i, phys_id_present_map))
 					break;
@@ -1776,13 +1776,13 @@ static void __init setup_ioapic_ids_from_mpc(void)
 			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
 				i);
 			physid_set(i, phys_id_present_map);
-			mp_ioapics[apic].mpc_apicid = i;
+			mp_ioapics[apic].mp_apicid = i;
 		} else {
 			physid_mask_t tmp;
-			tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid);
+			tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
 			apic_printk(APIC_VERBOSE, "Setting %d in the "
 					"phys_id_present_map\n",
-					mp_ioapics[apic].mpc_apicid);
+					mp_ioapics[apic].mp_apicid);
 			physids_or(phys_id_present_map, phys_id_present_map, tmp);
 		}
 
@@ -1791,11 +1791,11 @@ static void __init setup_ioapic_ids_from_mpc(void)
 		 * We need to adjust the IRQ routing table
 		 * if the ID changed.
 		 */
-		if (old_id != mp_ioapics[apic].mpc_apicid)
+		if (old_id != mp_ioapics[apic].mp_apicid)
 			for (i = 0; i < mp_irq_entries; i++)
 				if (mp_irqs[i].mpc_dstapic == old_id)
 					mp_irqs[i].mpc_dstapic
-						= mp_ioapics[apic].mpc_apicid;
+						= mp_ioapics[apic].mp_apicid;
 
 		/*
 		 * Read the right value from the MPC table and
@@ -1803,9 +1803,9 @@ static void __init setup_ioapic_ids_from_mpc(void)
 	 	 */
 		apic_printk(APIC_VERBOSE, KERN_INFO
 			"...changing IO-APIC physical APIC ID to %d ...",
-			mp_ioapics[apic].mpc_apicid);
+			mp_ioapics[apic].mp_apicid);
 
-		reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
+		reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
 		spin_lock_irqsave(&ioapic_lock, flags);
 		io_apic_write(apic, 0, reg_00.raw);
 		spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -1816,7 +1816,7 @@ static void __init setup_ioapic_ids_from_mpc(void)
 		spin_lock_irqsave(&ioapic_lock, flags);
 		reg_00.raw = io_apic_read(apic, 0);
 		spin_unlock_irqrestore(&ioapic_lock, flags);
-		if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
+		if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
 			printk("could not set ID!\n");
 		else
 			apic_printk(APIC_VERBOSE, " ok.\n");
@@ -2355,8 +2355,8 @@ static int ioapic_resume(struct sys_device *dev)
 
 	spin_lock_irqsave(&ioapic_lock, flags);
 	reg_00.raw = io_apic_read(dev->id, 0);
-	if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
-		reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
+	if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
+		reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
 		io_apic_write(dev->id, 0, reg_00.raw);
 	}
 	spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -2789,7 +2789,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
 
 	apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
 		"(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
-		mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq,
+		mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq,
 		edge_level, active_high_low);
 
 	ioapic_register_intr(irq, entry.vector, edge_level);
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index ef1a8dfcc52..4555ad8c207 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -104,7 +104,7 @@ DEFINE_SPINLOCK(vector_lock);
 int nr_ioapic_registers[MAX_IO_APICS];
 
 /* I/O APIC entries */
-struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
 int nr_ioapics;
 
 /* MP IRQ source entries */
@@ -140,7 +140,7 @@ struct io_apic {
 static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
 {
 	return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
-		+ (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
+		+ (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
 }
 
 static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
@@ -454,7 +454,7 @@ static int find_irq_entry(int apic, int pin, int type)
 
 	for (i = 0; i < mp_irq_entries; i++)
 		if (mp_irqs[i].mpc_irqtype == type &&
-		    (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
+		    (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mp_apicid ||
 		     mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
 		    mp_irqs[i].mpc_dstirq == pin)
 			return i;
@@ -496,7 +496,7 @@ static int __init find_isa_irq_apic(int irq, int type)
 	if (i < mp_irq_entries) {
 		int apic;
 		for(apic = 0; apic < nr_ioapics; apic++) {
-			if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic)
+			if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mpc_dstapic)
 				return apic;
 		}
 	}
@@ -524,7 +524,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 		int lbus = mp_irqs[i].mpc_srcbus;
 
 		for (apic = 0; apic < nr_ioapics; apic++)
-			if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
+			if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mpc_dstapic ||
 			    mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
 				break;
 
@@ -846,7 +846,7 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
 	apic_printk(APIC_VERBOSE,KERN_DEBUG
 		    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
 		    "IRQ %d Mode:%i Active:%i)\n",
-		    apic, mp_ioapics[apic].mpc_apicid, pin, cfg->vector,
+		    apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
 		    irq, trigger, polarity);
 
 	/*
@@ -887,10 +887,10 @@ static void __init setup_IO_APIC_irqs(void)
 		idx = find_irq_entry(apic,pin,mp_INT);
 		if (idx == -1) {
 			if (first_notcon) {
-				apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin);
+				apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
 				first_notcon = 0;
 			} else
-				apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin);
+				apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
 			continue;
 		}
 		if (!first_notcon) {
@@ -965,7 +965,7 @@ void __apicdebuginit print_IO_APIC(void)
 	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
 	for (i = 0; i < nr_ioapics; i++)
 		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
-		       mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
+		       mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
 
 	/*
 	 * We are a bit conservative about what we expect.  We have to
@@ -983,7 +983,7 @@ void __apicdebuginit print_IO_APIC(void)
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
 	printk("\n");
-	printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
+	printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
 	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
 	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
 
@@ -1841,8 +1841,8 @@ static int ioapic_resume(struct sys_device *dev)
 
 	spin_lock_irqsave(&ioapic_lock, flags);
 	reg_00.raw = io_apic_read(dev->id, 0);
-	if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
-		reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
+	if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
+		reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
 		io_apic_write(dev->id, 0, reg_00.raw);
 	}
 	spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -2336,7 +2336,7 @@ void __init ioapic_init_mappings(void)
 	ioapic_res = ioapic_setup_resources();
 	for (i = 0; i < nr_ioapics; i++) {
 		if (smp_found_config) {
-			ioapic_phys = mp_ioapics[i].mpc_apicaddr;
+			ioapic_phys = mp_ioapics[i].mp_apicaddr;
 		} else {
 			ioapic_phys = (unsigned long)
 				alloc_bootmem_pages(PAGE_SIZE);
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index d05b70c329c..9f1e5bf7f0f 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -176,7 +176,11 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m)
 	if (bad_ioapic(m->mpc_apicaddr))
 		return;
 
-	mp_ioapics[nr_ioapics] = *m;
+	mp_ioapics[nr_ioapics].mp_apicaddr = m->mpc_apicaddr;
+	mp_ioapics[nr_ioapics].mp_apicid = m->mpc_apicid;
+	mp_ioapics[nr_ioapics].mp_type = m->mpc_type;
+	mp_ioapics[nr_ioapics].mp_apicver = m->mpc_apicver;
+	mp_ioapics[nr_ioapics].mp_flags = m->mpc_flags;
 	nr_ioapics++;
 }
 
@@ -426,7 +430,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
 	intsrc.mpc_type = MP_INTSRC;
 	intsrc.mpc_irqflag = 0;	/* conforming */
 	intsrc.mpc_srcbus = 0;
-	intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
+	intsrc.mpc_dstapic = mp_ioapics[0].mp_apicid;
 
 	intsrc.mpc_irqtype = mp_INT;
 
diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h
index 2ef96f0596e..ade76c0d03a 100644
--- a/include/asm-x86/io_apic.h
+++ b/include/asm-x86/io_apic.h
@@ -112,8 +112,16 @@ extern int nr_ioapic_registers[MAX_IO_APICS];
 
 #define MP_MAX_IOAPIC_PIN 127
 
+struct mp_config_ioapic {
+	unsigned long mp_apicaddr;
+	unsigned int mp_apicid;
+	unsigned char mp_type;
+	unsigned char mp_apicver;
+	unsigned char mp_flags;
+};
+
 /* I/O APIC entries */
-extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+extern struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
 
 /* # of MP IRQ source entries */
 extern int mp_irq_entries;
-- 
cgit v1.2.3-70-g09d2


From 2fddb6e28e903a3ab1704cc5aac01be5a59dc05b Mon Sep 17 00:00:00 2001
From: Alexey Starikovskiy <astarikovskiy@suse.de>
Date: Wed, 14 May 2008 19:03:17 +0400
Subject: x86: make config_irqsrc not MPspec specific

Signed-off-by: Alexey Starikovskiy <astarikovskiy@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/acpi/boot.c  | 43 +++++++++++++++--------------
 arch/x86/kernel/io_apic_32.c | 64 ++++++++++++++++++++++----------------------
 arch/x86/kernel/io_apic_64.c | 58 +++++++++++++++++++--------------------
 arch/x86/kernel/mpparse.c    |  8 +++++-
 include/asm-x86/io_apic.h    | 12 ++++++++-
 5 files changed, 100 insertions(+), 85 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index a26dd91faf0..276ec058f68 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -969,14 +969,14 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
 	if ((bus_irq == 0) && (trigger == 3))
 		trigger = 1;
 
-	mp_irqs[mp_irq_entries].mpc_type = MP_INTSRC;
-	mp_irqs[mp_irq_entries].mpc_irqtype = mp_INT;
-	mp_irqs[mp_irq_entries].mpc_irqflag = (trigger << 2) | polarity;
-	mp_irqs[mp_irq_entries].mpc_srcbus = MP_ISA_BUS;
-	mp_irqs[mp_irq_entries].mpc_srcbusirq = bus_irq;	/* IRQ */
-	mp_irqs[mp_irq_entries].mpc_dstapic =
+	mp_irqs[mp_irq_entries].mp_type = MP_INTSRC;
+	mp_irqs[mp_irq_entries].mp_irqtype = mp_INT;
+	mp_irqs[mp_irq_entries].mp_irqflag = (trigger << 2) | polarity;
+	mp_irqs[mp_irq_entries].mp_srcbus = MP_ISA_BUS;
+	mp_irqs[mp_irq_entries].mp_srcbusirq = bus_irq;	/* IRQ */
+	mp_irqs[mp_irq_entries].mp_dstapic =
 			mp_ioapics[ioapic].mp_apicid;	/* APIC ID */
-	mp_irqs[mp_irq_entries].mpc_dstirq = pin;	/* INTIN# */
+	mp_irqs[mp_irq_entries].mp_dstirq = pin;	/* INTIN# */
 
 	if (++mp_irq_entries == MAX_IRQ_SOURCES)
 		panic("Max # of irq sources exceeded!!\n");
@@ -1012,12 +1012,11 @@ void __init mp_config_acpi_legacy_irqs(void)
 	if (ioapic < 0)
 		return;
 
-	mp_irqs[mp_irq_entries].mpc_type = MP_INTSRC;
-	mp_irqs[mp_irq_entries].mpc_irqflag = 0;	/* Conforming */
-	mp_irqs[mp_irq_entries].mpc_srcbus = MP_ISA_BUS;
-#ifdef CONFIG_X86_IO_APIC
-	mp_irqs[mp_irq_entries].mpc_dstapic = mp_ioapics[ioapic].mp_apicid;
-#endif
+	mp_irqs[mp_irq_entries].mp_type = MP_INTSRC;
+	mp_irqs[mp_irq_entries].mp_irqflag = 0;	/* Conforming */
+	mp_irqs[mp_irq_entries].mp_srcbus = MP_ISA_BUS;
+	mp_irqs[mp_irq_entries].mp_dstapic = mp_ioapics[ioapic].mp_apicid;
+
 	/*
 	 * Use the default configuration for the IRQs 0-15.  Unless
 	 * overridden by (MADT) interrupt source override entries.
@@ -1026,17 +1025,17 @@ void __init mp_config_acpi_legacy_irqs(void)
 		int idx;
 
 		for (idx = 0; idx < mp_irq_entries; idx++) {
-			struct mpc_config_intsrc *irq = mp_irqs + idx;
+			struct mp_config_intsrc *irq = mp_irqs + idx;
 
 			/* Do we already have a mapping for this ISA IRQ? */
-			if (irq->mpc_srcbus == MP_ISA_BUS
-			    && irq->mpc_srcbusirq == i)
+			if (irq->mp_srcbus == MP_ISA_BUS
+			    && irq->mp_srcbusirq == i)
 				break;
 
 			/* Do we already have a mapping for this IOAPIC pin */
-			if ((irq->mpc_dstapic ==
-				mp_irqs[mp_irq_entries].mpc_dstapic) &&
-			    (irq->mpc_dstirq == i))
+			if ((irq->mp_dstapic ==
+				mp_irqs[mp_irq_entries].mp_dstapic) &&
+			    (irq->mp_dstirq == i))
 				break;
 		}
 
@@ -1045,9 +1044,9 @@ void __init mp_config_acpi_legacy_irqs(void)
 			continue;	/* IRQ already used */
 		}
 
-		mp_irqs[mp_irq_entries].mpc_irqtype = mp_INT;
-		mp_irqs[mp_irq_entries].mpc_srcbusirq = i;	/* Identity mapped */
-		mp_irqs[mp_irq_entries].mpc_dstirq = i;
+		mp_irqs[mp_irq_entries].mp_irqtype = mp_INT;
+		mp_irqs[mp_irq_entries].mp_srcbusirq = i;	/* Identity mapped */
+		mp_irqs[mp_irq_entries].mp_dstirq = i;
 
 		if (++mp_irq_entries == MAX_IRQ_SOURCES)
 			panic("Max # of irq sources exceeded!!\n");
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 5af1b717236..ea68c3e5ba1 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -76,7 +76,7 @@ struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
 int nr_ioapics;
 
 /* MP IRQ source entries */
-struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
 
 /* # of MP IRQ source entries */
 int mp_irq_entries;
@@ -801,10 +801,10 @@ static int find_irq_entry(int apic, int pin, int type)
 	int i;
 
 	for (i = 0; i < mp_irq_entries; i++)
-		if (mp_irqs[i].mpc_irqtype == type &&
-		    (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mp_apicid ||
-		     mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
-		    mp_irqs[i].mpc_dstirq == pin)
+		if (mp_irqs[i].mp_irqtype == type &&
+		    (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
+		     mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
+		    mp_irqs[i].mp_dstirq == pin)
 			return i;
 
 	return -1;
@@ -818,13 +818,13 @@ static int __init find_isa_irq_pin(int irq, int type)
 	int i;
 
 	for (i = 0; i < mp_irq_entries; i++) {
-		int lbus = mp_irqs[i].mpc_srcbus;
+		int lbus = mp_irqs[i].mp_srcbus;
 
 		if (test_bit(lbus, mp_bus_not_pci) &&
-		    (mp_irqs[i].mpc_irqtype == type) &&
-		    (mp_irqs[i].mpc_srcbusirq == irq))
+		    (mp_irqs[i].mp_irqtype == type) &&
+		    (mp_irqs[i].mp_srcbusirq == irq))
 
-			return mp_irqs[i].mpc_dstirq;
+			return mp_irqs[i].mp_dstirq;
 	}
 	return -1;
 }
@@ -834,17 +834,17 @@ static int __init find_isa_irq_apic(int irq, int type)
 	int i;
 
 	for (i = 0; i < mp_irq_entries; i++) {
-		int lbus = mp_irqs[i].mpc_srcbus;
+		int lbus = mp_irqs[i].mp_srcbus;
 
 		if (test_bit(lbus, mp_bus_not_pci) &&
-		    (mp_irqs[i].mpc_irqtype == type) &&
-		    (mp_irqs[i].mpc_srcbusirq == irq))
+		    (mp_irqs[i].mp_irqtype == type) &&
+		    (mp_irqs[i].mp_srcbusirq == irq))
 			break;
 	}
 	if (i < mp_irq_entries) {
 		int apic;
 		for(apic = 0; apic < nr_ioapics; apic++) {
-			if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mpc_dstapic)
+			if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
 				return apic;
 		}
 	}
@@ -869,23 +869,23 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 		return -1;
 	}
 	for (i = 0; i < mp_irq_entries; i++) {
-		int lbus = mp_irqs[i].mpc_srcbus;
+		int lbus = mp_irqs[i].mp_srcbus;
 
 		for (apic = 0; apic < nr_ioapics; apic++)
-			if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mpc_dstapic ||
-			    mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
+			if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
+			    mp_irqs[i].mp_dstapic == MP_APIC_ALL)
 				break;
 
 		if (!test_bit(lbus, mp_bus_not_pci) &&
-		    !mp_irqs[i].mpc_irqtype &&
+		    !mp_irqs[i].mp_irqtype &&
 		    (bus == lbus) &&
-		    (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
-			int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
+		    (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
+			int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq);
 
 			if (!(apic || IO_APIC_IRQ(irq)))
 				continue;
 
-			if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
+			if (pin == (mp_irqs[i].mp_srcbusirq & 3))
 				return irq;
 			/*
 			 * Use the first all-but-pin matching entry as a
@@ -952,7 +952,7 @@ static int EISA_ELCR(unsigned int irq)
  * EISA conforming in the MP table, that means its trigger type must
  * be read in from the ELCR */
 
-#define default_EISA_trigger(idx)	(EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
+#define default_EISA_trigger(idx)	(EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
 #define default_EISA_polarity(idx)	default_ISA_polarity(idx)
 
 /* PCI interrupts are always polarity one level triggered,
@@ -969,13 +969,13 @@ static int EISA_ELCR(unsigned int irq)
 
 static int MPBIOS_polarity(int idx)
 {
-	int bus = mp_irqs[idx].mpc_srcbus;
+	int bus = mp_irqs[idx].mp_srcbus;
 	int polarity;
 
 	/*
 	 * Determine IRQ line polarity (high active or low active):
 	 */
-	switch (mp_irqs[idx].mpc_irqflag & 3)
+	switch (mp_irqs[idx].mp_irqflag & 3)
 	{
 		case 0: /* conforms, ie. bus-type dependent polarity */
 		{
@@ -1012,13 +1012,13 @@ static int MPBIOS_polarity(int idx)
 
 static int MPBIOS_trigger(int idx)
 {
-	int bus = mp_irqs[idx].mpc_srcbus;
+	int bus = mp_irqs[idx].mp_srcbus;
 	int trigger;
 
 	/*
 	 * Determine IRQ trigger mode (edge or level sensitive):
 	 */
-	switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
+	switch ((mp_irqs[idx].mp_irqflag>>2) & 3)
 	{
 		case 0: /* conforms, ie. bus-type dependent */
 		{
@@ -1097,16 +1097,16 @@ static inline int irq_trigger(int idx)
 static int pin_2_irq(int idx, int apic, int pin)
 {
 	int irq, i;
-	int bus = mp_irqs[idx].mpc_srcbus;
+	int bus = mp_irqs[idx].mp_srcbus;
 
 	/*
 	 * Debugging check, we are in big trouble if this message pops up!
 	 */
-	if (mp_irqs[idx].mpc_dstirq != pin)
+	if (mp_irqs[idx].mp_dstirq != pin)
 		printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
 
 	if (test_bit(bus, mp_bus_not_pci))
-		irq = mp_irqs[idx].mpc_srcbusirq;
+		irq = mp_irqs[idx].mp_srcbusirq;
 	else {
 		/*
 		 * PCI IRQs are mapped in order
@@ -1793,8 +1793,8 @@ static void __init setup_ioapic_ids_from_mpc(void)
 		 */
 		if (old_id != mp_ioapics[apic].mp_apicid)
 			for (i = 0; i < mp_irq_entries; i++)
-				if (mp_irqs[i].mpc_dstapic == old_id)
-					mp_irqs[i].mpc_dstapic
+				if (mp_irqs[i].mp_dstapic == old_id)
+					mp_irqs[i].mp_dstapic
 						= mp_ioapics[apic].mp_apicid;
 
 		/*
@@ -2810,8 +2810,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 		return -1;
 
 	for (i = 0; i < mp_irq_entries; i++)
-		if (mp_irqs[i].mpc_irqtype == mp_INT &&
-		    mp_irqs[i].mpc_srcbusirq == bus_irq)
+		if (mp_irqs[i].mp_irqtype == mp_INT &&
+		    mp_irqs[i].mp_srcbusirq == bus_irq)
 			break;
 	if (i >= mp_irq_entries)
 		return -1;
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 4555ad8c207..e7f1476ed53 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -108,7 +108,7 @@ struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
 int nr_ioapics;
 
 /* MP IRQ source entries */
-struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
 
 /* # of MP IRQ source entries */
 int mp_irq_entries;
@@ -453,10 +453,10 @@ static int find_irq_entry(int apic, int pin, int type)
 	int i;
 
 	for (i = 0; i < mp_irq_entries; i++)
-		if (mp_irqs[i].mpc_irqtype == type &&
-		    (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mp_apicid ||
-		     mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
-		    mp_irqs[i].mpc_dstirq == pin)
+		if (mp_irqs[i].mp_irqtype == type &&
+		    (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
+		     mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
+		    mp_irqs[i].mp_dstirq == pin)
 			return i;
 
 	return -1;
@@ -470,13 +470,13 @@ static int __init find_isa_irq_pin(int irq, int type)
 	int i;
 
 	for (i = 0; i < mp_irq_entries; i++) {
-		int lbus = mp_irqs[i].mpc_srcbus;
+		int lbus = mp_irqs[i].mp_srcbus;
 
 		if (test_bit(lbus, mp_bus_not_pci) &&
-		    (mp_irqs[i].mpc_irqtype == type) &&
-		    (mp_irqs[i].mpc_srcbusirq == irq))
+		    (mp_irqs[i].mp_irqtype == type) &&
+		    (mp_irqs[i].mp_srcbusirq == irq))
 
-			return mp_irqs[i].mpc_dstirq;
+			return mp_irqs[i].mp_dstirq;
 	}
 	return -1;
 }
@@ -486,17 +486,17 @@ static int __init find_isa_irq_apic(int irq, int type)
 	int i;
 
 	for (i = 0; i < mp_irq_entries; i++) {
-		int lbus = mp_irqs[i].mpc_srcbus;
+		int lbus = mp_irqs[i].mp_srcbus;
 
 		if (test_bit(lbus, mp_bus_not_pci) &&
-		    (mp_irqs[i].mpc_irqtype == type) &&
-		    (mp_irqs[i].mpc_srcbusirq == irq))
+		    (mp_irqs[i].mp_irqtype == type) &&
+		    (mp_irqs[i].mp_srcbusirq == irq))
 			break;
 	}
 	if (i < mp_irq_entries) {
 		int apic;
 		for(apic = 0; apic < nr_ioapics; apic++) {
-			if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mpc_dstapic)
+			if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
 				return apic;
 		}
 	}
@@ -521,23 +521,23 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 		return -1;
 	}
 	for (i = 0; i < mp_irq_entries; i++) {
-		int lbus = mp_irqs[i].mpc_srcbus;
+		int lbus = mp_irqs[i].mp_srcbus;
 
 		for (apic = 0; apic < nr_ioapics; apic++)
-			if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mpc_dstapic ||
-			    mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
+			if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
+			    mp_irqs[i].mp_dstapic == MP_APIC_ALL)
 				break;
 
 		if (!test_bit(lbus, mp_bus_not_pci) &&
-		    !mp_irqs[i].mpc_irqtype &&
+		    !mp_irqs[i].mp_irqtype &&
 		    (bus == lbus) &&
-		    (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
-			int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
+		    (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
+			int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq);
 
 			if (!(apic || IO_APIC_IRQ(irq)))
 				continue;
 
-			if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
+			if (pin == (mp_irqs[i].mp_srcbusirq & 3))
 				return irq;
 			/*
 			 * Use the first all-but-pin matching entry as a
@@ -565,13 +565,13 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 
 static int MPBIOS_polarity(int idx)
 {
-	int bus = mp_irqs[idx].mpc_srcbus;
+	int bus = mp_irqs[idx].mp_srcbus;
 	int polarity;
 
 	/*
 	 * Determine IRQ line polarity (high active or low active):
 	 */
-	switch (mp_irqs[idx].mpc_irqflag & 3)
+	switch (mp_irqs[idx].mp_irqflag & 3)
 	{
 		case 0: /* conforms, ie. bus-type dependent polarity */
 			if (test_bit(bus, mp_bus_not_pci))
@@ -607,13 +607,13 @@ static int MPBIOS_polarity(int idx)
 
 static int MPBIOS_trigger(int idx)
 {
-	int bus = mp_irqs[idx].mpc_srcbus;
+	int bus = mp_irqs[idx].mp_srcbus;
 	int trigger;
 
 	/*
 	 * Determine IRQ trigger mode (edge or level sensitive):
 	 */
-	switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
+	switch ((mp_irqs[idx].mp_irqflag>>2) & 3)
 	{
 		case 0: /* conforms, ie. bus-type dependent */
 			if (test_bit(bus, mp_bus_not_pci))
@@ -660,16 +660,16 @@ static inline int irq_trigger(int idx)
 static int pin_2_irq(int idx, int apic, int pin)
 {
 	int irq, i;
-	int bus = mp_irqs[idx].mpc_srcbus;
+	int bus = mp_irqs[idx].mp_srcbus;
 
 	/*
 	 * Debugging check, we are in big trouble if this message pops up!
 	 */
-	if (mp_irqs[idx].mpc_dstirq != pin)
+	if (mp_irqs[idx].mp_dstirq != pin)
 		printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
 
 	if (test_bit(bus, mp_bus_not_pci)) {
-		irq = mp_irqs[idx].mpc_srcbusirq;
+		irq = mp_irqs[idx].mp_srcbusirq;
 	} else {
 		/*
 		 * PCI IRQs are mapped in order
@@ -2242,8 +2242,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 		return -1;
 
 	for (i = 0; i < mp_irq_entries; i++)
-		if (mp_irqs[i].mpc_irqtype == mp_INT &&
-		    mp_irqs[i].mpc_srcbusirq == bus_irq)
+		if (mp_irqs[i].mp_irqtype == mp_INT &&
+		    mp_irqs[i].mp_srcbusirq == bus_irq)
 			break;
 	if (i >= mp_irq_entries)
 		return -1;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 9f1e5bf7f0f..59f051db236 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -186,12 +186,18 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m)
 
 static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
 {
-	mp_irqs[mp_irq_entries] = *m;
 	printk(KERN_INFO "Int: type %d, pol %d, trig %d, bus %02x,"
 		" IRQ %02x, APIC ID %x, APIC INT %02x\n",
 		m->mpc_irqtype, m->mpc_irqflag & 3,
 		(m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
 		m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
+	mp_irqs[mp_irq_entries].mp_dstapic = m->mpc_dstapic;
+	mp_irqs[mp_irq_entries].mp_type = m->mpc_type;
+	mp_irqs[mp_irq_entries].mp_irqtype = m->mpc_irqtype;
+	mp_irqs[mp_irq_entries].mp_irqflag = m->mpc_irqflag;
+	mp_irqs[mp_irq_entries].mp_srcbus = m->mpc_srcbus;
+	mp_irqs[mp_irq_entries].mp_srcbusirq = m->mpc_srcbusirq;
+	mp_irqs[mp_irq_entries].mp_dstirq = m->mpc_dstirq;
 	if (++mp_irq_entries == MAX_IRQ_SOURCES)
 		panic("Max # of irq sources exceeded!!\n");
 }
diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h
index ade76c0d03a..86d8c3bdcca 100644
--- a/include/asm-x86/io_apic.h
+++ b/include/asm-x86/io_apic.h
@@ -120,6 +120,16 @@ struct mp_config_ioapic {
 	unsigned char mp_flags;
 };
 
+struct mp_config_intsrc {
+	unsigned int mp_dstapic;
+	unsigned char mp_type;
+	unsigned char mp_irqtype;
+	unsigned short mp_irqflag;
+	unsigned char mp_srcbus;
+	unsigned char mp_srcbusirq;
+	unsigned char mp_dstirq;
+};
+
 /* I/O APIC entries */
 extern struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
 
@@ -127,7 +137,7 @@ extern struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
 extern int mp_irq_entries;
 
 /* MP IRQ source entries */
-extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+extern struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
 
 /* non-0 if default (table-less) MP configuration */
 extern int mpc_default_type;
-- 
cgit v1.2.3-70-g09d2


From bf62f3981c7076714e3b9f5fa6989a806cad02bf Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 20 May 2008 20:10:58 -0700
Subject: x86: move e820_mark_nosave_regions to e820.c

and make e820_mark_nosave_regions to take limit_pfn to use max_low_pfn
for 32bit and end_pfn for 64bit

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/e820.c     | 32 ++++++++++++++++++++++++++++++++
 arch/x86/kernel/e820_32.c  | 32 --------------------------------
 arch/x86/kernel/e820_64.c  | 32 --------------------------------
 arch/x86/kernel/setup_32.c |  2 +-
 arch/x86/kernel/setup_64.c |  2 +-
 include/asm-x86/e820.h     |  9 +++++++++
 include/asm-x86/e820_32.h  |  8 --------
 include/asm-x86/e820_64.h  |  1 -
 8 files changed, 43 insertions(+), 75 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 35da8cdbe5e..0cd9132c945 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -18,6 +18,7 @@
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/pfn.h>
+#include <linux/suspend.h>
 
 #include <asm/pgtable.h>
 #include <asm/page.h>
@@ -495,6 +496,37 @@ __init void e820_setup_gap(void)
 	       pci_mem_start, gapstart, gapsize);
 }
 
+#if defined(CONFIG_X86_64) || \
+	(defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
+/**
+ * Find the ranges of physical addresses that do not correspond to
+ * e820 RAM areas and mark the corresponding pages as nosave for
+ * hibernation (32 bit) or software suspend and suspend to RAM (64 bit).
+ *
+ * This function requires the e820 map to be sorted and without any
+ * overlapping entries and assumes the first e820 area to be RAM.
+ */
+void __init e820_mark_nosave_regions(unsigned long limit_pfn)
+{
+	int i;
+	unsigned long pfn;
+
+	pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
+	for (i = 1; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+
+		if (pfn < PFN_UP(ei->addr))
+			register_nosave_region(pfn, PFN_UP(ei->addr));
+
+		pfn = PFN_DOWN(ei->addr + ei->size);
+		if (ei->type != E820_RAM)
+			register_nosave_region(PFN_UP(ei->addr), pfn);
+
+		if (pfn >= limit_pfn)
+			break;
+	}
+}
+#endif
 
 /*
  * Early reserved memory areas.
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
index dfe25751038..db760d4706a 100644
--- a/arch/x86/kernel/e820_32.c
+++ b/arch/x86/kernel/e820_32.c
@@ -9,7 +9,6 @@
 #include <linux/mm.h>
 #include <linux/pfn.h>
 #include <linux/uaccess.h>
-#include <linux/suspend.h>
 
 #include <asm/pgtable.h>
 #include <asm/page.h>
@@ -208,37 +207,6 @@ void __init init_iomem_resources(struct resource *code_resource,
 	}
 }
 
-#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION)
-/**
- * e820_mark_nosave_regions - Find the ranges of physical addresses that do not
- * correspond to e820 RAM areas and mark the corresponding pages as nosave for
- * hibernation.
- *
- * This function requires the e820 map to be sorted and without any
- * overlapping entries and assumes the first e820 area to be RAM.
- */
-void __init e820_mark_nosave_regions(void)
-{
-	int i;
-	unsigned long pfn;
-
-	pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
-	for (i = 1; i < e820.nr_map; i++) {
-		struct e820entry *ei = &e820.map[i];
-
-		if (pfn < PFN_UP(ei->addr))
-			register_nosave_region(pfn, PFN_UP(ei->addr));
-
-		pfn = PFN_DOWN(ei->addr + ei->size);
-		if (ei->type != E820_RAM)
-			register_nosave_region(PFN_UP(ei->addr), pfn);
-
-		if (pfn >= max_low_pfn)
-			break;
-	}
-}
-#endif
-
 /*
  * Find the highest page frame number we have available
  */
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index 3b2e0b1bb49..5e063e72b24 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -17,7 +17,6 @@
 #include <linux/kexec.h>
 #include <linux/module.h>
 #include <linux/mm.h>
-#include <linux/suspend.h>
 #include <linux/pfn.h>
 #include <linux/pci.h>
 
@@ -93,37 +92,6 @@ void __init e820_reserve_resources(void)
 	}
 }
 
-/*
- * Find the ranges of physical addresses that do not correspond to
- * e820 RAM areas and mark the corresponding pages as nosave for software
- * suspend and suspend to RAM.
- *
- * This function requires the e820 map to be sorted and without any
- * overlapping entries and assumes the first e820 area to be RAM.
- */
-void __init e820_mark_nosave_regions(void)
-{
-	int i;
-	unsigned long paddr;
-
-	paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE);
-	for (i = 1; i < e820.nr_map; i++) {
-		struct e820entry *ei = &e820.map[i];
-
-		if (paddr < ei->addr)
-			register_nosave_region(PFN_DOWN(paddr),
-						PFN_UP(ei->addr));
-
-		paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
-		if (ei->type != E820_RAM)
-			register_nosave_region(PFN_UP(ei->addr),
-						PFN_DOWN(paddr));
-
-		if (paddr >= (end_pfn << PAGE_SHIFT))
-			break;
-	}
-}
-
 /*
  * Finds an active region in the address range from start_pfn to last_pfn and
  * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 3c451d143eb..e1173aecf69 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -820,7 +820,7 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
 	e820_setup_gap();
-	e820_mark_nosave_regions();
+	e820_mark_nosave_regions(max_low_pfn);
 
 #ifdef CONFIG_VT
 #if defined(CONFIG_VGA_CONSOLE)
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 6dff1286ad8..975a5da46e4 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -516,7 +516,7 @@ void __init setup_arch(char **cmdline_p)
 	 * We trust e820 completely. No explicit ROM probing in memory.
 	 */
 	e820_reserve_resources();
-	e820_mark_nosave_regions();
+	e820_mark_nosave_regions(end_pfn);
 
 	/* request I/O space for devices used on all i[345]86 PCs */
 	for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index 5a58e2bb1d7..4266a2c5f2e 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -70,6 +70,15 @@ extern u64 update_memory_range(u64 start, u64 size, unsigned old_type,
 extern void update_e820(void);
 extern void e820_setup_gap(void);
 
+#if defined(CONFIG_X86_64) || \
+	(defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
+extern void e820_mark_nosave_regions(unsigned long limit_pfn);
+#else
+static inline void e820_mark_nosave_regions(unsigned long limit_pfn)
+{
+}
+#endif
+
 extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align);
 extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align);
 extern void reserve_early(u64 start, u64 end, char *name);
diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h
index 9576b438fbd..7ace82570a3 100644
--- a/include/asm-x86/e820_32.h
+++ b/include/asm-x86/e820_32.h
@@ -28,13 +28,5 @@ extern void init_iomem_resources(struct resource *code_resource,
 				 struct resource *data_resource,
 				 struct resource *bss_resource);
 
-#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION)
-extern void e820_mark_nosave_regions(void);
-#else
-static inline void e820_mark_nosave_regions(void)
-{
-}
-#endif
-
 #endif/*!__ASSEMBLY__*/
 #endif/*__E820_HEADER*/
diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h
index 37f176a02bc..917963ccab6 100644
--- a/include/asm-x86/e820_64.h
+++ b/include/asm-x86/e820_64.h
@@ -18,7 +18,6 @@ extern void setup_memory_region(void);
 extern void contig_e820_setup(void);
 extern unsigned long e820_end_of_ram(void);
 extern void e820_reserve_resources(void);
-extern void e820_mark_nosave_regions(void);
 extern int e820_any_non_reserved(unsigned long start, unsigned long end);
 extern int is_memory_any_valid(unsigned long start, unsigned long end);
 extern int e820_all_non_reserved(unsigned long start, unsigned long end);
-- 
cgit v1.2.3-70-g09d2


From ce6444d39fdea29dcf145d2d95fe9cdc850aa53c Mon Sep 17 00:00:00 2001
From: Alexey Starikovskiy <astarikovskiy@suse.de>
Date: Mon, 19 May 2008 19:47:09 +0400
Subject: x86: mp_bus_id_to_pci_bus is not needed

---
 arch/x86/kernel/io_apic_32.c |  2 +-
 arch/x86/kernel/io_apic_64.c |  2 +-
 arch/x86/kernel/mpparse.c    | 10 ----------
 include/asm-x86/mpspec.h     |  2 --
 4 files changed, 2 insertions(+), 14 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index ea68c3e5ba1..17b2e8f862d 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -864,7 +864,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 
 	apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
 		"slot:%d, pin:%d.\n", bus, slot, pin);
-	if (mp_bus_id_to_pci_bus[bus] == -1) {
+	if (test_bit(bus, mp_bus_not_pci)) {
 		printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
 		return -1;
 	}
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index e7f1476ed53..9637675ea55 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -516,7 +516,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 
 	apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
 		bus, slot, pin);
-	if (mp_bus_id_to_pci_bus[bus] == -1) {
+	if (test_bit(bus, mp_bus_not_pci)) {
 		apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
 		return -1;
 	}
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index d67cd7600a2..83dfd6696c2 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -41,13 +41,6 @@ int mp_bus_id_to_type[MAX_MP_BUSSES];
 #endif
 
 DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
-int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = {[0 ... MAX_MP_BUSSES - 1] = -1 };
-
-static int mp_current_pci_id;
-
-/*
- * Intel MP BIOS table parsing routines:
- */
 
 /*
  * Checksum an MP configuration block.
@@ -101,7 +94,6 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
 static void __init MP_bus_info(struct mpc_config_bus *m)
 {
 	char str[7];
-
 	memcpy(str, m->mpc_bustype, 6);
 	str[6] = 0;
 
@@ -130,8 +122,6 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
 		mpc_oem_pci_bus(m, translation_table[mpc_record]);
 #endif
 		clear_bit(m->mpc_busid, mp_bus_not_pci);
-		mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
-		mp_current_pci_id++;
 #if defined(CONFIG_EISA) || defined (CONFIG_MCA)
 		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
 	} else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) {
diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h
index 57a991b9c05..b785ddd8d76 100644
--- a/include/asm-x86/mpspec.h
+++ b/include/asm-x86/mpspec.h
@@ -32,8 +32,6 @@ extern int mp_bus_id_to_type[MAX_MP_BUSSES];
 
 extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
 
-extern int mp_bus_id_to_pci_bus[MAX_MP_BUSSES];
-
 extern unsigned int boot_cpu_physical_apicid;
 extern int smp_found_config;
 extern int mpc_default_type;
-- 
cgit v1.2.3-70-g09d2


From 2e4db9d2c5db9c9dd3e1d1ec3263bc4a990ff8df Mon Sep 17 00:00:00 2001
From: Fernando Luis Vazquez Cao <fernando@oss.ntt.co.jp>
Date: Mon, 19 May 2008 14:23:44 -0700
Subject: x86: remove duplicate declaration of unknown_nmi_panic

Signed-off-by: Fernando Luis Vazquez Cao <fernando@oss.ntt.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/nmi.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h
index 1e363021e72..8455bf36d8d 100644
--- a/include/asm-x86/nmi.h
+++ b/include/asm-x86/nmi.h
@@ -46,7 +46,6 @@ extern void nmi_watchdog_default(void);
 
 extern int check_nmi_watchdog(void);
 extern int nmi_watchdog_enabled;
-extern int unknown_nmi_panic;
 extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
 extern int avail_to_resrv_perfctr_nmi(unsigned int);
 extern int reserve_perfctr_nmi(unsigned int);
-- 
cgit v1.2.3-70-g09d2


From ddca03c98a7f7ad5ab09967ff52d4ed60358c896 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Sat, 24 May 2008 19:36:31 +0400
Subject: x86: nmi - unify die_nmi() interface

By slightly changing 32bit mode die_nmi() we may unify the
interface and make it common for both (32/64bit) modes

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: hpa@zytor.com
Cc: mingo@redhat.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/nmi_32.c   | 7 +++----
 arch/x86/kernel/traps_32.c | 8 +++++---
 include/asm-x86/nmi.h      | 2 +-
 3 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c
index 69bdae555c1..bd04a28f7a5 100644
--- a/arch/x86/kernel/nmi_32.c
+++ b/arch/x86/kernel/nmi_32.c
@@ -320,8 +320,6 @@ void touch_nmi_watchdog(void)
 }
 EXPORT_SYMBOL(touch_nmi_watchdog);
 
-extern void die_nmi(struct pt_regs *, const char *msg);
-
 notrace __kprobes int
 nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
 {
@@ -375,7 +373,8 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
 			/*
 			 * die_nmi will return ONLY if NOTIFY_STOP happens..
 			 */
-			die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP");
+			die_nmi("BUG: NMI Watchdog detected LOCKUP",
+				regs, 0);
 	} else {
 		__get_cpu_var(last_irq_sum) = sum;
 		local_set(&__get_cpu_var(alert_counter), 0);
@@ -406,7 +405,7 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
 	char buf[64];
 
 	sprintf(buf, "NMI received for unknown reason %02x\n", reason);
-	die_nmi(regs, buf);
+	die_nmi(buf, regs, 0);
 	return 0;
 }
 
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index bde6f63e15d..f31e6651fa6 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -755,9 +755,9 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 
 static DEFINE_SPINLOCK(nmi_print_lock);
 
-void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg)
+void notrace __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
 {
-	if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP)
+	if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
 		return;
 
 	spin_lock(&nmi_print_lock);
@@ -766,10 +766,12 @@ void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg)
 	* to get a message out:
 	*/
 	bust_spinlocks(1);
-	printk(KERN_EMERG "%s", msg);
+	printk(KERN_EMERG "%s", str);
 	printk(" on CPU%d, ip %08lx, registers:\n",
 		smp_processor_id(), regs->ip);
 	show_registers(regs);
+	if (do_panic)
+		panic("Non maskable interrupt");
 	console_silent();
 	spin_unlock(&nmi_print_lock);
 	bust_spinlocks(0);
diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h
index 8455bf36d8d..7cd5b6a8813 100644
--- a/include/asm-x86/nmi.h
+++ b/include/asm-x86/nmi.h
@@ -38,12 +38,12 @@ static inline void unset_nmi_pm_callback(struct pm_dev *dev)
 
 #ifdef CONFIG_X86_64
 extern void default_do_nmi(struct pt_regs *);
-extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
 extern void nmi_watchdog_default(void);
 #else
 #define nmi_watchdog_default() do {} while (0)
 #endif
 
+extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
 extern int check_nmi_watchdog(void);
 extern int nmi_watchdog_enabled;
 extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
-- 
cgit v1.2.3-70-g09d2


From 4b82b277707a39b97271439c475f186f63ec4692 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Sat, 24 May 2008 19:36:35 +0400
Subject: x86: nmi_32.c - add nmi_watchdog_default helper

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: hpa@zytor.com
Cc: mingo@redhat.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/nmi_32.c | 19 +++++++++++++------
 include/asm-x86/nmi.h    |  4 +---
 2 files changed, 14 insertions(+), 9 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c
index 4437fe1edab..7714e847821 100644
--- a/arch/x86/kernel/nmi_32.c
+++ b/arch/x86/kernel/nmi_32.c
@@ -51,6 +51,17 @@ static DEFINE_PER_CPU(short, wd_enabled);
 
 static int endflag __initdata = 0;
 
+/* Run after command line and cpu_init init, but before all other checks */
+void nmi_watchdog_default(void)
+{
+	if (nmi_watchdog != NMI_DEFAULT)
+		return;
+	if (lapic_watchdog_ok())
+		nmi_watchdog = NMI_LOCAL_APIC;
+	else
+		nmi_watchdog = NMI_IO_APIC;
+}
+
 #ifdef CONFIG_SMP
 /* The performance counters used by NMI_LOCAL_APIC don't trigger when
  * the CPU is idle. To make sure the NMI watchdog really ticks on all
@@ -437,12 +448,8 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
 		return -EIO;
 	}
 
-	if (nmi_watchdog == NMI_DEFAULT) {
-		if (lapic_watchdog_ok())
-			nmi_watchdog = NMI_LOCAL_APIC;
-		else
-			nmi_watchdog = NMI_IO_APIC;
-	}
+	/* if nmi_watchdog is not set yet, then set it */
+	nmi_watchdog_default();
 
 	if (nmi_watchdog == NMI_LOCAL_APIC) {
 		if (nmi_watchdog_enabled)
diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h
index 7cd5b6a8813..1e8f34d7ab6 100644
--- a/include/asm-x86/nmi.h
+++ b/include/asm-x86/nmi.h
@@ -38,11 +38,9 @@ static inline void unset_nmi_pm_callback(struct pm_dev *dev)
 
 #ifdef CONFIG_X86_64
 extern void default_do_nmi(struct pt_regs *);
-extern void nmi_watchdog_default(void);
-#else
-#define nmi_watchdog_default() do {} while (0)
 #endif
 
+extern void nmi_watchdog_default(void);
 extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
 extern int check_nmi_watchdog(void);
 extern int nmi_watchdog_enabled;
-- 
cgit v1.2.3-70-g09d2


From 4b6011bc6ec71660859139ac8d28b7d0badd681c Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Sun, 25 May 2008 21:16:36 +0200
Subject: x86: Remove obsolete LOCK macro from include/asm-x86/atomic_64.h

Commit d167a518 "[PATCH] x86_64: x86_64 version of the smp alternative patch."
has left the LOCK macro in include/asm-x86_64/atomic.h, which is now
include/asm-x86/atomic_64.h. Its scope should be local to the file, other
architectures don't provide it, I couldn't find an in-tree user of it and
allyesconfig, allmodconfig and allnoconfig build fine without it, so this patch
removes it.

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/atomic_64.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/atomic_64.h b/include/asm-x86/atomic_64.h
index 3e0cd7d3833..fe589c153db 100644
--- a/include/asm-x86/atomic_64.h
+++ b/include/asm-x86/atomic_64.h
@@ -11,12 +11,6 @@
  * resource counting etc..
  */
 
-#ifdef CONFIG_SMP
-#define LOCK "lock ; "
-#else
-#define LOCK ""
-#endif
-
 /*
  * Make sure gcc doesn't try to be clever and move things around
  * on us. We need to use _exactly_ the address the user gave us,
-- 
cgit v1.2.3-70-g09d2


From 1a20d3ecf5c2a6435df2b756435b1eb1c657d45b Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 26 May 2008 13:36:53 -0700
Subject: x86: string_32.h: workaround for broken gcc 4.0

gcc 4.0 fails to allocate %eax for the pattern operand in the rep
store instructions used by memset; force it to do so by declaring a
register variable.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 include/asm-x86/string_32.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/string_32.h b/include/asm-x86/string_32.h
index 8d0c593c441..193578cd1fd 100644
--- a/include/asm-x86/string_32.h
+++ b/include/asm-x86/string_32.h
@@ -267,11 +267,18 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern,
 	asm volatile("rep ; stosl"					\
 		     x							\
 		     : "=&c" (d0), "=&D" (d1)				\
-		     : "a" (pattern), "0" (count/4), "1" ((long)s)	\
+		     : "a" (eax), "0" (count/4), "1" ((long)s)	\
 		     : "memory")
 
 	{
 		int d0, d1;
+#if __GNUC__ == 4 && __GNUC_MINOR__ == 0
+		/* Workaround for broken gcc 4.0 */
+		register unsigned long eax asm("%eax") = pattern;
+#else
+		unsigned long eax = pattern;
+#endif
+
 		switch (count % 4) {
 		case 0:
 			COMMON("");
-- 
cgit v1.2.3-70-g09d2


From 4226ab93d8ae3fd895abe45879fe34d489a98718 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 26 May 2008 23:30:58 +0100
Subject: x86: use pteval_t for _PAGE_FOO

Rather than making _PAGE_* constants signed, and then relying on
sign-extension to make sure that masks derived from them are wide
enough, just explicitly type them pteval_t.  This guarantees that they
and any derived values are the right size for the current pte format.

The reliance on sign extension is fragile, and invokes some very
subtle corners of the C type system.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/pgtable.h | 49 +++++++++++++++++++++--------------------------
 1 file changed, 22 insertions(+), 27 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 97c271b2910..fab3ecb54b6 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -20,30 +20,25 @@
 #define _PAGE_BIT_PAT_LARGE	12	/* On 2MB or 1GB pages */
 #define _PAGE_BIT_NX           63       /* No execute: only valid after cpuid check */
 
-/*
- * Note: we use _AC(1, L) instead of _AC(1, UL) so that we get a
- * sign-extended value on 32-bit with all 1's in the upper word,
- * which preserves the upper pte values on 64-bit ptes:
- */
-#define _PAGE_PRESENT	(_AC(1, L)<<_PAGE_BIT_PRESENT)
-#define _PAGE_RW	(_AC(1, L)<<_PAGE_BIT_RW)
-#define _PAGE_USER	(_AC(1, L)<<_PAGE_BIT_USER)
-#define _PAGE_PWT	(_AC(1, L)<<_PAGE_BIT_PWT)
-#define _PAGE_PCD	(_AC(1, L)<<_PAGE_BIT_PCD)
-#define _PAGE_ACCESSED	(_AC(1, L)<<_PAGE_BIT_ACCESSED)
-#define _PAGE_DIRTY	(_AC(1, L)<<_PAGE_BIT_DIRTY)
-#define _PAGE_PSE	(_AC(1, L)<<_PAGE_BIT_PSE)	/* 2MB page */
-#define _PAGE_GLOBAL	(_AC(1, L)<<_PAGE_BIT_GLOBAL)	/* Global TLB entry */
-#define _PAGE_UNUSED1	(_AC(1, L)<<_PAGE_BIT_UNUSED1)
-#define _PAGE_UNUSED2	(_AC(1, L)<<_PAGE_BIT_UNUSED2)
-#define _PAGE_UNUSED3	(_AC(1, L)<<_PAGE_BIT_UNUSED3)
-#define _PAGE_PAT	(_AC(1, L)<<_PAGE_BIT_PAT)
-#define _PAGE_PAT_LARGE (_AC(1, L)<<_PAGE_BIT_PAT_LARGE)
+#define _PAGE_PRESENT	(_AT(pteval_t, 1) << _PAGE_BIT_PRESENT)
+#define _PAGE_RW	(_AT(pteval_t, 1) << _PAGE_BIT_RW)
+#define _PAGE_USER	(_AT(pteval_t, 1) << _PAGE_BIT_USER)
+#define _PAGE_PWT	(_AT(pteval_t, 1) << _PAGE_BIT_PWT)
+#define _PAGE_PCD	(_AT(pteval_t, 1) << _PAGE_BIT_PCD)
+#define _PAGE_ACCESSED	(_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED)
+#define _PAGE_DIRTY	(_AT(pteval_t, 1) << _PAGE_BIT_DIRTY)
+#define _PAGE_PSE	(_AT(pteval_t, 1) << _PAGE_BIT_PSE)
+#define _PAGE_GLOBAL	(_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
+#define _PAGE_UNUSED1	(_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1)
+#define _PAGE_UNUSED2	(_AT(pteval_t, 1) << _PAGE_BIT_UNUSED2)
+#define _PAGE_UNUSED3	(_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3)
+#define _PAGE_PAT	(_AT(pteval_t, 1) << _PAGE_BIT_PAT)
+#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
 
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
-#define _PAGE_NX	(_AC(1, ULL) << _PAGE_BIT_NX)
+#define _PAGE_NX	(_AT(pteval_t, 1) << _PAGE_BIT_NX)
 #else
-#define _PAGE_NX	0
+#define _PAGE_NX	(_AT(pteval_t, 0))
 #endif
 
 /* If _PAGE_PRESENT is clear, we use these: */
@@ -210,22 +205,22 @@ static inline int pmd_large(pmd_t pte)
 
 static inline pte_t pte_mkclean(pte_t pte)
 {
-	return __pte(pte_val(pte) & ~(pteval_t)_PAGE_DIRTY);
+	return __pte(pte_val(pte) & ~_PAGE_DIRTY);
 }
 
 static inline pte_t pte_mkold(pte_t pte)
 {
-	return __pte(pte_val(pte) & ~(pteval_t)_PAGE_ACCESSED);
+	return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
 }
 
 static inline pte_t pte_wrprotect(pte_t pte)
 {
-	return __pte(pte_val(pte) & ~(pteval_t)_PAGE_RW);
+	return __pte(pte_val(pte) & ~_PAGE_RW);
 }
 
 static inline pte_t pte_mkexec(pte_t pte)
 {
-	return __pte(pte_val(pte) & ~(pteval_t)_PAGE_NX);
+	return __pte(pte_val(pte) & ~_PAGE_NX);
 }
 
 static inline pte_t pte_mkdirty(pte_t pte)
@@ -250,7 +245,7 @@ static inline pte_t pte_mkhuge(pte_t pte)
 
 static inline pte_t pte_clrhuge(pte_t pte)
 {
-	return __pte(pte_val(pte) & ~(pteval_t)_PAGE_PSE);
+	return __pte(pte_val(pte) & ~_PAGE_PSE);
 }
 
 static inline pte_t pte_mkglobal(pte_t pte)
@@ -260,7 +255,7 @@ static inline pte_t pte_mkglobal(pte_t pte)
 
 static inline pte_t pte_clrglobal(pte_t pte)
 {
-	return __pte(pte_val(pte) & ~(pteval_t)_PAGE_GLOBAL);
+	return __pte(pte_val(pte) & ~_PAGE_GLOBAL);
 }
 
 static inline pte_t pte_mkspecial(pte_t pte)
-- 
cgit v1.2.3-70-g09d2


From 4e09e21ccb0dfe7ee8d5641192e0072e83bd916b Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 26 May 2008 23:31:03 +0100
Subject: x86: use symbolic constant in stts()

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/system.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h
index a2f04cd79b2..7e4c133795a 100644
--- a/include/asm-x86/system.h
+++ b/include/asm-x86/system.h
@@ -289,7 +289,7 @@ static inline void native_wbinvd(void)
 
 #endif/* CONFIG_PARAVIRT */
 
-#define stts() write_cr0(8 | read_cr0())
+#define stts() write_cr0(read_cr0() | X86_CR0_TS)
 
 #endif /* __KERNEL__ */
 
-- 
cgit v1.2.3-70-g09d2


From 7b1333aa4cb546ddeb9c05098a53d9a777623a05 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 26 May 2008 23:31:01 +0100
Subject: xen: use hypercall rather than clts

Xen will trap and emulate clts, but its better to use a hypercall.
Also, xenner doesn't handle clts.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/xen/enlighten.c        | 28 ++++++++++++++++++++++++++--
 include/asm-x86/xen/hypercall.h |  7 +++++++
 2 files changed, 33 insertions(+), 2 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index a05b7721bc8..446f4cd649e 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -607,6 +607,30 @@ static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 }
 
+static void xen_clts(void)
+{
+	struct multicall_space mcs;
+
+	mcs = xen_mc_entry(0);
+
+	MULTI_fpu_taskswitch(mcs.mc, 0);
+
+	xen_mc_issue(PARAVIRT_LAZY_CPU);
+}
+
+static void xen_write_cr0(unsigned long cr0)
+{
+	struct multicall_space mcs;
+
+	/* Only pay attention to cr0.TS; everything else is
+	   ignored. */
+	mcs = xen_mc_entry(0);
+
+	MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0);
+
+	xen_mc_issue(PARAVIRT_LAZY_CPU);
+}
+
 static void xen_write_cr2(unsigned long cr2)
 {
 	x86_read_percpu(xen_vcpu)->arch.cr2 = cr2;
@@ -978,10 +1002,10 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.set_debugreg = xen_set_debugreg,
 	.get_debugreg = xen_get_debugreg,
 
-	.clts = native_clts,
+	.clts = xen_clts,
 
 	.read_cr0 = native_read_cr0,
-	.write_cr0 = native_write_cr0,
+	.write_cr0 = xen_write_cr0,
 
 	.read_cr4 = native_read_cr4,
 	.read_cr4_safe = native_read_cr4_safe,
diff --git a/include/asm-x86/xen/hypercall.h b/include/asm-x86/xen/hypercall.h
index c2ccd997ed3..897ff79cca3 100644
--- a/include/asm-x86/xen/hypercall.h
+++ b/include/asm-x86/xen/hypercall.h
@@ -314,6 +314,13 @@ HYPERVISOR_nmi_op(unsigned long op, unsigned long arg)
 	return _hypercall2(int, nmi_op, op, arg);
 }
 
+static inline void
+MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)
+{
+	mcl->op = __HYPERVISOR_fpu_taskswitch;
+	mcl->args[0] = set;
+}
+
 static inline void
 MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va,
 			pte_t new_val, unsigned long flags)
-- 
cgit v1.2.3-70-g09d2


From 349c709f42453707f74bece0d9d35ee5b3842893 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 26 May 2008 23:31:02 +0100
Subject: xen: use new sched_op

Use the new sched_op hypercall, mainly because xenner doesn't support
the old one.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/xen/enlighten.c        | 6 ++++--
 include/asm-x86/xen/hypercall.h | 4 ++--
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 446f4cd649e..35ddaf50180 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -254,7 +254,7 @@ static void xen_irq_enable(void)
 static void xen_safe_halt(void)
 {
 	/* Blocking includes an implicit local_irq_enable(). */
-	if (HYPERVISOR_sched_op(SCHEDOP_block, 0) != 0)
+	if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0)
 		BUG();
 }
 
@@ -1138,11 +1138,13 @@ static const struct smp_ops xen_smp_ops __initdata = {
 
 static void xen_reboot(int reason)
 {
+	struct sched_shutdown r = { .reason = reason };
+
 #ifdef CONFIG_SMP
 	smp_send_stop();
 #endif
 
-	if (HYPERVISOR_sched_op(SCHEDOP_shutdown, reason))
+	if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
 		BUG();
 }
 
diff --git a/include/asm-x86/xen/hypercall.h b/include/asm-x86/xen/hypercall.h
index 897ff79cca3..2a4f9b41d68 100644
--- a/include/asm-x86/xen/hypercall.h
+++ b/include/asm-x86/xen/hypercall.h
@@ -176,9 +176,9 @@ HYPERVISOR_fpu_taskswitch(int set)
 }
 
 static inline int
-HYPERVISOR_sched_op(int cmd, unsigned long arg)
+HYPERVISOR_sched_op(int cmd, void *arg)
 {
-	return _hypercall2(int, sched_op, cmd, arg);
+	return _hypercall2(int, sched_op_new, cmd, arg);
 }
 
 static inline long
-- 
cgit v1.2.3-70-g09d2


From a15af1c9ea2750a9ff01e51615c45950bad8221b Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 26 May 2008 23:31:06 +0100
Subject: x86/paravirt: add pte_flags to just get pte flags

Add pte_flags() to extract the flags from a pte.  This is a special
case of pte_val() which is only guaranteed to return the pte's flags
correctly; the page number may be corrupted or missing.

The intent is to allow paravirt implementations to return pte flags
without having to do any translation of the page number (most notably,
Xen).

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/paravirt.c |  1 +
 arch/x86/xen/enlighten.c   |  1 +
 drivers/lguest/lg.h        |  1 -
 include/asm-x86/page.h     |  1 +
 include/asm-x86/paravirt.h | 15 +++++++++++++++
 include/asm-x86/pgtable.h  | 16 ++++++++--------
 6 files changed, 26 insertions(+), 9 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 74f0c5ea2a0..c98d5468818 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -403,6 +403,7 @@ struct pv_mmu_ops pv_mmu_ops = {
 #endif /* PAGETABLE_LEVELS >= 3 */
 
 	.pte_val = native_pte_val,
+	.pte_flags = native_pte_val,
 	.pgd_val = native_pgd_val,
 
 	.make_pte = native_make_pte,
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 4a372b71239..1b4b5fa498b 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1101,6 +1101,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.set_pmd = xen_set_pmd,
 
 	.pte_val = xen_pte_val,
+	.pte_flags = native_pte_val,
 	.pgd_val = xen_pgd_val,
 
 	.make_pte = xen_make_pte,
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 005bd045d2e..5faefeaf679 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -136,7 +136,6 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user);
  * first step in the migration to the kernel types.  pte_pfn is already defined
  * in the kernel. */
 #define pgd_flags(x)	(pgd_val(x) & ~PAGE_MASK)
-#define pte_flags(x)	(pte_val(x) & ~PAGE_MASK)
 #define pgd_pfn(x)	(pgd_val(x) >> PAGE_SHIFT)
 
 /* interrupts_and_traps.c: */
diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index dc936dddf16..a1e2b9470f2 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -160,6 +160,7 @@ static inline pteval_t native_pte_val(pte_t pte)
 #endif
 
 #define pte_val(x)	native_pte_val(x)
+#define pte_flags(x)	native_pte_val(x)
 #define __pte(x)	native_make_pte(x)
 
 #endif	/* CONFIG_PARAVIRT */
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 0f13b945e24..5ea37a48eec 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -239,6 +239,7 @@ struct pv_mmu_ops {
 				 unsigned long addr, pte_t *ptep);
 
 	pteval_t (*pte_val)(pte_t);
+	pteval_t (*pte_flags)(pte_t);
 	pte_t (*make_pte)(pteval_t pte);
 
 	pgdval_t (*pgd_val)(pgd_t);
@@ -996,6 +997,20 @@ static inline pteval_t pte_val(pte_t pte)
 	return ret;
 }
 
+static inline pteval_t pte_flags(pte_t pte)
+{
+	pteval_t ret;
+
+	if (sizeof(pteval_t) > sizeof(long))
+		ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_flags,
+				 pte.pte, (u64)pte.pte >> 32);
+	else
+		ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags,
+				 pte.pte);
+
+	return ret;
+}
+
 static inline pgd_t __pgd(pgdval_t val)
 {
 	pgdval_t ret;
diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 97c271b2910..47a852cb8c9 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -164,37 +164,37 @@ extern struct list_head pgd_list;
  */
 static inline int pte_dirty(pte_t pte)
 {
-	return pte_val(pte) & _PAGE_DIRTY;
+	return pte_flags(pte) & _PAGE_DIRTY;
 }
 
 static inline int pte_young(pte_t pte)
 {
-	return pte_val(pte) & _PAGE_ACCESSED;
+	return pte_flags(pte) & _PAGE_ACCESSED;
 }
 
 static inline int pte_write(pte_t pte)
 {
-	return pte_val(pte) & _PAGE_RW;
+	return pte_flags(pte) & _PAGE_RW;
 }
 
 static inline int pte_file(pte_t pte)
 {
-	return pte_val(pte) & _PAGE_FILE;
+	return pte_flags(pte) & _PAGE_FILE;
 }
 
 static inline int pte_huge(pte_t pte)
 {
-	return pte_val(pte) & _PAGE_PSE;
+	return pte_flags(pte) & _PAGE_PSE;
 }
 
 static inline int pte_global(pte_t pte)
 {
-	return pte_val(pte) & _PAGE_GLOBAL;
+	return pte_flags(pte) & _PAGE_GLOBAL;
 }
 
 static inline int pte_exec(pte_t pte)
 {
-	return !(pte_val(pte) & _PAGE_NX);
+	return !(pte_flags(pte) & _PAGE_NX);
 }
 
 static inline int pte_special(pte_t pte)
@@ -305,7 +305,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 	return __pgprot(preservebits | addbits);
 }
 
-#define pte_pgprot(x) __pgprot(pte_val(x) & ~PTE_MASK)
+#define pte_pgprot(x) __pgprot(pte_flags(x) & ~PTE_MASK)
 
 #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask)
 
-- 
cgit v1.2.3-70-g09d2


From d451bb7aa852627bdf7be7937dc3d9d9f261b235 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 26 May 2008 23:31:18 +0100
Subject: xen: make phys_to_machine structure dynamic

We now support the use of memory hotplug, so the physical to machine
page mapping structure must be dynamic.  This is implemented as a
two-level radix tree structure, which allows us to efficiently
incrementally allocate memory for the p2m table as new pages are
added.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/xen/enlighten.c   |  2 +-
 arch/x86/xen/mmu.c         | 85 ++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/xen/setup.c       |  2 --
 arch/x86/xen/xen-ops.h     |  2 ++
 include/asm-x86/xen/page.h | 20 ++++-------
 5 files changed, 94 insertions(+), 17 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 5c0635a8bff..73d3c84a349 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1221,7 +1221,7 @@ asmlinkage void __init xen_start_kernel(void)
 
 	/* Get mfn list */
 	if (!xen_feature(XENFEAT_auto_translated_physmap))
-		phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list;
+		xen_build_dynamic_phys_to_machine();
 
 	pgd = (pgd_t *)xen_start_info->pt_base;
 
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 07c2653ec33..c3b27dec6f0 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -56,6 +56,91 @@
 #include "multicalls.h"
 #include "mmu.h"
 
+/*
+ * This should probably be a config option.  On 32-bit, it costs 1
+ * page/gig of memory; on 64-bit its 2 pages/gig.  If we want it to be
+ * completely unbounded we can add another level to the p2m structure.
+ */
+#define MAX_GUEST_PAGES		(16ull * 1024*1024*1024 / PAGE_SIZE)
+#define P2M_ENTRIES_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long))
+
+static unsigned long *p2m_top[MAX_GUEST_PAGES / P2M_ENTRIES_PER_PAGE];
+
+static inline unsigned p2m_top_index(unsigned long pfn)
+{
+	BUG_ON(pfn >= MAX_GUEST_PAGES);
+	return pfn / P2M_ENTRIES_PER_PAGE;
+}
+
+static inline unsigned p2m_index(unsigned long pfn)
+{
+	return pfn % P2M_ENTRIES_PER_PAGE;
+}
+
+void __init xen_build_dynamic_phys_to_machine(void)
+{
+	unsigned pfn;
+	unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
+
+	BUG_ON(xen_start_info->nr_pages >= MAX_GUEST_PAGES);
+
+	for(pfn = 0;
+	    pfn < xen_start_info->nr_pages;
+	    pfn += P2M_ENTRIES_PER_PAGE) {
+		unsigned topidx = p2m_top_index(pfn);
+
+		p2m_top[topidx] = &mfn_list[pfn];
+	}
+}
+
+unsigned long get_phys_to_machine(unsigned long pfn)
+{
+	unsigned topidx, idx;
+
+	topidx = p2m_top_index(pfn);
+	if (p2m_top[topidx] == NULL)
+		return INVALID_P2M_ENTRY;
+
+	idx = p2m_index(pfn);
+	return p2m_top[topidx][idx];
+}
+
+static void alloc_p2m(unsigned long **pp)
+{
+	unsigned long *p;
+	unsigned i;
+
+	p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
+	BUG_ON(p == NULL);
+
+	for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
+		p[i] = INVALID_P2M_ENTRY;
+
+	if (cmpxchg(pp, NULL, p) != NULL)
+		free_page((unsigned long)p);
+}
+
+void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+	unsigned topidx, idx;
+
+	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
+		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
+		return;
+	}
+
+	topidx = p2m_top_index(pfn);
+	if (p2m_top[topidx] == NULL) {
+		/* no need to allocate a page to store an invalid entry */
+		if (mfn == INVALID_P2M_ENTRY)
+			return;
+		alloc_p2m(&p2m_top[topidx]);
+	}
+
+	idx = p2m_index(pfn);
+	p2m_top[topidx][idx] = mfn;
+}
+
 xmaddr_t arbitrary_virt_to_machine(unsigned long address)
 {
 	unsigned int level;
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 82517e4a752..37f8f0b8f74 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -27,8 +27,6 @@
 extern const char xen_hypervisor_callback[];
 extern const char xen_failsafe_callback[];
 
-unsigned long *phys_to_machine_mapping;
-EXPORT_SYMBOL(phys_to_machine_mapping);
 
 /**
  * machine_specific_memory_setup - Hook for machine specific memory setup.
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index f1063ae0803..7bdc8c5c924 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -22,6 +22,8 @@ void __init xen_arch_setup(void);
 void __init xen_init_IRQ(void);
 void xen_enable_sysenter(void);
 
+void __init xen_build_dynamic_phys_to_machine(void);
+
 void xen_setup_timer(int cpu);
 void xen_setup_cpu_clockevents(void);
 unsigned long xen_cpu_khz(void);
diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h
index e11f24038b1..293344f8102 100644
--- a/include/asm-x86/xen/page.h
+++ b/include/asm-x86/xen/page.h
@@ -26,15 +26,15 @@ typedef struct xpaddr {
 #define FOREIGN_FRAME_BIT	(1UL<<31)
 #define FOREIGN_FRAME(m)	((m) | FOREIGN_FRAME_BIT)
 
-extern unsigned long *phys_to_machine_mapping;
+extern unsigned long get_phys_to_machine(unsigned long pfn);
+extern void set_phys_to_machine(unsigned long pfn, unsigned long mfn);
 
 static inline unsigned long pfn_to_mfn(unsigned long pfn)
 {
 	if (xen_feature(XENFEAT_auto_translated_physmap))
 		return pfn;
 
-	return phys_to_machine_mapping[(unsigned int)(pfn)] &
-		~FOREIGN_FRAME_BIT;
+	return get_phys_to_machine(pfn) & ~FOREIGN_FRAME_BIT;
 }
 
 static inline int phys_to_machine_mapping_valid(unsigned long pfn)
@@ -42,7 +42,7 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn)
 	if (xen_feature(XENFEAT_auto_translated_physmap))
 		return 1;
 
-	return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY);
+	return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY;
 }
 
 static inline unsigned long mfn_to_pfn(unsigned long mfn)
@@ -106,20 +106,12 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
 	unsigned long pfn = mfn_to_pfn(mfn);
 	if ((pfn < max_mapnr)
 	    && !xen_feature(XENFEAT_auto_translated_physmap)
-	    && (phys_to_machine_mapping[pfn] != mfn))
+	    && (get_phys_to_machine(pfn) != mfn))
 		return max_mapnr; /* force !pfn_valid() */
+	/* XXX fixme; not true with sparsemem */
 	return pfn;
 }
 
-static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
-{
-	if (xen_feature(XENFEAT_auto_translated_physmap)) {
-		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
-		return;
-	}
-	phys_to_machine_mapping[pfn] = mfn;
-}
-
 /* VIRT <-> MACHINE conversion */
 #define virt_to_machine(v)	(phys_to_machine(XPADDR(__pa(v))))
 #define virt_to_mfn(v)		(pfn_to_mfn(PFN_DOWN(__pa(v))))
-- 
cgit v1.2.3-70-g09d2


From 8006ec3e911f93d702e1d4a4e387e244ab434924 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 26 May 2008 23:31:19 +0100
Subject: xen: add configurable max domain size

Add a config option to set the max size of a Xen domain.  This is used
to scale the size of the physical-to-machine array; it ends up using
around 1 page/GByte, so there's no reason to be very restrictive.

For a 32-bit guest, the default value of 8GB is probably sufficient;
there's not much point in giving a 32-bit machine much more memory
than that.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/xen/Kconfig       | 10 ++++++++++
 arch/x86/xen/mmu.c         | 25 ++++++++++++-------------
 arch/x86/xen/setup.c       |  3 +++
 include/asm-x86/xen/page.h |  5 +++++
 4 files changed, 30 insertions(+), 13 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 525b108411b..d0f1c7c5dc3 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -11,3 +11,13 @@ config XEN
 	  This is the Linux Xen port.  Enabling this will allow the
 	  kernel to boot in a paravirtualized environment under the
 	  Xen hypervisor.
+
+config XEN_MAX_DOMAIN_MEMORY
+       int "Maximum allowed size of a domain in gigabytes"
+       default 8
+       depends on XEN
+       help
+         The pseudo-physical to machine address array is sized
+         according to the maximum possible memory size of a Xen
+         domain.  This array uses 1 page per gigabyte, so there's no
+         need to be too stingy here.
\ No newline at end of file
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index c3b27dec6f0..644232aa7bf 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -56,19 +56,13 @@
 #include "multicalls.h"
 #include "mmu.h"
 
-/*
- * This should probably be a config option.  On 32-bit, it costs 1
- * page/gig of memory; on 64-bit its 2 pages/gig.  If we want it to be
- * completely unbounded we can add another level to the p2m structure.
- */
-#define MAX_GUEST_PAGES		(16ull * 1024*1024*1024 / PAGE_SIZE)
 #define P2M_ENTRIES_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long))
 
-static unsigned long *p2m_top[MAX_GUEST_PAGES / P2M_ENTRIES_PER_PAGE];
+static unsigned long *p2m_top[MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE];
 
 static inline unsigned p2m_top_index(unsigned long pfn)
 {
-	BUG_ON(pfn >= MAX_GUEST_PAGES);
+	BUG_ON(pfn >= MAX_DOMAIN_PAGES);
 	return pfn / P2M_ENTRIES_PER_PAGE;
 }
 
@@ -81,12 +75,9 @@ void __init xen_build_dynamic_phys_to_machine(void)
 {
 	unsigned pfn;
 	unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
+	unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
 
-	BUG_ON(xen_start_info->nr_pages >= MAX_GUEST_PAGES);
-
-	for(pfn = 0;
-	    pfn < xen_start_info->nr_pages;
-	    pfn += P2M_ENTRIES_PER_PAGE) {
+	for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
 		unsigned topidx = p2m_top_index(pfn);
 
 		p2m_top[topidx] = &mfn_list[pfn];
@@ -97,6 +88,9 @@ unsigned long get_phys_to_machine(unsigned long pfn)
 {
 	unsigned topidx, idx;
 
+	if (unlikely(pfn >= MAX_DOMAIN_PAGES))
+		return INVALID_P2M_ENTRY;
+
 	topidx = p2m_top_index(pfn);
 	if (p2m_top[topidx] == NULL)
 		return INVALID_P2M_ENTRY;
@@ -129,6 +123,11 @@ void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 		return;
 	}
 
+	if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
+		BUG_ON(mfn != INVALID_P2M_ENTRY);
+		return;
+	}
+
 	topidx = p2m_top_index(pfn);
 	if (p2m_top[topidx] == NULL) {
 		/* no need to allocate a page to store an invalid entry */
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 37f8f0b8f74..488447878a9 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -16,6 +16,7 @@
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 
+#include <xen/page.h>
 #include <xen/interface/callback.h>
 #include <xen/interface/physdev.h>
 #include <xen/features.h>
@@ -36,6 +37,8 @@ char * __init xen_memory_setup(void)
 {
 	unsigned long max_pfn = xen_start_info->nr_pages;
 
+	max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
+
 	e820.nr_map = 0;
 	add_memory_region(0, LOWMEMSIZE(), E820_RAM);
 	add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM);
diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h
index 293344f8102..377c04591c1 100644
--- a/include/asm-x86/xen/page.h
+++ b/include/asm-x86/xen/page.h
@@ -26,6 +26,11 @@ typedef struct xpaddr {
 #define FOREIGN_FRAME_BIT	(1UL<<31)
 #define FOREIGN_FRAME(m)	((m) | FOREIGN_FRAME_BIT)
 
+/* Maximum amount of memory we can handle in a domain in pages */
+#define MAX_DOMAIN_PAGES						\
+    ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE))
+
+
 extern unsigned long get_phys_to_machine(unsigned long pfn);
 extern void set_phys_to_machine(unsigned long pfn, unsigned long mfn);
 
-- 
cgit v1.2.3-70-g09d2


From 19ec673ced067316b9732bc6d1c4ff4052e5f795 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Wed, 28 May 2008 23:00:47 +0400
Subject: x86: nmi - fix incorrect NMI watchdog used by default

The commit

	commit 4b82b277707a39b97271439c475f186f63ec4692
	Author: Cyrill Gorcunov <gorcunov@gmail.com>
	Date:   Sat May 24 19:36:35 2008 +0400

set nmi_watchdog to NMI_IO_APIC as by default. This causes hangs on some
machines with buggy watchdogs. Fix it - i.e. restore old behaviour.

Thanks to Sitsofe Wheeler and Adrian Bunk for catching the problem
and Maciej W. Rozycki for explanation what is going on there.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
CC: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/nmi.c | 16 +++++++++-------
 include/asm-x86/nmi.h |  4 +++-
 2 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 69a839fc1eb..3671a9f3564 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -84,20 +84,15 @@ static inline unsigned int get_timer_irqs(int cpu)
 #endif
 }
 
+#ifdef CONFIG_X86_64
 /* Run after command line and cpu_init init, but before all other checks */
 void nmi_watchdog_default(void)
 {
 	if (nmi_watchdog != NMI_DEFAULT)
 		return;
-#ifdef CONFIG_X86_64
 	nmi_watchdog = NMI_NONE;
-#else
-	if (lapic_watchdog_ok())
-		nmi_watchdog = NMI_LOCAL_APIC;
-	else
-		nmi_watchdog = NMI_IO_APIC;
-#endif
 }
+#endif
 
 #ifdef CONFIG_SMP
 /*
@@ -488,8 +483,15 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
 		return -EIO;
 	}
 
+#ifdef CONFIG_X86_64
 	/* if nmi_watchdog is not set yet, then set it */
 	nmi_watchdog_default();
+#else
+	if (lapic_watchdog_ok())
+		nmi_watchdog = NMI_LOCAL_APIC;
+	else
+		nmi_watchdog = NMI_IO_APIC;
+#endif
 
 	if (nmi_watchdog == NMI_LOCAL_APIC) {
 		if (nmi_watchdog_enabled)
diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h
index 1e8f34d7ab6..6f4d44fc051 100644
--- a/include/asm-x86/nmi.h
+++ b/include/asm-x86/nmi.h
@@ -38,9 +38,11 @@ static inline void unset_nmi_pm_callback(struct pm_dev *dev)
 
 #ifdef CONFIG_X86_64
 extern void default_do_nmi(struct pt_regs *);
+extern void nmi_watchdog_default(void);
+#else
+#define nmi_watchdog_default(void) do {} while (0)
 #endif
 
-extern void nmi_watchdog_default(void);
 extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
 extern int check_nmi_watchdog(void);
 extern int nmi_watchdog_enabled;
-- 
cgit v1.2.3-70-g09d2


From ba3a5974239293d921235e6fa82b09b670e674ef Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 30 May 2008 14:55:47 +0200
Subject: - fix typo in include/asm-x86/nmi.h

---
 include/asm-x86/nmi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h
index 6f4d44fc051..972a4f6f799 100644
--- a/include/asm-x86/nmi.h
+++ b/include/asm-x86/nmi.h
@@ -40,7 +40,7 @@ static inline void unset_nmi_pm_callback(struct pm_dev *dev)
 extern void default_do_nmi(struct pt_regs *);
 extern void nmi_watchdog_default(void);
 #else
-#define nmi_watchdog_default(void) do {} while (0)
+#define nmi_watchdog_default() do { } while (0)
 #endif
 
 extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
-- 
cgit v1.2.3-70-g09d2


From d364319b989967b74e57fef5c8017fd56a16c392 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 2 May 2008 23:42:01 +0200
Subject: x86: move mmconfig declarations to header

arch/x86/kernel/mmconf-fam10h_64.c is missing the prototypes, which
are decalred in arch/x86/kernel/setup_64.c. Move the prototypes and
the inline stubs to the appropriate header file.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/mmconf-fam10h_64.c |  1 +
 arch/x86/kernel/setup_64.c         | 13 +------------
 include/asm-x86/mmconfig.h         | 12 ++++++++++++
 3 files changed, 14 insertions(+), 12 deletions(-)
 create mode 100644 include/asm-x86/mmconfig.h

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index edc5fbfe85c..fdfdc550b36 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -12,6 +12,7 @@
 #include <asm/io.h>
 #include <asm/msr.h>
 #include <asm/acpi.h>
+#include <asm/mmconfig.h>
 
 #include "../pci/pci.h"
 
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 6dff1286ad8..341230db74e 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -71,6 +71,7 @@
 #include <asm/topology.h>
 #include <asm/trampoline.h>
 #include <asm/pat.h>
+#include <asm/mmconfig.h>
 
 #include <mach_apic.h>
 #ifdef CONFIG_PARAVIRT
@@ -293,18 +294,6 @@ static void __init parse_setup_data(void)
 	}
 }
 
-#ifdef CONFIG_PCI_MMCONFIG
-extern void __cpuinit fam10h_check_enable_mmcfg(void);
-extern void __init check_enable_amd_mmconf_dmi(void);
-#else
-void __cpuinit fam10h_check_enable_mmcfg(void)
-{
-}
-void __init check_enable_amd_mmconf_dmi(void)
-{
-}
-#endif
-
 /*
  * setup_arch - architecture-specific boot-time initializations
  *
diff --git a/include/asm-x86/mmconfig.h b/include/asm-x86/mmconfig.h
new file mode 100644
index 00000000000..95beda07c6f
--- /dev/null
+++ b/include/asm-x86/mmconfig.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_MMCONFIG_H
+#define _ASM_MMCONFIG_H
+
+#ifdef CONFIG_PCI_MMCONFIG
+extern void __cpuinit fam10h_check_enable_mmcfg(void);
+extern void __init check_enable_amd_mmconf_dmi(void);
+#else
+static inline void fam10h_check_enable_mmcfg(void) { }
+static inline void check_enable_amd_mmconf_dmi(void) { }
+#endif
+
+#endif
-- 
cgit v1.2.3-70-g09d2


From 3b6b9293d0f8e1b11630102013ca2a1dcef17d44 Mon Sep 17 00:00:00 2001
From: Kristian Høgsberg <krh@redhat.com>
Date: Thu, 29 May 2008 18:31:15 -0400
Subject: x86: Honor 'quiet' command line option in real mode boot
 decompressor.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch lets the early real mode code look for the 'quiet' option
on the kernel command line and pass a loadflag to the decompressor.
When this flag is set, we suppress the "Decompressing Linux... Parsing
ELF... done." messages.

Signed-off-by: Kristian Høgsberg <krh@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/boot/compressed/misc.c | 13 ++++++++++---
 arch/x86/boot/main.c            |  4 ++++
 include/asm-x86/bootparam.h     |  1 +
 3 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 74ed3c075ee..d10e7274e1f 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -189,6 +189,7 @@ static void gzip_release(void **);
  * This is set up by the setup-routine at boot-time
  */
 static struct boot_params *real_mode;		/* Pointer to real-mode data */
+static int quiet;
 
 extern unsigned char input_data[];
 extern int input_len;
@@ -391,7 +392,8 @@ static void parse_elf(void *output)
 		return;
 	}
 
-	putstr("Parsing ELF... ");
+	if (!quiet)
+		putstr("Parsing ELF... ");
 
 	phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum);
 	if (!phdrs)
@@ -426,6 +428,9 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
 {
 	real_mode = rmode;
 
+	if (real_mode->hdr.loadflags & QUIET_FLAG)
+		quiet = 1;
+
 	if (real_mode->screen_info.orig_video_mode == 7) {
 		vidmem = (char *) 0xb0000;
 		vidport = 0x3b4;
@@ -461,9 +466,11 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
 #endif
 
 	makecrc();
-	putstr("\nDecompressing Linux... ");
+	if (!quiet)
+		putstr("\nDecompressing Linux... ");
 	gunzip();
 	parse_elf(output);
-	putstr("done.\nBooting the kernel.\n");
+	if (!quiet)
+		putstr("done.\nBooting the kernel.\n");
 	return;
 }
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
index 77569a4a3be..2296164b54d 100644
--- a/arch/x86/boot/main.c
+++ b/arch/x86/boot/main.c
@@ -165,6 +165,10 @@ void main(void)
 	/* Set the video mode */
 	set_video();
 
+	/* Parse command line for 'quiet' and pass it to decompressor. */
+	if (cmdline_find_option_bool("quiet"))
+		boot_params.hdr.loadflags |= QUIET_FLAG;
+
 	/* Do the last things and invoke protected mode */
 	go_to_protected_mode();
 }
diff --git a/include/asm-x86/bootparam.h b/include/asm-x86/bootparam.h
index f62f4733606..3e36ba8f288 100644
--- a/include/asm-x86/bootparam.h
+++ b/include/asm-x86/bootparam.h
@@ -40,6 +40,7 @@ struct setup_header {
 	__u8	type_of_loader;
 	__u8	loadflags;
 #define LOADED_HIGH	(1<<0)
+#define QUIET_FLAG	(1<<5)
 #define KEEP_SEGMENTS	(1<<6)
 #define CAN_USE_HEAP	(1<<7)
 	__u16	setup_move_size;
-- 
cgit v1.2.3-70-g09d2


From f0d43100f13be0fa5bf52741d7084bb27f00e621 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Thu, 29 May 2008 12:56:36 -0700
Subject: x86: extend e820 early_res support 32bit -fix #3

introduce init_pg_table_start, so xen PV could specify the value.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/head32.c   | 3 ++-
 arch/x86/kernel/head_32.S  | 2 ++
 arch/x86/kernel/setup_32.c | 7 +++++++
 arch/x86/lguest/boot.c     | 5 +++--
 arch/x86/xen/enlighten.c   | 3 ++-
 include/asm-x86/setup.h    | 4 +++-
 6 files changed, 19 insertions(+), 5 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index c216d3c2a99..0b6cb9fba71 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -76,7 +76,8 @@ void __init i386_start_kernel(void)
 		reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
 	}
 #endif
-	reserve_early(__pa_symbol(&_end), init_pg_tables_end, "INIT_PG_TABLE");
+	reserve_early(init_pg_tables_start, init_pg_tables_end,
+			"INIT_PG_TABLE");
 
 	reserve_ebda_region();
 
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index b2cc73768a9..bef4618fead 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -194,6 +194,7 @@ default_entry:
 	xorl %ebx,%ebx				/* %ebx is kept at zero */
 
 	movl $pa(pg0), %edi
+	movl %edi, pa(init_pg_tables_start)
 	movl $pa(swapper_pg_pmd), %edx
 	movl $PTE_ATTR, %eax
 10:
@@ -228,6 +229,7 @@ default_entry:
 page_pde_offset = (__PAGE_OFFSET >> 20);
 
 	movl $pa(pg0), %edi
+	movl %edi, pa(init_pg_tables_start)
 	movl $pa(swapper_pg_dir), %edx
 	movl $PTE_ATTR, %eax
 10:
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 08b47a2f7af..de9c5ee77d0 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -71,6 +71,7 @@
 /* This value is set up by the early boot code to point to the value
    immediately after the boot time page tables.  It contains a *physical*
    address, and must not be in the .bss segment! */
+unsigned long init_pg_tables_start __initdata = ~0UL;
 unsigned long init_pg_tables_end __initdata = ~0UL;
 
 /*
@@ -485,6 +486,10 @@ static void __init reserve_initrd(void)
 		return;
 	}
 
+	printk(KERN_INFO "old RAMDISK: %08llx - %08llx\n", ramdisk_image,
+			ramdisk_end);
+
+
 	if (ramdisk_end <= end_of_lowmem) {
 		/* All in lowmem, easy case */
 		/*
@@ -511,6 +516,8 @@ static void __init reserve_initrd(void)
 			 "NEW RAMDISK");
 	initrd_start = ramdisk_here + PAGE_OFFSET;
 	initrd_end   = initrd_start + ramdisk_size;
+	printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
+			 ramdisk_here, ramdisk_here + ramdisk_size);
 
 	do_relocate_initrd = true;
 }
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index af65b2da3ba..bf7c34a3aae 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1011,6 +1011,7 @@ __init void lguest_init(void)
 	 * clobbered.  The Launcher places our initial pagetables somewhere at
 	 * the top of our physical memory, so we don't need extra space: set
 	 * init_pg_tables_end to the end of the kernel. */
+	init_pg_tables_start = __pa(pg0);
 	init_pg_tables_end = __pa(pg0);
 
 	/* Load the %fs segment register (the per-cpu segment register) with
@@ -1064,9 +1065,9 @@ __init void lguest_init(void)
 	pm_power_off = lguest_power_off;
 	machine_ops.restart = lguest_restart;
 
-	/* Now we're set up, call start_kernel() in init/main.c and we proceed
+	/* Now we're set up, call i386_start_kernel() in head32.c and we proceed
 	 * to boot as normal.  It never returns. */
-	start_kernel();
+	i386_start_kernel();
 }
 /*
  * This marks the end of stage II of our journey, The Guest.
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c8a56e457d6..ccc7b84ddab 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1211,6 +1211,7 @@ asmlinkage void __init xen_start_kernel(void)
 
 	pgd = (pgd_t *)xen_start_info->pt_base;
 
+	init_pg_tables_start = __pa(pgd);
 	init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
 
 	init_mm.pgd = pgd; /* use the Xen pagetables to start */
@@ -1246,5 +1247,5 @@ asmlinkage void __init xen_start_kernel(void)
 		add_preferred_console("hvc", 0, NULL);
 
 	/* Start the world */
-	start_kernel();
+	i386_start_kernel();
 }
diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index ffa0f540fc7..64f5f0c11d6 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -59,9 +59,11 @@ int __init copy_e820_map(struct e820entry *biosmap, int nr_map);
 void __init add_memory_region(unsigned long long start,
 			      unsigned long long size, int type);
 
-extern unsigned long init_pg_tables_end;
 
+void __init i386_start_kernel(void);
 
+extern unsigned long init_pg_tables_start;
+extern unsigned long init_pg_tables_end;
 
 #endif /* __i386__ */
 #endif /* _SETUP */
-- 
cgit v1.2.3-70-g09d2


From a5481280b29b6a3db912ec100498bd31eaa6d2db Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Thu, 29 May 2008 12:58:37 -0700
Subject: x86: extend e820 early_res support 32bit -fix #5

reserve early numa kva, so it will not clash with new RAMDISK

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup_32.c  |  1 -
 arch/x86/mm/discontig_32.c  | 12 +++++-------
 include/asm-x86/mmzone_32.h |  4 ----
 3 files changed, 5 insertions(+), 12 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index de9c5ee77d0..6f40cb560ea 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -614,7 +614,6 @@ void __init setup_bootmem_allocator(void)
 	 */
 	find_smp_config();
 #endif
-	numa_kva_reserve();
 	reserve_crashkernel();
 
 	reserve_ibft_region();
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 47749727907..55fdbab6b01 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -357,6 +357,11 @@ unsigned long __init setup_memory(void)
 	printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n",
 		kva_start_pfn, max_low_pfn);
 	printk("max_pfn = %ld\n", max_pfn);
+
+	/* avoid clash with initrd */
+	reserve_early(kva_start_pfn<<PAGE_SHIFT,
+		      (kva_start_pfn + kva_pages)<<PAGE_SHIFT,
+		     "KVA PG");
 #ifdef CONFIG_HIGHMEM
 	highstart_pfn = highend_pfn = max_pfn;
 	if (max_pfn > system_max_low_pfn)
@@ -392,13 +397,6 @@ unsigned long __init setup_memory(void)
 	return max_low_pfn;
 }
 
-void __init numa_kva_reserve(void)
-{
-	if (kva_pages)
-		reserve_bootmem(PFN_PHYS(kva_start_pfn), PFN_PHYS(kva_pages),
-				BOOTMEM_DEFAULT);
-}
-
 void __init zone_sizes_init(void)
 {
 	int nid;
diff --git a/include/asm-x86/mmzone_32.h b/include/asm-x86/mmzone_32.h
index cb2cad0b65a..faef751181b 100644
--- a/include/asm-x86/mmzone_32.h
+++ b/include/asm-x86/mmzone_32.h
@@ -38,16 +38,12 @@ static inline void get_memcfg_numa(void)
 }
 
 extern int early_pfn_to_nid(unsigned long pfn);
-extern void numa_kva_reserve(void);
 
 #else /* !CONFIG_NUMA */
 
 #define get_memcfg_numa get_memcfg_numa_flat
 #define get_zholes_size(n) (0)
 
-static inline void numa_kva_reserve(void)
-{
-}
 #endif /* CONFIG_NUMA */
 
 #ifdef CONFIG_DISCONTIGMEM
-- 
cgit v1.2.3-70-g09d2


From 831d991821daedd4839073dbca55514432ef1768 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Mon, 3 Sep 2007 10:17:39 +0200
Subject: x86: add PCI extended config space access for AMD Barcelona

This patch implements PCI extended configuration space access for
AMD's Barcelona CPUs. It extends the method using CF8/CFC IO
addresses. An x86 capability bit has been introduced that is set for
CPUs supporting PCI extended config space accesses.

Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/amd.c    |  4 ++++
 arch/x86/kernel/cpu/amd_64.c |  3 +++
 arch/x86/kernel/setup.c      | 13 +++++++++++++
 arch/x86/kernel/setup.h      | 26 ++++++++++++++++++++++++++
 arch/x86/kernel/setup_64.c   |  2 ++
 arch/x86/pci/direct.c        | 21 +++++++++++++++------
 include/asm-x86/cpufeature.h |  2 ++
 7 files changed, 65 insertions(+), 6 deletions(-)
 create mode 100644 arch/x86/kernel/setup.h

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 24586682829..99221f9834e 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -6,6 +6,7 @@
 #include <asm/apic.h>
 
 #include <mach_apic.h>
+#include "../setup.h"
 #include "cpu.h"
 
 /*
@@ -308,6 +309,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 
 	if (cpu_has_xmm2)
 		set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+
+	if (c->x86 == 0x10)
+		amd_enable_pci_ext_cfg(c);
 }
 
 static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c
index c815c2c0484..180097e9921 100644
--- a/arch/x86/kernel/cpu/amd_64.c
+++ b/arch/x86/kernel/cpu/amd_64.c
@@ -217,6 +217,9 @@ void __cpuinit init_amd(struct cpuinfo_x86 *c)
 	if (c->x86 == 0x10)
 		fam10h_check_enable_mmcfg();
 
+	if (c->x86 == 0x10)
+		amd_enable_pci_ext_cfg(c);
+
 	if (amd_apic_timer_broken())
 		disable_apic_timer = 1;
 
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 6f80b852a19..d8f17123401 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -136,4 +136,17 @@ void __init setup_per_cpu_areas(void)
 	setup_cpumask_of_cpu();
 }
 
+#define ENABLE_CF8_EXT_CFG      (1ULL << 46)
+
+void __cpuinit amd_enable_pci_ext_cfg(struct cpuinfo_x86 *c)
+{
+	u64 reg;
+	rdmsrl(MSR_AMD64_NB_CFG, reg);
+	if (!(reg & ENABLE_CF8_EXT_CFG)) {
+		reg |= ENABLE_CF8_EXT_CFG;
+		wrmsrl(MSR_AMD64_NB_CFG, reg);
+	}
+	set_cpu_cap(c, X86_FEATURE_PCI_EXT_CFG);
+}
+
 #endif
diff --git a/arch/x86/kernel/setup.h b/arch/x86/kernel/setup.h
new file mode 100644
index 00000000000..66cc2c7f961
--- /dev/null
+++ b/arch/x86/kernel/setup.h
@@ -0,0 +1,26 @@
+/*
+ * Internal declarations for shared x86 setup code.
+ *
+ * Copyright (c) 2008 Advanced Micro Devices, Inc.
+ * Contributed by Robert Richter <robert.richter@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307 USA
+ */
+
+#ifndef _ARCH_X86_KERNEL_SETUP_H
+
+extern void __cpuinit amd_enable_pci_ext_cfg(struct cpuinfo_x86 *c);
+
+#endif	/* _ARCH_X86_KERNEL_SETUP_H */
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 25afdd80a3c..215bd67e5b6 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -73,6 +73,8 @@
 #include <asm/pat.h>
 #include <asm/mmconfig.h>
 
+#include "setup.h"
+
 #include <mach_apic.h>
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c
index 21d1e0e0d53..27d61b63def 100644
--- a/arch/x86/pci/direct.c
+++ b/arch/x86/pci/direct.c
@@ -8,18 +8,21 @@
 #include "pci.h"
 
 /*
- * Functions for accessing PCI configuration space with type 1 accesses
+ * Functions for accessing PCI base (first 256 bytes) and extended
+ * (4096 bytes per PCI function) configuration space with type 1
+ * accesses.
  */
 
 #define PCI_CONF1_ADDRESS(bus, devfn, reg) \
-	(0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3))
+	(0x80000000 | ((reg & 0xF00) << 16) | (bus << 16) \
+	| (devfn << 8) | (reg & 0xFC))
 
 static int pci_conf1_read(unsigned int seg, unsigned int bus,
 			  unsigned int devfn, int reg, int len, u32 *value)
 {
 	unsigned long flags;
 
-	if ((bus > 255) || (devfn > 255) || (reg > 255)) {
+	if ((bus > 255) || (devfn > 255) || (reg > 4095)) {
 		*value = -1;
 		return -EINVAL;
 	}
@@ -50,7 +53,7 @@ static int pci_conf1_write(unsigned int seg, unsigned int bus,
 {
 	unsigned long flags;
 
-	if ((bus > 255) || (devfn > 255) || (reg > 255)) 
+	if ((bus > 255) || (devfn > 255) || (reg > 4095))
 		return -EINVAL;
 
 	spin_lock_irqsave(&pci_config_lock, flags);
@@ -260,10 +263,16 @@ void __init pci_direct_init(int type)
 		return;
 	printk(KERN_INFO "PCI: Using configuration type %d for base access\n",
 		 type);
-	if (type == 1)
+	if (type == 1) {
 		raw_pci_ops = &pci_direct_conf1;
-	else
+		if (!raw_pci_ext_ops && cpu_has_pci_ext_cfg) {
+			printk(KERN_INFO "PCI: Using configuration type 1 "
+			       "for extended access\n");
+			raw_pci_ext_ops = &pci_direct_conf1;
+		}
+	} else {
 		raw_pci_ops = &pci_direct_conf2;
+	}
 }
 
 int __init pci_direct_probe(void)
diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index 0d609c837a4..40fcbba00f1 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -79,6 +79,7 @@
 #define X86_FEATURE_REP_GOOD	(3*32+16) /* rep microcode works well on this CPU */
 #define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* Mfence synchronizes RDTSC */
 #define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */
+#define X86_FEATURE_PCI_EXT_CFG	(3*32+19) /* PCI extended cfg access */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD Extensions-3 */
@@ -187,6 +188,7 @@ extern const char * const x86_power_flags[32];
 #define cpu_has_gbpages		boot_cpu_has(X86_FEATURE_GBPAGES)
 #define cpu_has_arch_perfmon	boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
 #define cpu_has_pat		boot_cpu_has(X86_FEATURE_PAT)
+#define cpu_has_pci_ext_cfg	boot_cpu_has(X86_FEATURE_PCI_EXT_CFG)
 
 #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
 # define cpu_has_invlpg		1
-- 
cgit v1.2.3-70-g09d2


From 6fc92866a4a6778a9732a14b61f70cb9014b2a1a Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 2 Jun 2008 10:54:16 +0200
Subject: fix build bug in "x86: add PCI extended config space access for AMD
 Barcelona"

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/mmconfig.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/mmconfig.h b/include/asm-x86/mmconfig.h
index 95beda07c6f..46d6bb135df 100644
--- a/include/asm-x86/mmconfig.h
+++ b/include/asm-x86/mmconfig.h
@@ -9,4 +9,6 @@ static inline void fam10h_check_enable_mmcfg(void) { }
 static inline void check_enable_amd_mmconf_dmi(void) { }
 #endif
 
+extern void __cpuinit amd_enable_pci_ext_cfg(struct cpuinfo_x86 *c);
+
 #endif
-- 
cgit v1.2.3-70-g09d2


From c46e62f73569d7ef42255bd6f31e35925b7f1492 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@suse.cz>
Date: Wed, 28 May 2008 12:42:57 +0200
Subject: i8259: fix final ugliness

Introduce IRQx_VECTOR on 32-bit, so that #ifdef noise is kept
down. There should be no object code change.

[ mingo@elte.hu: merged to x86/irq not x86/i8259 due to x86/irq having
  restructured the vector code into asm-x86/irq_vectors.h, which this
  patch touches. ]

Signed-off-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/i8259.c       | 22 ++++++++--------------
 include/asm-x86/irq_vectors.h |  9 ++++++---
 2 files changed, 14 insertions(+), 17 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 7a0fda8f01b..dc92b49d920 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -297,34 +297,28 @@ void init_8259A(int auto_eoi)
 	 * outb_pic - this has to work on a wide range of PC hardware.
 	 */
 	outb_pic(0x11, PIC_MASTER_CMD);	/* ICW1: select 8259A-1 init */
-#ifndef CONFIG_X86_64
-	outb_pic(0x20 + 0, PIC_MASTER_IMR);	/* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */
-	outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR);	/* 8259A-1 (the master) has a slave on IR2 */
-#else /* CONFIG_X86_64 */
-	/* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */
+
+	/* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 on x86-64,
+	                       to 0x20-0x27 on i386 */
 	outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR);
+
 	/* 8259A-1 (the master) has a slave on IR2 */
-	outb_pic(0x04, PIC_MASTER_IMR);
-#endif /* CONFIG_X86_64 */
+	outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR);
+
 	if (auto_eoi)	/* master does Auto EOI */
 		outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR);
 	else		/* master expects normal EOI */
 		outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR);
 
 	outb_pic(0x11, PIC_SLAVE_CMD);	/* ICW1: select 8259A-2 init */
-#ifndef CONFIG_X86_64
-	outb_pic(0x20 + 8, PIC_SLAVE_IMR);	/* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */
-	outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR);	/* 8259A-2 is a slave on master's IR2 */
-	outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */
-#else /* CONFIG_X86_64 */
-	/* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */
+
+	/* ICW2: 8259A-2 IR0-7 mapped to IRQ8_VECTOR */
 	outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR);
 	/* 8259A-2 is a slave on master's IR2 */
 	outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR);
 	/* (slave's support for AEOI in flat mode is to be investigated) */
 	outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR);
 
-#endif /* CONFIG_X86_64 */
 	if (auto_eoi)
 		/*
 		 * In AEOI mode we just have to mask the interrupt
diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h
index 3cb6d8c77b3..b58581e2e24 100644
--- a/include/asm-x86/irq_vectors.h
+++ b/include/asm-x86/irq_vectors.h
@@ -18,17 +18,20 @@
 #endif
 
 /*
- * Vectors 0x20-0x2f are used for ISA interrupts on 32 bit.
- *
  * Reserve the lowest usable priority level 0x20 - 0x2f for triggering
  * cleanup after irq migration on 64 bit.
  */
 #define IRQ_MOVE_CLEANUP_VECTOR	FIRST_EXTERNAL_VECTOR
 
 /*
- * Vectors 0x30-0x3f are used for ISA interrupts on 64 bit
+ * Vectors 0x20-0x2f are used for ISA interrupts on 32 bit.
+ * Vectors 0x30-0x3f are used for ISA interrupts on 64 bit.
  */
+#ifdef CONFIG_X86_32
+#define IRQ0_VECTOR		(FIRST_EXTERNAL_VECTOR)
+#else
 #define IRQ0_VECTOR		(FIRST_EXTERNAL_VECTOR + 0x10)
+#endif
 #define IRQ1_VECTOR		(IRQ0_VECTOR + 1)
 #define IRQ2_VECTOR		(IRQ0_VECTOR + 2)
 #define IRQ3_VECTOR		(IRQ0_VECTOR + 3)
-- 
cgit v1.2.3-70-g09d2


From 1a5726528a70bb239bdd149aef7f2155cd2b1699 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 2 Jun 2008 12:21:36 +0200
Subject: fix build bug in "x86: add PCI extended config space access for AMD
 Barcelona"

---
 arch/x86/kernel/cpu/amd.c  | 1 +
 include/asm-x86/mmconfig.h | 4 ++++
 2 files changed, 5 insertions(+)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 99221f9834e..656b40aed64 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -4,6 +4,7 @@
 #include <asm/io.h>
 #include <asm/processor.h>
 #include <asm/apic.h>
+#include <asm/mmconfig.h>
 
 #include <mach_apic.h>
 #include "../setup.h"
diff --git a/include/asm-x86/mmconfig.h b/include/asm-x86/mmconfig.h
index 46d6bb135df..691798fbee1 100644
--- a/include/asm-x86/mmconfig.h
+++ b/include/asm-x86/mmconfig.h
@@ -9,6 +9,10 @@ static inline void fam10h_check_enable_mmcfg(void) { }
 static inline void check_enable_amd_mmconf_dmi(void) { }
 #endif
 
+#if defined(CONFIG_SMP) && defined(CONFIG_X86_64)
 extern void __cpuinit amd_enable_pci_ext_cfg(struct cpuinfo_x86 *c);
+#else
+static inline void amd_enable_pci_ext_cfg(struct cpuinfo_x86 *c) { }
+#endif
 
 #endif
-- 
cgit v1.2.3-70-g09d2


From c1f64a58003fd2efaa725a857e269a15f765791a Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 27 May 2008 09:47:13 -0700
Subject: x86: MMIO and gcc re-ordering issue

On Tue, 27 May 2008, Linus Torvalds wrote:
>
> Expecting people to fix up all drivers is simply not going to happen. And
> serializing things shouldn't be *that* expensive. People who cannot take
> the expense can continue to use the magic __raw_writel() etc stuff.

Of course, for non-x86, you kind of have to expect drivers to be
well-behaved, so non-x86 can probably avoid this simply because there are
less relevant drivers involved.

Here's a UNTESTED patch for x86 that may or may not compile and work, and
which serializes (on a compiler level) the IO accesses against regular
memory accesses.

__read[bwlq]()/__write[bwlq]() are not serialized with a :"memory"
barrier, although since they still use "asm volatile" I suspect that i
practice they are probably serial too. Did not look very closely at any
generated code (only did a trivial test to see that the code looks
*roughly* correct).

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/io.h    | 56 ++++++++++++++++++++++++++++++++++++++
 include/asm-x86/io_32.h | 49 ----------------------------------
 include/asm-x86/io_64.h | 71 -------------------------------------------------
 3 files changed, 56 insertions(+), 120 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/io.h b/include/asm-x86/io.h
index d5b11f60dbd..8e9eca93f9b 100644
--- a/include/asm-x86/io.h
+++ b/include/asm-x86/io.h
@@ -3,6 +3,62 @@
 
 #define ARCH_HAS_IOREMAP_WC
 
+#include <linux/compiler.h>
+
+#define build_mmio_read(name, size, type, reg, barrier) \
+static inline type name(const volatile void __iomem *addr) \
+{ type ret; asm volatile("mov" size " %1,%0":"=" reg (ret) \
+:"m" (*(volatile type __force *)addr) barrier); return ret; }
+
+#define build_mmio_write(name, size, type, reg, barrier) \
+static inline void name(type val, volatile void __iomem *addr) \
+{ asm volatile("mov" size " %0,%1": :reg (val), \
+"m" (*(volatile type __force *)addr) barrier); }
+
+build_mmio_read(readb, "b", unsigned char, "q", :"memory")
+build_mmio_read(readw, "w", unsigned short, "r", :"memory")
+build_mmio_read(readl, "l", unsigned int, "r", :"memory")
+
+build_mmio_read(__readb, "b", unsigned char, "q", )
+build_mmio_read(__readw, "w", unsigned short, "r", )
+build_mmio_read(__readl, "l", unsigned int, "r", )
+
+build_mmio_write(writeb, "b", unsigned char, "q", :"memory")
+build_mmio_write(writew, "w", unsigned short, "r", :"memory")
+build_mmio_write(writel, "l", unsigned int, "r", :"memory")
+
+build_mmio_write(__writeb, "b", unsigned char, "q", )
+build_mmio_write(__writew, "w", unsigned short, "r", )
+build_mmio_write(__writel, "l", unsigned int, "r", )
+
+#define readb_relaxed(a) __readb(a)
+#define readw_relaxed(a) __readw(a)
+#define readl_relaxed(a) __readl(a)
+#define __raw_readb __readb
+#define __raw_readw __readw
+#define __raw_readl __readl
+
+#define __raw_writeb __writeb
+#define __raw_writew __writew
+#define __raw_writel __writel
+
+#define mmiowb() barrier()
+
+#ifdef CONFIG_X86_64
+build_mmio_read(readq, "q", unsigned long, "r", :"memory")
+build_mmio_read(__readq, "q", unsigned long, "r", )
+build_mmio_write(writeq, "q", unsigned long, "r", :"memory")
+build_mmio_write(__writeq, "q", unsigned long, "r", )
+
+#define readq_relaxed(a) __readq(a)
+#define __raw_readq __readq
+#define __raw_writeq writeq
+
+/* Let people know we have them */
+#define readq readq
+#define writeq writeq
+#endif
+
 #ifdef CONFIG_X86_32
 # include "io_32.h"
 #else
diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h
index 049e81e797a..d71be8df979 100644
--- a/include/asm-x86/io_32.h
+++ b/include/asm-x86/io_32.h
@@ -149,55 +149,6 @@ extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
 #define virt_to_bus virt_to_phys
 #define bus_to_virt phys_to_virt
 
-/*
- * readX/writeX() are used to access memory mapped devices. On some
- * architectures the memory mapped IO stuff needs to be accessed
- * differently. On the x86 architecture, we just read/write the
- * memory location directly.
- */
-
-static inline unsigned char readb(const volatile void __iomem *addr)
-{
-	return *(volatile unsigned char __force *)addr;
-}
-
-static inline unsigned short readw(const volatile void __iomem *addr)
-{
-	return *(volatile unsigned short __force *)addr;
-}
-
-static inline unsigned int readl(const volatile void __iomem *addr)
-{
-	return *(volatile unsigned int __force *) addr;
-}
-
-#define readb_relaxed(addr) readb(addr)
-#define readw_relaxed(addr) readw(addr)
-#define readl_relaxed(addr) readl(addr)
-#define __raw_readb readb
-#define __raw_readw readw
-#define __raw_readl readl
-
-static inline void writeb(unsigned char b, volatile void __iomem *addr)
-{
-	*(volatile unsigned char __force *)addr = b;
-}
-
-static inline void writew(unsigned short b, volatile void __iomem *addr)
-{
-	*(volatile unsigned short __force *)addr = b;
-}
-
-static inline void writel(unsigned int b, volatile void __iomem *addr)
-{
-	*(volatile unsigned int __force *)addr = b;
-}
-#define __raw_writeb writeb
-#define __raw_writew writew
-#define __raw_writel writel
-
-#define mmiowb()
-
 static inline void
 memset_io(volatile void __iomem *addr, unsigned char val, int count)
 {
diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h
index 0930bedf9e4..ddd8058a502 100644
--- a/include/asm-x86/io_64.h
+++ b/include/asm-x86/io_64.h
@@ -204,77 +204,6 @@ extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
 #define virt_to_bus virt_to_phys
 #define bus_to_virt phys_to_virt
 
-/*
- * readX/writeX() are used to access memory mapped devices. On some
- * architectures the memory mapped IO stuff needs to be accessed
- * differently. On the x86 architecture, we just read/write the
- * memory location directly.
- */
-
-static inline __u8 __readb(const volatile void __iomem *addr)
-{
-	return *(__force volatile __u8 *)addr;
-}
-
-static inline __u16 __readw(const volatile void __iomem *addr)
-{
-	return *(__force volatile __u16 *)addr;
-}
-
-static __always_inline __u32 __readl(const volatile void __iomem *addr)
-{
-	return *(__force volatile __u32 *)addr;
-}
-
-static inline __u64 __readq(const volatile void __iomem *addr)
-{
-	return *(__force volatile __u64 *)addr;
-}
-
-#define readb(x) __readb(x)
-#define readw(x) __readw(x)
-#define readl(x) __readl(x)
-#define readq(x) __readq(x)
-#define readb_relaxed(a) readb(a)
-#define readw_relaxed(a) readw(a)
-#define readl_relaxed(a) readl(a)
-#define readq_relaxed(a) readq(a)
-#define __raw_readb readb
-#define __raw_readw readw
-#define __raw_readl readl
-#define __raw_readq readq
-
-#define mmiowb()
-
-static inline void __writel(__u32 b, volatile void __iomem *addr)
-{
-	*(__force volatile __u32 *)addr = b;
-}
-
-static inline void __writeq(__u64 b, volatile void __iomem *addr)
-{
-	*(__force volatile __u64 *)addr = b;
-}
-
-static inline void __writeb(__u8 b, volatile void __iomem *addr)
-{
-	*(__force volatile __u8 *)addr = b;
-}
-
-static inline void __writew(__u16 b, volatile void __iomem *addr)
-{
-	*(__force volatile __u16 *)addr = b;
-}
-
-#define writeq(val, addr) __writeq((val), (addr))
-#define writel(val, addr) __writel((val), (addr))
-#define writew(val, addr) __writew((val), (addr))
-#define writeb(val, addr) __writeb((val), (addr))
-#define __raw_writeb writeb
-#define __raw_writew writew
-#define __raw_writel writel
-#define __raw_writeq writeq
-
 void __memcpy_fromio(void *, unsigned long, unsigned);
 void __memcpy_toio(unsigned long, const void *, unsigned);
 
-- 
cgit v1.2.3-70-g09d2


From 83bea8e1fa0c47b30664b6f92397c016c22f77fb Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Tue, 27 May 2008 21:03:46 +0200
Subject: x86: fix incomplete include guard in include/asm-x86/seccomp_32.h

Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/seccomp_32.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/seccomp_32.h b/include/asm-x86/seccomp_32.h
index 18da19e89bf..36e71c5f306 100644
--- a/include/asm-x86/seccomp_32.h
+++ b/include/asm-x86/seccomp_32.h
@@ -1,4 +1,5 @@
 #ifndef _ASM_SECCOMP_H
+#define _ASM_SECCOMP_H
 
 #include <linux/thread_info.h>
 
-- 
cgit v1.2.3-70-g09d2


From 6330a30a76c1e62d4b4ec238368957f8febf9113 Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Wed, 28 May 2008 09:46:19 +0200
Subject: x86: break mutual header inclusion

This breaks up the mutual inclusion between headers ptrace.h and vm86.h
by moving some small part of vm86.h which is needed by ptrace.h into
processor-flags.h.

We also try to move #include lines to the top.

This has been compile tested on x86_32 and x86_64 defconfig, and run
through 'make headers_check'.

Cc: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/processor-flags.h | 6 ++++++
 include/asm-x86/ptrace.h          | 8 +++++---
 include/asm-x86/vm86.h            | 8 +-------
 3 files changed, 12 insertions(+), 10 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/processor-flags.h b/include/asm-x86/processor-flags.h
index 199cab107d8..092b39b3a7e 100644
--- a/include/asm-x86/processor-flags.h
+++ b/include/asm-x86/processor-flags.h
@@ -88,4 +88,10 @@
 #define CX86_ARR_BASE	0xc4
 #define CX86_RCR_BASE	0xdc
 
+#ifdef CONFIG_VM86
+#define X86_VM_MASK	X86_EFLAGS_VM
+#else
+#define X86_VM_MASK	0 /* No VM86 support */
+#endif
+
 #endif	/* __ASM_I386_PROCESSOR_FLAGS_H */
diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h
index 9f922b0b95d..8a71db803da 100644
--- a/include/asm-x86/ptrace.h
+++ b/include/asm-x86/ptrace.h
@@ -3,7 +3,12 @@
 
 #include <linux/compiler.h>	/* For __user */
 #include <asm/ptrace-abi.h>
+#include <asm/processor-flags.h>
 
+#ifdef __KERNEL__
+#include <asm/ds.h>		/* the DS BTS struct is used for ptrace too */
+#include <asm/segment.h>
+#endif
 
 #ifndef __ASSEMBLY__
 
@@ -55,9 +60,6 @@ struct pt_regs {
 	unsigned long ss;
 };
 
-#include <asm/vm86.h>
-#include <asm/segment.h>
-
 #endif /* __KERNEL__ */
 
 #else /* __i386__ */
diff --git a/include/asm-x86/vm86.h b/include/asm-x86/vm86.h
index cbf4a0effa7..5ce351325e0 100644
--- a/include/asm-x86/vm86.h
+++ b/include/asm-x86/vm86.h
@@ -115,7 +115,6 @@ struct vm86plus_info_struct {
 	unsigned long is_vm86pus:1;	      /* for vm86 internal use */
 	unsigned char vm86dbg_intxxtab[32];   /* for debugger */
 };
-
 struct vm86plus_struct {
 	struct vm86_regs regs;
 	unsigned long flags;
@@ -128,11 +127,7 @@ struct vm86plus_struct {
 
 #ifdef __KERNEL__
 
-#ifdef CONFIG_VM86
-#define X86_VM_MASK	X86_EFLAGS_VM
-#else
-#define X86_VM_MASK	0 /* No VM86 support */
-#endif
+#include <asm/ptrace.h>
 
 /*
  * This is the (kernel) stack-layout when we have done a "SAVE_ALL" from vm86
@@ -142,7 +137,6 @@ struct vm86plus_struct {
  * at the end of the structure. Look at ptrace.h to see the "normal"
  * setup. For user space layout see 'struct vm86_regs' above.
  */
-#include <asm/ptrace.h>
 
 struct kernel_vm86_regs {
 /*
-- 
cgit v1.2.3-70-g09d2


From fb093eab6d8963a085f112773284f2bcb3d7907b Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 27 May 2008 16:29:20 -0700
Subject: x86: remove duplicated e820 func in setup.h

we already have them in e820.h

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/setup.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index 64f5f0c11d6..9e163fc3e98 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -50,15 +50,9 @@ extern struct boot_params boot_params;
  */
 #define LOWMEMSIZE()	(0x9f000)
 
-struct e820entry;
-
 char * __init machine_specific_memory_setup(void);
 char *memory_setup(void);
 
-int __init copy_e820_map(struct e820entry *biosmap, int nr_map);
-void __init add_memory_region(unsigned long long start,
-			      unsigned long long size, int type);
-
 
 void __init i386_start_kernel(void);
 
-- 
cgit v1.2.3-70-g09d2


From 9f5314fb4d556d3132c784d0df47352b2830ca53 Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Wed, 28 May 2008 09:51:18 -0500
Subject: x86, uv: update macros used by UV platform

Update the UV address macros to better describe the
fields of UV physical addresses. Improve comments
in the header files. Add additional MMR definitions.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/genx2apic_uv_x.c | 141 +++++++----
 include/asm-x86/uv/uv_hub.h      | 188 ++++++++++-----
 include/asm-x86/uv/uv_mmrs.h     | 509 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 731 insertions(+), 107 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index ebf13908a74..45e84acca8a 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -5,7 +5,7 @@
  *
  * SGI UV APIC functions (note: not an Intel compatible APIC)
  *
- * Copyright (C) 2007 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
  */
 
 #include <linux/threads.h>
@@ -55,37 +55,37 @@ static cpumask_t uv_vector_allocation_domain(int cpu)
 int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
 {
 	unsigned long val;
-	int nasid;
+	int pnode;
 
-	nasid = uv_apicid_to_nasid(phys_apicid);
+	pnode = uv_apicid_to_pnode(phys_apicid);
 	val = (1UL << UVH_IPI_INT_SEND_SHFT) |
 	    (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
 	    (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
 	    APIC_DM_INIT;
-	uv_write_global_mmr64(nasid, UVH_IPI_INT, val);
+	uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
 	mdelay(10);
 
 	val = (1UL << UVH_IPI_INT_SEND_SHFT) |
 	    (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
 	    (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
 	    APIC_DM_STARTUP;
-	uv_write_global_mmr64(nasid, UVH_IPI_INT, val);
+	uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
 	return 0;
 }
 
 static void uv_send_IPI_one(int cpu, int vector)
 {
 	unsigned long val, apicid, lapicid;
-	int nasid;
+	int pnode;
 
 	apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? */
 	lapicid = apicid & 0x3f;		/* ZZZ macro needed */
-	nasid = uv_apicid_to_nasid(apicid);
+	pnode = uv_apicid_to_pnode(apicid);
 	val =
 	    (1UL << UVH_IPI_INT_SEND_SHFT) | (lapicid <<
 					      UVH_IPI_INT_APIC_ID_SHFT) |
 	    (vector << UVH_IPI_INT_VECTOR_SHFT);
-	uv_write_global_mmr64(nasid, UVH_IPI_INT, val);
+	uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
 }
 
 static void uv_send_IPI_mask(cpumask_t mask, int vector)
@@ -159,39 +159,81 @@ struct genapic apic_x2apic_uv_x = {
 	.phys_pkg_id = phys_pkg_id,	/* Fixme ZZZ */
 };
 
-static __cpuinit void set_x2apic_extra_bits(int nasid)
+static __cpuinit void set_x2apic_extra_bits(int pnode)
 {
-	__get_cpu_var(x2apic_extra_bits) = ((nasid >> 1) << 6);
+	__get_cpu_var(x2apic_extra_bits) = (pnode << 6);
 }
 
 /*
  * Called on boot cpu.
  */
+static __init int boot_pnode_to_blade(int pnode)
+{
+	int blade;
+
+	for (blade = 0; blade < uv_num_possible_blades(); blade++)
+		if (pnode == uv_blade_info[blade].pnode)
+			return blade;
+	BUG();
+}
+
+struct redir_addr {
+	unsigned long redirect;
+	unsigned long alias;
+};
+
+#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT
+
+static __initdata struct redir_addr redir_addrs[] = {
+	{UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_SI_ALIAS0_OVERLAY_CONFIG},
+	{UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_SI_ALIAS1_OVERLAY_CONFIG},
+	{UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_SI_ALIAS2_OVERLAY_CONFIG},
+};
+
+static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
+{
+	union uvh_si_alias0_overlay_config_u alias;
+	union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) {
+		alias.v = uv_read_local_mmr(redir_addrs[i].alias);
+		if (alias.s.base == 0) {
+			*size = (1UL << alias.s.m_alias);
+			redirect.v = uv_read_local_mmr(redir_addrs[i].redirect);
+			*base = (unsigned long)redirect.s.dest_base << DEST_SHIFT;
+			return;
+		}
+	}
+	BUG();
+}
+
 static __init void uv_system_init(void)
 {
 	union uvh_si_addr_map_config_u m_n_config;
-	int bytes, nid, cpu, lcpu, nasid, last_nasid, blade;
-	unsigned long mmr_base;
+	union uvh_node_id_u node_id;
+	unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
+	int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
+	unsigned long mmr_base, present;
 
 	m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG);
+	m_val = m_n_config.s.m_skt;
+	n_val = m_n_config.s.n_skt;
 	mmr_base =
 	    uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
 	    ~UV_MMR_ENABLE;
 	printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base);
 
-	last_nasid = -1;
-	for_each_possible_cpu(cpu) {
-		nid = cpu_to_node(cpu);
-		nasid = uv_apicid_to_nasid(per_cpu(x86_cpu_to_apicid, cpu));
-		if (nasid != last_nasid)
-			uv_possible_blades++;
-		last_nasid = nasid;
-	}
+	for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++)
+		uv_possible_blades +=
+		  hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8));
 	printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
 
 	bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
 	uv_blade_info = alloc_bootmem_pages(bytes);
 
+	get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size);
+
 	bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes();
 	uv_node_to_blade = alloc_bootmem_pages(bytes);
 	memset(uv_node_to_blade, 255, bytes);
@@ -200,43 +242,56 @@ static __init void uv_system_init(void)
 	uv_cpu_to_blade = alloc_bootmem_pages(bytes);
 	memset(uv_cpu_to_blade, 255, bytes);
 
-	last_nasid = -1;
-	blade = -1;
-	lcpu = -1;
-	for_each_possible_cpu(cpu) {
-		nid = cpu_to_node(cpu);
-		nasid = uv_apicid_to_nasid(per_cpu(x86_cpu_to_apicid, cpu));
-		if (nasid != last_nasid) {
-			blade++;
-			lcpu = -1;
-			uv_blade_info[blade].nr_posible_cpus = 0;
+	blade = 0;
+	for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) {
+		present = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8);
+		for (j = 0; j < 64; j++) {
+			if (!test_bit(j, &present))
+				continue;
+			uv_blade_info[blade].pnode = (i * 64 + j);
+			uv_blade_info[blade].nr_possible_cpus = 0;
 			uv_blade_info[blade].nr_online_cpus = 0;
+			blade++;
 		}
-		last_nasid = nasid;
-		lcpu++;
+	}
 
-		uv_cpu_hub_info(cpu)->m_val = m_n_config.s.m_skt;
-		uv_cpu_hub_info(cpu)->n_val = m_n_config.s.n_skt;
+	node_id.v = uv_read_local_mmr(UVH_NODE_ID);
+	gnode_upper = (((unsigned long)node_id.s.node_id) &
+		       ~((1 << n_val) - 1)) << m_val;
+
+	for_each_present_cpu(cpu) {
+		nid = cpu_to_node(cpu);
+		pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu));
+		blade = boot_pnode_to_blade(pnode);
+		lcpu = uv_blade_info[blade].nr_possible_cpus;
+		uv_blade_info[blade].nr_possible_cpus++;
+
+		uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base;
+		uv_cpu_hub_info(cpu)->lowmem_remap_top =
+					lowmem_redir_base + lowmem_redir_size;
+		uv_cpu_hub_info(cpu)->m_val = m_val;
+		uv_cpu_hub_info(cpu)->n_val = m_val;
 		uv_cpu_hub_info(cpu)->numa_blade_id = blade;
 		uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
-		uv_cpu_hub_info(cpu)->local_nasid = nasid;
-		uv_cpu_hub_info(cpu)->gnode_upper =
-		    nasid & ~((1 << uv_hub_info->n_val) - 1);
+		uv_cpu_hub_info(cpu)->pnode = pnode;
+		uv_cpu_hub_info(cpu)->pnode_mask = (1 << n_val) - 1;
+		uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
+		uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
 		uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
 		uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */
-		uv_blade_info[blade].nasid = nasid;
-		uv_blade_info[blade].nr_posible_cpus++;
 		uv_node_to_blade[nid] = blade;
 		uv_cpu_to_blade[cpu] = blade;
 
-		printk(KERN_DEBUG "UV cpu %d, apicid 0x%x, nasid %d, nid %d\n",
-		       cpu, per_cpu(x86_cpu_to_apicid, cpu), nasid, nid);
-		printk(KERN_DEBUG "UV   lcpu %d, blade %d\n", lcpu, blade);
+		printk(KERN_DEBUG "UV cpu %d, apicid 0x%x, pnode %d, nid %d, "
+			"lcpu %d, blade %d\n",
+			cpu, per_cpu(x86_cpu_to_apicid, cpu), pnode, nid,
+			lcpu, blade);
 	}
 }
 
 /*
  * Called on each cpu to initialize the per_cpu UV data area.
+ * 	ZZZ hotplug not supported yet
  */
 void __cpuinit uv_cpu_init(void)
 {
@@ -246,5 +301,5 @@ void __cpuinit uv_cpu_init(void)
 	uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
 
 	if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
-		set_x2apic_extra_bits(uv_hub_info->local_nasid);
+		set_x2apic_extra_bits(uv_hub_info->pnode);
 }
diff --git a/include/asm-x86/uv/uv_hub.h b/include/asm-x86/uv/uv_hub.h
index 26b9240d1e2..65004881de5 100644
--- a/include/asm-x86/uv/uv_hub.h
+++ b/include/asm-x86/uv/uv_hub.h
@@ -5,7 +5,7 @@
  *
  * SGI UV architectural definitions
  *
- * Copyright (C) 2007 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
  */
 
 #ifndef __ASM_X86_UV_HUB_H__
@@ -20,26 +20,49 @@
 /*
  * Addressing Terminology
  *
- * 	NASID - network ID of a router, Mbrick or Cbrick. Nasid values of
- * 		routers always have low bit of 1, C/MBricks have low bit
- * 		equal to 0. Most addressing macros that target UV hub chips
- * 		right shift the NASID by 1 to exclude the always-zero bit.
+ *	M       - The low M bits of a physical address represent the offset
+ *		  into the blade local memory. RAM memory on a blade is physically
+ *		  contiguous (although various IO spaces may punch holes in
+ *		  it)..
  *
- *	SNASID - NASID right shifted by 1 bit.
+ * 	N	- Number of bits in the node portion of a socket physical
+ * 		  address.
+ *
+ * 	NASID   - network ID of a router, Mbrick or Cbrick. Nasid values of
+ * 	 	  routers always have low bit of 1, C/MBricks have low bit
+ * 		  equal to 0. Most addressing macros that target UV hub chips
+ * 		  right shift the NASID by 1 to exclude the always-zero bit.
+ * 		  NASIDs contain up to 15 bits.
+ *
+ *	GNODE   - NASID right shifted by 1 bit. Most mmrs contain gnodes instead
+ *		  of nasids.
+ *
+ * 	PNODE   - the low N bits of the GNODE. The PNODE is the most useful variant
+ * 		  of the nasid for socket usage.
+ *
+ *
+ *  NumaLink Global Physical Address Format:
+ *  +--------------------------------+---------------------+
+ *  |00..000|      GNODE             |      NodeOffset     |
+ *  +--------------------------------+---------------------+
+ *          |<-------53 - M bits --->|<--------M bits ----->
+ *
+ *	M - number of node offset bits (35 .. 40)
  *
  *
  *  Memory/UV-HUB Processor Socket Address Format:
- *  +--------+---------------+---------------------+
- *  |00..0000|    SNASID     |      NodeOffset     |
- *  +--------+---------------+---------------------+
- *           <--- N bits --->|<--------M bits ----->
+ *  +----------------+---------------+---------------------+
+ *  |00..000000000000|   PNODE       |      NodeOffset     |
+ *  +----------------+---------------+---------------------+
+ *                   <--- N bits --->|<--------M bits ----->
  *
- *	M number of node offset bits (35 .. 40)
- *	N number of SNASID bits (0 .. 10)
+ *	M - number of node offset bits (35 .. 40)
+ *	N - number of PNODE bits (0 .. 10)
  *
  *		Note: M + N cannot currently exceed 44 (x86_64) or 46 (IA64).
  *		The actual values are configuration dependent and are set at
- *		boot time
+ *		boot time. M & N values are set by the hardware/BIOS at boot.
+ *
  *
  * APICID format
  * 	NOTE!!!!!! This is the current format of the APICID. However, code
@@ -48,14 +71,14 @@
  *
  * 		1111110000000000
  * 		5432109876543210
- *		nnnnnnnnnnlc0cch
+ *		pppppppppplc0cch
  *		sssssssssss
  *
- *			n  = snasid bits
+ *			p  = pnode bits
  *			l =  socket number on board
  *			c  = core
  *			h  = hyperthread
- *			s  = bits that are in the socket CSR
+ *			s  = bits that are in the SOCKET_ID CSR
  *
  *	Note: Processor only supports 12 bits in the APICID register. The ACPI
  *	      tables hold all 16 bits. Software needs to be aware of this.
@@ -74,7 +97,7 @@
  * This value is also the value of the maximum number of non-router NASIDs
  * in the numalink fabric.
  *
- * NOTE: a brick may be 1 or 2 OS nodes. Don't get these confused.
+ * NOTE: a brick may contain 1 or 2 OS nodes. Don't get these confused.
  */
 #define UV_MAX_NUMALINK_BLADES	16384
 
@@ -96,8 +119,12 @@
  */
 struct uv_hub_info_s {
 	unsigned long	global_mmr_base;
-	unsigned short	local_nasid;
-	unsigned short	gnode_upper;
+	unsigned long	gpa_mask;
+	unsigned long	gnode_upper;
+	unsigned long	lowmem_remap_top;
+	unsigned long	lowmem_remap_base;
+	unsigned short	pnode;
+	unsigned short	pnode_mask;
 	unsigned short	coherency_domain_number;
 	unsigned short	numa_blade_id;
 	unsigned char	blade_processor_id;
@@ -112,83 +139,124 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
  * Local & Global MMR space macros.
  * 	Note: macros are intended to be used ONLY by inline functions
  * 	in this file - not by other kernel code.
+ * 		n -  NASID (full 15-bit global nasid)
+ * 		g -  GNODE (full 15-bit global nasid, right shifted 1)
+ * 		p -  PNODE (local part of nsids, right shifted 1)
  */
-#define UV_SNASID(n)			((n) >> 1)
-#define UV_NASID(n)			((n) << 1)
+#define UV_NASID_TO_PNODE(n)		(((n) >> 1) & uv_hub_info->pnode_mask)
+#define UV_PNODE_TO_NASID(p)		(((p) << 1) | uv_hub_info->gnode_upper)
 
 #define UV_LOCAL_MMR_BASE		0xf4000000UL
 #define UV_GLOBAL_MMR32_BASE		0xf8000000UL
 #define UV_GLOBAL_MMR64_BASE		(uv_hub_info->global_mmr_base)
 
-#define UV_GLOBAL_MMR32_SNASID_MASK	0x3ff
-#define UV_GLOBAL_MMR32_SNASID_SHIFT	15
-#define UV_GLOBAL_MMR64_SNASID_SHIFT	26
+#define UV_GLOBAL_MMR32_PNODE_SHIFT	15
+#define UV_GLOBAL_MMR64_PNODE_SHIFT	26
 
-#define UV_GLOBAL_MMR32_NASID_BITS(n)					\
-		(((UV_SNASID(n) & UV_GLOBAL_MMR32_SNASID_MASK)) <<	\
-		(UV_GLOBAL_MMR32_SNASID_SHIFT))
+#define UV_GLOBAL_MMR32_PNODE_BITS(p)	((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT))
 
-#define UV_GLOBAL_MMR64_NASID_BITS(n)					\
-	((unsigned long)UV_SNASID(n) << UV_GLOBAL_MMR64_SNASID_SHIFT)
+#define UV_GLOBAL_MMR64_PNODE_BITS(p)					\
+	((unsigned long)(p) << UV_GLOBAL_MMR64_PNODE_SHIFT)
+
+#define UV_APIC_PNODE_SHIFT	6
+
+/*
+ * Macros for converting between kernel virtual addresses, socket local physical
+ * addresses, and UV global physical addresses.
+ * 	Note: use the standard __pa() & __va() macros for converting
+ * 	      between socket virtual and socket physical addresses.
+ */
+
+/* socket phys RAM --> UV global physical address */
+static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr)
+{
+	if (paddr < uv_hub_info->lowmem_remap_top)
+		paddr += uv_hub_info->lowmem_remap_base;
+	return paddr | uv_hub_info->gnode_upper;
+}
+
+
+/* socket virtual --> UV global physical address */
+static inline unsigned long uv_gpa(void *v)
+{
+	return __pa(v) | uv_hub_info->gnode_upper;
+}
+
+/* socket virtual --> UV global physical address */
+static inline void *uv_vgpa(void *v)
+{
+	return (void *)uv_gpa(v);
+}
+
+/* UV global physical address --> socket virtual */
+static inline void *uv_va(unsigned long gpa)
+{
+	return __va(gpa & uv_hub_info->gpa_mask);
+}
+
+/* pnode, offset --> socket virtual */
+static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset)
+{
+	return __va(((unsigned long)pnode << uv_hub_info->m_val) | offset);
+}
 
-#define UV_APIC_NASID_SHIFT	6
 
 /*
- * Extract a NASID from an APICID (full apicid, not processor subset)
+ * Extract a PNODE from an APICID (full apicid, not processor subset)
  */
-static inline int uv_apicid_to_nasid(int apicid)
+static inline int uv_apicid_to_pnode(int apicid)
 {
-	return (UV_NASID(apicid >> UV_APIC_NASID_SHIFT));
+	return (apicid >> UV_APIC_PNODE_SHIFT);
 }
 
 /*
  * Access global MMRs using the low memory MMR32 space. This region supports
  * faster MMR access but not all MMRs are accessible in this space.
  */
-static inline unsigned long *uv_global_mmr32_address(int nasid,
+static inline unsigned long *uv_global_mmr32_address(int pnode,
 				unsigned long offset)
 {
 	return __va(UV_GLOBAL_MMR32_BASE |
-		       UV_GLOBAL_MMR32_NASID_BITS(nasid) | offset);
+		       UV_GLOBAL_MMR32_PNODE_BITS(pnode) | offset);
 }
 
-static inline void uv_write_global_mmr32(int nasid, unsigned long offset,
+static inline void uv_write_global_mmr32(int pnode, unsigned long offset,
 				 unsigned long val)
 {
-	*uv_global_mmr32_address(nasid, offset) = val;
+	*uv_global_mmr32_address(pnode, offset) = val;
 }
 
-static inline unsigned long uv_read_global_mmr32(int nasid,
+static inline unsigned long uv_read_global_mmr32(int pnode,
 						 unsigned long offset)
 {
-	return *uv_global_mmr32_address(nasid, offset);
+	return *uv_global_mmr32_address(pnode, offset);
 }
 
 /*
  * Access Global MMR space using the MMR space located at the top of physical
  * memory.
  */
-static inline unsigned long *uv_global_mmr64_address(int nasid,
+static inline unsigned long *uv_global_mmr64_address(int pnode,
 				unsigned long offset)
 {
 	return __va(UV_GLOBAL_MMR64_BASE |
-		       UV_GLOBAL_MMR64_NASID_BITS(nasid) | offset);
+		    UV_GLOBAL_MMR64_PNODE_BITS(pnode) | offset);
 }
 
-static inline void uv_write_global_mmr64(int nasid, unsigned long offset,
+static inline void uv_write_global_mmr64(int pnode, unsigned long offset,
 				unsigned long val)
 {
-	*uv_global_mmr64_address(nasid, offset) = val;
+	*uv_global_mmr64_address(pnode, offset) = val;
 }
 
-static inline unsigned long uv_read_global_mmr64(int nasid,
+static inline unsigned long uv_read_global_mmr64(int pnode,
 						 unsigned long offset)
 {
-	return *uv_global_mmr64_address(nasid, offset);
+	return *uv_global_mmr64_address(pnode, offset);
 }
 
 /*
- * Access node local MMRs. Faster than using global space but only local MMRs
+ * Access hub local MMRs. Faster than using global space but only local MMRs
  * are accessible.
  */
 static inline unsigned long *uv_local_mmr_address(unsigned long offset)
@@ -207,15 +275,15 @@ static inline void uv_write_local_mmr(unsigned long offset, unsigned long val)
 }
 
 /*
- * Structures and definitions for converting between cpu, node, and blade
+ * Structures and definitions for converting between cpu, node, pnode, and blade
  * numbers.
  */
 struct uv_blade_info {
-	unsigned short	nr_posible_cpus;
+	unsigned short	nr_possible_cpus;
 	unsigned short	nr_online_cpus;
-	unsigned short	nasid;
+	unsigned short	pnode;
 };
-struct uv_blade_info *uv_blade_info;
+extern struct uv_blade_info *uv_blade_info;
 extern short *uv_node_to_blade;
 extern short *uv_cpu_to_blade;
 extern short uv_possible_blades;
@@ -244,16 +312,16 @@ static inline int uv_node_to_blade_id(int nid)
 	return uv_node_to_blade[nid];
 }
 
-/* Convert a blade id to the NASID of the blade */
-static inline int uv_blade_to_nasid(int bid)
+/* Convert a blade id to the PNODE of the blade */
+static inline int uv_blade_to_pnode(int bid)
 {
-	return uv_blade_info[bid].nasid;
+	return uv_blade_info[bid].pnode;
 }
 
 /* Determine the number of possible cpus on a blade */
 static inline int uv_blade_nr_possible_cpus(int bid)
 {
-	return uv_blade_info[bid].nr_posible_cpus;
+	return uv_blade_info[bid].nr_possible_cpus;
 }
 
 /* Determine the number of online cpus on a blade */
@@ -262,16 +330,16 @@ static inline int uv_blade_nr_online_cpus(int bid)
 	return uv_blade_info[bid].nr_online_cpus;
 }
 
-/* Convert a cpu id to the NASID of the blade containing the cpu */
-static inline int uv_cpu_to_nasid(int cpu)
+/* Convert a cpu id to the PNODE of the blade containing the cpu */
+static inline int uv_cpu_to_pnode(int cpu)
 {
-	return uv_blade_info[uv_cpu_to_blade_id(cpu)].nasid;
+	return uv_blade_info[uv_cpu_to_blade_id(cpu)].pnode;
 }
 
-/* Convert a node number to the NASID of the blade */
-static inline int uv_node_to_nasid(int nid)
+/* Convert a linux node number to the PNODE of the blade */
+static inline int uv_node_to_pnode(int nid)
 {
-	return uv_blade_info[uv_node_to_blade_id(nid)].nasid;
+	return uv_blade_info[uv_node_to_blade_id(nid)].pnode;
 }
 
 /* Maximum possible number of blades */
diff --git a/include/asm-x86/uv/uv_mmrs.h b/include/asm-x86/uv/uv_mmrs.h
index 3b69fe6b637..ac984607652 100644
--- a/include/asm-x86/uv/uv_mmrs.h
+++ b/include/asm-x86/uv/uv_mmrs.h
@@ -11,11 +11,46 @@
 #ifndef __ASM_X86_UV_MMRS__
 #define __ASM_X86_UV_MMRS__
 
-/*
- *       AUTO GENERATED - Do not edit
- */
+#define UV_MMR_ENABLE		(1UL << 63)
 
- #define UV_MMR_ENABLE		(1UL << 63)
+/* ========================================================================= */
+/*                           UVH_BAU_DATA_CONFIG                             */
+/* ========================================================================= */
+#define UVH_BAU_DATA_CONFIG 0x61680UL
+#define UVH_BAU_DATA_CONFIG_32 0x0450
+
+#define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0
+#define UVH_BAU_DATA_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_BAU_DATA_CONFIG_DM_SHFT 8
+#define UVH_BAU_DATA_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVH_BAU_DATA_CONFIG_DESTMODE_SHFT 11
+#define UVH_BAU_DATA_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_BAU_DATA_CONFIG_STATUS_SHFT 12
+#define UVH_BAU_DATA_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVH_BAU_DATA_CONFIG_P_SHFT 13
+#define UVH_BAU_DATA_CONFIG_P_MASK 0x0000000000002000UL
+#define UVH_BAU_DATA_CONFIG_T_SHFT 15
+#define UVH_BAU_DATA_CONFIG_T_MASK 0x0000000000008000UL
+#define UVH_BAU_DATA_CONFIG_M_SHFT 16
+#define UVH_BAU_DATA_CONFIG_M_MASK 0x0000000000010000UL
+#define UVH_BAU_DATA_CONFIG_APIC_ID_SHFT 32
+#define UVH_BAU_DATA_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
+union uvh_bau_data_config_u {
+    unsigned long	v;
+    struct uvh_bau_data_config_s {
+	unsigned long	vector_  :  8;  /* RW */
+	unsigned long	dm       :  3;  /* RW */
+	unsigned long	destmode :  1;  /* RW */
+	unsigned long	status   :  1;  /* RO */
+	unsigned long	p        :  1;  /* RO */
+	unsigned long	rsvd_14  :  1;  /*    */
+	unsigned long	t        :  1;  /* RO */
+	unsigned long	m        :  1;  /* RW */
+	unsigned long	rsvd_17_31: 15;  /*    */
+	unsigned long	apic_id  : 32;  /* RW */
+    } s;
+};
 
 /* ========================================================================= */
 /*                               UVH_IPI_INT                                 */
@@ -109,6 +144,7 @@ union uvh_lb_bau_intd_payload_queue_tail_u {
 /*                   UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE                    */
 /* ========================================================================= */
 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL
+#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0x0aa0
 
 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0
 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL
@@ -169,6 +205,7 @@ union uvh_lb_bau_intd_software_acknowledge_u {
 /*                UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS                 */
 /* ========================================================================= */
 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x0000000000320088UL
+#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0x0aa8
 
 /* ========================================================================= */
 /*                     UVH_LB_BAU_SB_ACTIVATION_CONTROL                      */
@@ -247,6 +284,331 @@ union uvh_lb_bau_sb_descriptor_base_u {
     } s;
 };
 
+/* ========================================================================= */
+/*                      UVH_LB_MCAST_AOERR0_RPT_ENABLE                       */
+/* ========================================================================= */
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE 0x50b20UL
+
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_OBESE_MSG_SHFT 0
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_OBESE_MSG_MASK 0x0000000000000001UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_DATA_SB_ERR_SHFT 1
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_DATA_SB_ERR_MASK 0x0000000000000002UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_NACK_BUFF_PARITY_SHFT 2
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_NACK_BUFF_PARITY_MASK 0x0000000000000004UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_TIMEOUT_SHFT 3
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_TIMEOUT_MASK 0x0000000000000008UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_INACTIVE_REPLY_SHFT 4
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_INACTIVE_REPLY_MASK 0x0000000000000010UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_UPGRADE_ERROR_SHFT 5
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_UPGRADE_ERROR_MASK 0x0000000000000020UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REG_COUNT_UNDERFLOW_SHFT 6
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REG_COUNT_UNDERFLOW_MASK 0x0000000000000040UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REP_OBESE_MSG_SHFT 7
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REP_OBESE_MSG_MASK 0x0000000000000080UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_RUNT_MSG_SHFT 8
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_RUNT_MSG_MASK 0x0000000000000100UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_OBESE_MSG_SHFT 9
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_OBESE_MSG_MASK 0x0000000000000200UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_DATA_SB_ERR_SHFT 10
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_DATA_SB_ERR_MASK 0x0000000000000400UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_RUNT_MSG_SHFT 11
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_RUNT_MSG_MASK 0x0000000000000800UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_OBESE_MSG_SHFT 12
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_OBESE_MSG_MASK 0x0000000000001000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_DATA_SB_ERR_SHFT 13
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_DATA_SB_ERR_MASK 0x0000000000002000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_COMMAND_ERR_SHFT 14
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_COMMAND_ERR_MASK 0x0000000000004000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_PEND_TIMEOUT_SHFT 15
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_PEND_TIMEOUT_MASK 0x0000000000008000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_RUNT_MSG_SHFT 16
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_RUNT_MSG_MASK 0x0000000000010000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_OBESE_MSG_SHFT 17
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_OBESE_MSG_MASK 0x0000000000020000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_DATA_SB_ERR_SHFT 18
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_DATA_SB_ERR_MASK 0x0000000000040000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_RUNT_MSG_SHFT 19
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_RUNT_MSG_MASK 0x0000000000080000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_OBESE_MSG_SHFT 20
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_OBESE_MSG_MASK 0x0000000000100000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_DATA_SB_ERR_SHFT 21
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_DATA_SB_ERR_MASK 0x0000000000200000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_TIMEOUT_SHFT 22
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_TIMEOUT_MASK 0x0000000000400000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_SPURIOUS_EVENT_SHFT 23
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_SPURIOUS_EVENT_MASK 0x0000000000800000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IOH_DESTINATION_TABLE_PARITY_SHFT 24
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IOH_DESTINATION_TABLE_PARITY_MASK 0x0000000001000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_HAD_ERROR_REPLY_SHFT 25
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_HAD_ERROR_REPLY_MASK 0x0000000002000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_TIMEOUT_SHFT 26
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_TIMEOUT_MASK 0x0000000004000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_LOCK_MANAGER_HAD_ERROR_REPLY_SHFT 27
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_LOCK_MANAGER_HAD_ERROR_REPLY_MASK 0x0000000008000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_HAD_ERROR_REPLY_SHFT 28
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_HAD_ERROR_REPLY_MASK 0x0000000010000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_TIMEOUT_SHFT 29
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_TIMEOUT_MASK 0x0000000020000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SB_ACTIVATION_OVERRUN_SHFT 30
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SB_ACTIVATION_OVERRUN_MASK 0x0000000040000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_HAD_ERROR_REPLY_SHFT 31
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_HAD_ERROR_REPLY_MASK 0x0000000080000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_TIMEOUT_SHFT 32
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_TIMEOUT_MASK 0x0000000100000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_0_PARITY_SHFT 33
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_0_PARITY_MASK 0x0000000200000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_1_PARITY_SHFT 34
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_1_PARITY_MASK 0x0000000400000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SOCKET_DESTINATION_TABLE_PARITY_SHFT 35
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SOCKET_DESTINATION_TABLE_PARITY_MASK 0x0000000800000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_BAU_REPLY_PAYLOAD_CORRUPTION_SHFT 36
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_BAU_REPLY_PAYLOAD_CORRUPTION_MASK 0x0000001000000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IO_PORT_DESTINATION_TABLE_PARITY_SHFT 37
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IO_PORT_DESTINATION_TABLE_PARITY_MASK 0x0000002000000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INTD_SOFT_ACK_TIMEOUT_SHFT 38
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INTD_SOFT_ACK_TIMEOUT_MASK 0x0000004000000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_OBESE_MSG_SHFT 39
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_OBESE_MSG_MASK 0x0000008000000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_COMMAND_ERR_SHFT 40
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_COMMAND_ERR_MASK 0x0000010000000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_TIMEOUT_SHFT 41
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_TIMEOUT_MASK 0x0000020000000000UL
+
+union uvh_lb_mcast_aoerr0_rpt_enable_u {
+    unsigned long	v;
+    struct uvh_lb_mcast_aoerr0_rpt_enable_s {
+	unsigned long	mcast_obese_msg                         :  1;  /* RW */
+	unsigned long	mcast_data_sb_err                       :  1;  /* RW */
+	unsigned long	mcast_nack_buff_parity                  :  1;  /* RW */
+	unsigned long	mcast_timeout                           :  1;  /* RW */
+	unsigned long	mcast_inactive_reply                    :  1;  /* RW */
+	unsigned long	mcast_upgrade_error                     :  1;  /* RW */
+	unsigned long	mcast_reg_count_underflow               :  1;  /* RW */
+	unsigned long	mcast_rep_obese_msg                     :  1;  /* RW */
+	unsigned long	ucache_req_runt_msg                     :  1;  /* RW */
+	unsigned long	ucache_req_obese_msg                    :  1;  /* RW */
+	unsigned long	ucache_req_data_sb_err                  :  1;  /* RW */
+	unsigned long	ucache_rep_runt_msg                     :  1;  /* RW */
+	unsigned long	ucache_rep_obese_msg                    :  1;  /* RW */
+	unsigned long	ucache_rep_data_sb_err                  :  1;  /* RW */
+	unsigned long	ucache_rep_command_err                  :  1;  /* RW */
+	unsigned long	ucache_pend_timeout                     :  1;  /* RW */
+	unsigned long	macc_req_runt_msg                       :  1;  /* RW */
+	unsigned long	macc_req_obese_msg                      :  1;  /* RW */
+	unsigned long	macc_req_data_sb_err                    :  1;  /* RW */
+	unsigned long	macc_rep_runt_msg                       :  1;  /* RW */
+	unsigned long	macc_rep_obese_msg                      :  1;  /* RW */
+	unsigned long	macc_rep_data_sb_err                    :  1;  /* RW */
+	unsigned long	macc_timeout                            :  1;  /* RW */
+	unsigned long	macc_spurious_event                     :  1;  /* RW */
+	unsigned long	ioh_destination_table_parity            :  1;  /* RW */
+	unsigned long	get_had_error_reply                     :  1;  /* RW */
+	unsigned long	get_timeout                             :  1;  /* RW */
+	unsigned long	lock_manager_had_error_reply            :  1;  /* RW */
+	unsigned long	put_had_error_reply                     :  1;  /* RW */
+	unsigned long	put_timeout                             :  1;  /* RW */
+	unsigned long	sb_activation_overrun                   :  1;  /* RW */
+	unsigned long	completed_gb_activation_had_error_reply :  1;  /* RW */
+	unsigned long	completed_gb_activation_timeout         :  1;  /* RW */
+	unsigned long	descriptor_buffer_0_parity              :  1;  /* RW */
+	unsigned long	descriptor_buffer_1_parity              :  1;  /* RW */
+	unsigned long	socket_destination_table_parity         :  1;  /* RW */
+	unsigned long	bau_reply_payload_corruption            :  1;  /* RW */
+	unsigned long	io_port_destination_table_parity        :  1;  /* RW */
+	unsigned long	intd_soft_ack_timeout                   :  1;  /* RW */
+	unsigned long	int_rep_obese_msg                       :  1;  /* RW */
+	unsigned long	int_rep_command_err                     :  1;  /* RW */
+	unsigned long	int_timeout                             :  1;  /* RW */
+	unsigned long	rsvd_42_63                              : 22;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
+/*                          UVH_LOCAL_INT0_CONFIG                            */
+/* ========================================================================= */
+#define UVH_LOCAL_INT0_CONFIG 0x61000UL
+
+#define UVH_LOCAL_INT0_CONFIG_VECTOR_SHFT 0
+#define UVH_LOCAL_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_LOCAL_INT0_CONFIG_DM_SHFT 8
+#define UVH_LOCAL_INT0_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVH_LOCAL_INT0_CONFIG_DESTMODE_SHFT 11
+#define UVH_LOCAL_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_LOCAL_INT0_CONFIG_STATUS_SHFT 12
+#define UVH_LOCAL_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVH_LOCAL_INT0_CONFIG_P_SHFT 13
+#define UVH_LOCAL_INT0_CONFIG_P_MASK 0x0000000000002000UL
+#define UVH_LOCAL_INT0_CONFIG_T_SHFT 15
+#define UVH_LOCAL_INT0_CONFIG_T_MASK 0x0000000000008000UL
+#define UVH_LOCAL_INT0_CONFIG_M_SHFT 16
+#define UVH_LOCAL_INT0_CONFIG_M_MASK 0x0000000000010000UL
+#define UVH_LOCAL_INT0_CONFIG_APIC_ID_SHFT 32
+#define UVH_LOCAL_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
+union uvh_local_int0_config_u {
+    unsigned long	v;
+    struct uvh_local_int0_config_s {
+	unsigned long	vector_  :  8;  /* RW */
+	unsigned long	dm       :  3;  /* RW */
+	unsigned long	destmode :  1;  /* RW */
+	unsigned long	status   :  1;  /* RO */
+	unsigned long	p        :  1;  /* RO */
+	unsigned long	rsvd_14  :  1;  /*    */
+	unsigned long	t        :  1;  /* RO */
+	unsigned long	m        :  1;  /* RW */
+	unsigned long	rsvd_17_31: 15;  /*    */
+	unsigned long	apic_id  : 32;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                          UVH_LOCAL_INT0_ENABLE                            */
+/* ========================================================================= */
+#define UVH_LOCAL_INT0_ENABLE 0x65000UL
+
+#define UVH_LOCAL_INT0_ENABLE_LB_HCERR_SHFT 0
+#define UVH_LOCAL_INT0_ENABLE_LB_HCERR_MASK 0x0000000000000001UL
+#define UVH_LOCAL_INT0_ENABLE_GR0_HCERR_SHFT 1
+#define UVH_LOCAL_INT0_ENABLE_GR0_HCERR_MASK 0x0000000000000002UL
+#define UVH_LOCAL_INT0_ENABLE_GR1_HCERR_SHFT 2
+#define UVH_LOCAL_INT0_ENABLE_GR1_HCERR_MASK 0x0000000000000004UL
+#define UVH_LOCAL_INT0_ENABLE_LH_HCERR_SHFT 3
+#define UVH_LOCAL_INT0_ENABLE_LH_HCERR_MASK 0x0000000000000008UL
+#define UVH_LOCAL_INT0_ENABLE_RH_HCERR_SHFT 4
+#define UVH_LOCAL_INT0_ENABLE_RH_HCERR_MASK 0x0000000000000010UL
+#define UVH_LOCAL_INT0_ENABLE_XN_HCERR_SHFT 5
+#define UVH_LOCAL_INT0_ENABLE_XN_HCERR_MASK 0x0000000000000020UL
+#define UVH_LOCAL_INT0_ENABLE_SI_HCERR_SHFT 6
+#define UVH_LOCAL_INT0_ENABLE_SI_HCERR_MASK 0x0000000000000040UL
+#define UVH_LOCAL_INT0_ENABLE_LB_AOERR0_SHFT 7
+#define UVH_LOCAL_INT0_ENABLE_LB_AOERR0_MASK 0x0000000000000080UL
+#define UVH_LOCAL_INT0_ENABLE_GR0_AOERR0_SHFT 8
+#define UVH_LOCAL_INT0_ENABLE_GR0_AOERR0_MASK 0x0000000000000100UL
+#define UVH_LOCAL_INT0_ENABLE_GR1_AOERR0_SHFT 9
+#define UVH_LOCAL_INT0_ENABLE_GR1_AOERR0_MASK 0x0000000000000200UL
+#define UVH_LOCAL_INT0_ENABLE_LH_AOERR0_SHFT 10
+#define UVH_LOCAL_INT0_ENABLE_LH_AOERR0_MASK 0x0000000000000400UL
+#define UVH_LOCAL_INT0_ENABLE_RH_AOERR0_SHFT 11
+#define UVH_LOCAL_INT0_ENABLE_RH_AOERR0_MASK 0x0000000000000800UL
+#define UVH_LOCAL_INT0_ENABLE_XN_AOERR0_SHFT 12
+#define UVH_LOCAL_INT0_ENABLE_XN_AOERR0_MASK 0x0000000000001000UL
+#define UVH_LOCAL_INT0_ENABLE_SI_AOERR0_SHFT 13
+#define UVH_LOCAL_INT0_ENABLE_SI_AOERR0_MASK 0x0000000000002000UL
+#define UVH_LOCAL_INT0_ENABLE_LB_AOERR1_SHFT 14
+#define UVH_LOCAL_INT0_ENABLE_LB_AOERR1_MASK 0x0000000000004000UL
+#define UVH_LOCAL_INT0_ENABLE_GR0_AOERR1_SHFT 15
+#define UVH_LOCAL_INT0_ENABLE_GR0_AOERR1_MASK 0x0000000000008000UL
+#define UVH_LOCAL_INT0_ENABLE_GR1_AOERR1_SHFT 16
+#define UVH_LOCAL_INT0_ENABLE_GR1_AOERR1_MASK 0x0000000000010000UL
+#define UVH_LOCAL_INT0_ENABLE_LH_AOERR1_SHFT 17
+#define UVH_LOCAL_INT0_ENABLE_LH_AOERR1_MASK 0x0000000000020000UL
+#define UVH_LOCAL_INT0_ENABLE_RH_AOERR1_SHFT 18
+#define UVH_LOCAL_INT0_ENABLE_RH_AOERR1_MASK 0x0000000000040000UL
+#define UVH_LOCAL_INT0_ENABLE_XN_AOERR1_SHFT 19
+#define UVH_LOCAL_INT0_ENABLE_XN_AOERR1_MASK 0x0000000000080000UL
+#define UVH_LOCAL_INT0_ENABLE_SI_AOERR1_SHFT 20
+#define UVH_LOCAL_INT0_ENABLE_SI_AOERR1_MASK 0x0000000000100000UL
+#define UVH_LOCAL_INT0_ENABLE_RH_VPI_INT_SHFT 21
+#define UVH_LOCAL_INT0_ENABLE_RH_VPI_INT_MASK 0x0000000000200000UL
+#define UVH_LOCAL_INT0_ENABLE_SYSTEM_SHUTDOWN_INT_SHFT 22
+#define UVH_LOCAL_INT0_ENABLE_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_0_SHFT 23
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_0_MASK 0x0000000000800000UL
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_1_SHFT 24
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_1_MASK 0x0000000001000000UL
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_2_SHFT 25
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_2_MASK 0x0000000002000000UL
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_3_SHFT 26
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_3_MASK 0x0000000004000000UL
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_4_SHFT 27
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_4_MASK 0x0000000008000000UL
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_5_SHFT 28
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_5_MASK 0x0000000010000000UL
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_6_SHFT 29
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_6_MASK 0x0000000020000000UL
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_7_SHFT 30
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_7_MASK 0x0000000040000000UL
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_8_SHFT 31
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_8_MASK 0x0000000080000000UL
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_9_SHFT 32
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_9_MASK 0x0000000100000000UL
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_10_SHFT 33
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_10_MASK 0x0000000200000000UL
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_11_SHFT 34
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_11_MASK 0x0000000400000000UL
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_12_SHFT 35
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_12_MASK 0x0000000800000000UL
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_13_SHFT 36
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_13_MASK 0x0000001000000000UL
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_14_SHFT 37
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_14_MASK 0x0000002000000000UL
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_15_SHFT 38
+#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_15_MASK 0x0000004000000000UL
+#define UVH_LOCAL_INT0_ENABLE_L1_NMI_INT_SHFT 39
+#define UVH_LOCAL_INT0_ENABLE_L1_NMI_INT_MASK 0x0000008000000000UL
+#define UVH_LOCAL_INT0_ENABLE_STOP_CLOCK_SHFT 40
+#define UVH_LOCAL_INT0_ENABLE_STOP_CLOCK_MASK 0x0000010000000000UL
+#define UVH_LOCAL_INT0_ENABLE_ASIC_TO_L1_SHFT 41
+#define UVH_LOCAL_INT0_ENABLE_ASIC_TO_L1_MASK 0x0000020000000000UL
+#define UVH_LOCAL_INT0_ENABLE_L1_TO_ASIC_SHFT 42
+#define UVH_LOCAL_INT0_ENABLE_L1_TO_ASIC_MASK 0x0000040000000000UL
+#define UVH_LOCAL_INT0_ENABLE_LTC_INT_SHFT 43
+#define UVH_LOCAL_INT0_ENABLE_LTC_INT_MASK 0x0000080000000000UL
+#define UVH_LOCAL_INT0_ENABLE_LA_SEQ_TRIGGER_SHFT 44
+#define UVH_LOCAL_INT0_ENABLE_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL
+
+union uvh_local_int0_enable_u {
+    unsigned long	v;
+    struct uvh_local_int0_enable_s {
+	unsigned long	lb_hcerr            :  1;  /* RW */
+	unsigned long	gr0_hcerr           :  1;  /* RW */
+	unsigned long	gr1_hcerr           :  1;  /* RW */
+	unsigned long	lh_hcerr            :  1;  /* RW */
+	unsigned long	rh_hcerr            :  1;  /* RW */
+	unsigned long	xn_hcerr            :  1;  /* RW */
+	unsigned long	si_hcerr            :  1;  /* RW */
+	unsigned long	lb_aoerr0           :  1;  /* RW */
+	unsigned long	gr0_aoerr0          :  1;  /* RW */
+	unsigned long	gr1_aoerr0          :  1;  /* RW */
+	unsigned long	lh_aoerr0           :  1;  /* RW */
+	unsigned long	rh_aoerr0           :  1;  /* RW */
+	unsigned long	xn_aoerr0           :  1;  /* RW */
+	unsigned long	si_aoerr0           :  1;  /* RW */
+	unsigned long	lb_aoerr1           :  1;  /* RW */
+	unsigned long	gr0_aoerr1          :  1;  /* RW */
+	unsigned long	gr1_aoerr1          :  1;  /* RW */
+	unsigned long	lh_aoerr1           :  1;  /* RW */
+	unsigned long	rh_aoerr1           :  1;  /* RW */
+	unsigned long	xn_aoerr1           :  1;  /* RW */
+	unsigned long	si_aoerr1           :  1;  /* RW */
+	unsigned long	rh_vpi_int          :  1;  /* RW */
+	unsigned long	system_shutdown_int :  1;  /* RW */
+	unsigned long	lb_irq_int_0        :  1;  /* RW */
+	unsigned long	lb_irq_int_1        :  1;  /* RW */
+	unsigned long	lb_irq_int_2        :  1;  /* RW */
+	unsigned long	lb_irq_int_3        :  1;  /* RW */
+	unsigned long	lb_irq_int_4        :  1;  /* RW */
+	unsigned long	lb_irq_int_5        :  1;  /* RW */
+	unsigned long	lb_irq_int_6        :  1;  /* RW */
+	unsigned long	lb_irq_int_7        :  1;  /* RW */
+	unsigned long	lb_irq_int_8        :  1;  /* RW */
+	unsigned long	lb_irq_int_9        :  1;  /* RW */
+	unsigned long	lb_irq_int_10       :  1;  /* RW */
+	unsigned long	lb_irq_int_11       :  1;  /* RW */
+	unsigned long	lb_irq_int_12       :  1;  /* RW */
+	unsigned long	lb_irq_int_13       :  1;  /* RW */
+	unsigned long	lb_irq_int_14       :  1;  /* RW */
+	unsigned long	lb_irq_int_15       :  1;  /* RW */
+	unsigned long	l1_nmi_int          :  1;  /* RW */
+	unsigned long	stop_clock          :  1;  /* RW */
+	unsigned long	asic_to_l1          :  1;  /* RW */
+	unsigned long	l1_to_asic          :  1;  /* RW */
+	unsigned long	ltc_int             :  1;  /* RW */
+	unsigned long	la_seq_trigger      :  1;  /* RW */
+	unsigned long	rsvd_45_63          : 19;  /*    */
+    } s;
+};
+
 /* ========================================================================= */
 /*                               UVH_NODE_ID                                 */
 /* ========================================================================= */
@@ -283,6 +645,73 @@ union uvh_node_id_u {
     } s;
 };
 
+/* ========================================================================= */
+/*                          UVH_NODE_PRESENT_TABLE                           */
+/* ========================================================================= */
+#define UVH_NODE_PRESENT_TABLE 0x1400UL
+#define UVH_NODE_PRESENT_TABLE_DEPTH 16
+
+#define UVH_NODE_PRESENT_TABLE_NODES_SHFT 0
+#define UVH_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL
+
+union uvh_node_present_table_u {
+    unsigned long	v;
+    struct uvh_node_present_table_s {
+	unsigned long	nodes : 64;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR                  */
+/* ========================================================================= */
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL
+
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+
+union uvh_rh_gam_alias210_redirect_config_0_mmr_u {
+    unsigned long	v;
+    struct uvh_rh_gam_alias210_redirect_config_0_mmr_s {
+	unsigned long	rsvd_0_23 : 24;  /*    */
+	unsigned long	dest_base : 22;  /* RW */
+	unsigned long	rsvd_46_63: 18;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
+/*                UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR                  */
+/* ========================================================================= */
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL
+
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+
+union uvh_rh_gam_alias210_redirect_config_1_mmr_u {
+    unsigned long	v;
+    struct uvh_rh_gam_alias210_redirect_config_1_mmr_s {
+	unsigned long	rsvd_0_23 : 24;  /*    */
+	unsigned long	dest_base : 22;  /* RW */
+	unsigned long	rsvd_46_63: 18;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
+/*                UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR                  */
+/* ========================================================================= */
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL
+
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+
+union uvh_rh_gam_alias210_redirect_config_2_mmr_u {
+    unsigned long	v;
+    struct uvh_rh_gam_alias210_redirect_config_2_mmr_s {
+	unsigned long	rsvd_0_23 : 24;  /*    */
+	unsigned long	dest_base : 22;  /* RW */
+	unsigned long	rsvd_46_63: 18;  /*    */
+    } s;
+};
+
 /* ========================================================================= */
 /*                    UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR                      */
 /* ========================================================================= */
@@ -369,5 +798,77 @@ union uvh_si_addr_map_config_u {
     } s;
 };
 
+/* ========================================================================= */
+/*                       UVH_SI_ALIAS0_OVERLAY_CONFIG                        */
+/* ========================================================================= */
+#define UVH_SI_ALIAS0_OVERLAY_CONFIG 0xc80008UL
+
+#define UVH_SI_ALIAS0_OVERLAY_CONFIG_BASE_SHFT 24
+#define UVH_SI_ALIAS0_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL
+#define UVH_SI_ALIAS0_OVERLAY_CONFIG_M_ALIAS_SHFT 48
+#define UVH_SI_ALIAS0_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL
+#define UVH_SI_ALIAS0_OVERLAY_CONFIG_ENABLE_SHFT 63
+#define UVH_SI_ALIAS0_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
+
+union uvh_si_alias0_overlay_config_u {
+    unsigned long	v;
+    struct uvh_si_alias0_overlay_config_s {
+	unsigned long	rsvd_0_23: 24;  /*    */
+	unsigned long	base    :  8;  /* RW */
+	unsigned long	rsvd_32_47: 16;  /*    */
+	unsigned long	m_alias :  5;  /* RW */
+	unsigned long	rsvd_53_62: 10;  /*    */
+	unsigned long	enable  :  1;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                       UVH_SI_ALIAS1_OVERLAY_CONFIG                        */
+/* ========================================================================= */
+#define UVH_SI_ALIAS1_OVERLAY_CONFIG 0xc80010UL
+
+#define UVH_SI_ALIAS1_OVERLAY_CONFIG_BASE_SHFT 24
+#define UVH_SI_ALIAS1_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL
+#define UVH_SI_ALIAS1_OVERLAY_CONFIG_M_ALIAS_SHFT 48
+#define UVH_SI_ALIAS1_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL
+#define UVH_SI_ALIAS1_OVERLAY_CONFIG_ENABLE_SHFT 63
+#define UVH_SI_ALIAS1_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
+
+union uvh_si_alias1_overlay_config_u {
+    unsigned long	v;
+    struct uvh_si_alias1_overlay_config_s {
+	unsigned long	rsvd_0_23: 24;  /*    */
+	unsigned long	base    :  8;  /* RW */
+	unsigned long	rsvd_32_47: 16;  /*    */
+	unsigned long	m_alias :  5;  /* RW */
+	unsigned long	rsvd_53_62: 10;  /*    */
+	unsigned long	enable  :  1;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                       UVH_SI_ALIAS2_OVERLAY_CONFIG                        */
+/* ========================================================================= */
+#define UVH_SI_ALIAS2_OVERLAY_CONFIG 0xc80018UL
+
+#define UVH_SI_ALIAS2_OVERLAY_CONFIG_BASE_SHFT 24
+#define UVH_SI_ALIAS2_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL
+#define UVH_SI_ALIAS2_OVERLAY_CONFIG_M_ALIAS_SHFT 48
+#define UVH_SI_ALIAS2_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL
+#define UVH_SI_ALIAS2_OVERLAY_CONFIG_ENABLE_SHFT 63
+#define UVH_SI_ALIAS2_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
+
+union uvh_si_alias2_overlay_config_u {
+    unsigned long	v;
+    struct uvh_si_alias2_overlay_config_s {
+	unsigned long	rsvd_0_23: 24;  /*    */
+	unsigned long	base    :  8;  /* RW */
+	unsigned long	rsvd_32_47: 16;  /*    */
+	unsigned long	m_alias :  5;  /* RW */
+	unsigned long	rsvd_53_62: 10;  /*    */
+	unsigned long	enable  :  1;  /* RW */
+    } s;
+};
+
 
 #endif /* __ASM_X86_UV_MMRS__ */
-- 
cgit v1.2.3-70-g09d2


From 4c1cbafb88490757a38119c41229251369bcecbc Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 3 Jun 2008 09:28:52 +0200
Subject: x86 mpparse: build fix

fix this build bug:

 drivers/acpi/pci_irq.c: In function 'acpi_pci_irq_enable':
 drivers/acpi/pci_irq.c:574: error: implicit declaration of function 'mp_config_acpi_gsi'

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/acpi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/acpi.h b/include/asm-x86/acpi.h
index 14411c9de46..73ce5b32443 100644
--- a/include/asm-x86/acpi.h
+++ b/include/asm-x86/acpi.h
@@ -28,6 +28,7 @@
 #include <asm/numa.h>
 #include <asm/processor.h>
 #include <asm/mmu.h>
+#include <asm/mpspec.h>
 
 #define COMPILER_DEPENDENT_INT64   long long
 #define COMPILER_DEPENDENT_UINT64  unsigned long long
-- 
cgit v1.2.3-70-g09d2


From ba924c81dd5a7a7fb5ded025af7fdd3b61f8ca67 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sat, 31 May 2008 22:51:51 -0700
Subject: x86, numa, 32-bit: increase max_elements to 1024

so every element will represent 64M instead of 256M.

AMD opteron could have HW memory hole remapping, so could have
[0, 8g + 64M) on node0. Reduce element size to 64M to keep that on node 0

Later we need to use find_e820_area() to allocate memory_node_map like
on 64-bit. But need to move memory_present out of populate_mem_map...

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/discontig_32.c  | 14 +++++++-------
 include/asm-x86/mmzone_32.h |  4 ++--
 2 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 55fdbab6b01..922af20d1d2 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -59,14 +59,14 @@ unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly;
 /*
  * 4) physnode_map     - the mapping between a pfn and owning node
  * physnode_map keeps track of the physical memory layout of a generic
- * numa node on a 256Mb break (each element of the array will
- * represent 256Mb of memory and will be marked by the node id.  so,
+ * numa node on a 64Mb break (each element of the array will
+ * represent 64Mb of memory and will be marked by the node id.  so,
  * if the first gig is on node 0, and the second gig is on node 1
  * physnode_map will contain:
  *
- *     physnode_map[0-3] = 0;
- *     physnode_map[4-7] = 1;
- *     physnode_map[8- ] = -1;
+ *     physnode_map[0-15] = 0;
+ *     physnode_map[16-31] = 1;
+ *     physnode_map[32- ] = -1;
  */
 s8 physnode_map[MAX_ELEMENTS] __read_mostly = { [0 ... (MAX_ELEMENTS - 1)] = -1};
 EXPORT_SYMBOL(physnode_map);
@@ -81,9 +81,9 @@ void memory_present(int nid, unsigned long start, unsigned long end)
 	printk(KERN_DEBUG "  ");
 	for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) {
 		physnode_map[pfn / PAGES_PER_ELEMENT] = nid;
-		printk("%ld ", pfn);
+		printk(KERN_CONT "%ld ", pfn);
 	}
-	printk("\n");
+	printk(KERN_CONT "\n");
 }
 
 unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
diff --git a/include/asm-x86/mmzone_32.h b/include/asm-x86/mmzone_32.h
index faef751181b..ab0012888c4 100644
--- a/include/asm-x86/mmzone_32.h
+++ b/include/asm-x86/mmzone_32.h
@@ -51,14 +51,14 @@ extern int early_pfn_to_nid(unsigned long pfn);
 /*
  * generic node memory support, the following assumptions apply:
  *
- * 1) memory comes in 256Mb contigious chunks which are either present or not
+ * 1) memory comes in 64Mb contigious chunks which are either present or not
  * 2) we will not have more than 64Gb in total
  *
  * for now assume that 64Gb is max amount of RAM for whole system
  *    64Gb / 4096bytes/page = 16777216 pages
  */
 #define MAX_NR_PAGES 16777216
-#define MAX_ELEMENTS 256
+#define MAX_ELEMENTS 1024
 #define PAGES_PER_ELEMENT (MAX_NR_PAGES/MAX_ELEMENTS)
 
 extern s8 physnode_map[];
-- 
cgit v1.2.3-70-g09d2


From 0596152388e234efebce464355186ad9e16c8cb6 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sat, 31 May 2008 22:52:47 -0700
Subject: x86, 32-bit: change propagate_e820_map() back to find_max_pfn()

we don't need to call memory_present that early.
numa and sparse will call memory_present later and might
even fail, it will call memory_present for the full range.

also for sparse it will call alloc_bootmem ... before we set up bootmem.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/e820_32.c  | 5 ++---
 arch/x86/kernel/setup_32.c | 4 ++--
 arch/x86/mm/discontig_32.c | 2 +-
 include/asm-x86/e820_32.h  | 2 +-
 4 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
index db760d4706a..0c025d04fc2 100644
--- a/arch/x86/kernel/e820_32.c
+++ b/arch/x86/kernel/e820_32.c
@@ -210,7 +210,7 @@ void __init init_iomem_resources(struct resource *code_resource,
 /*
  * Find the highest page frame number we have available
  */
-void __init propagate_e820_map(void)
+void __init find_max_pfn(void)
 {
 	int i;
 
@@ -227,7 +227,6 @@ void __init propagate_e820_map(void)
 			continue;
 		if (end > max_pfn)
 			max_pfn = end;
-		memory_present(0, start, end);
 	}
 }
 
@@ -361,7 +360,7 @@ static int __init parse_memmap(char *arg)
 		 * size before original memory map is
 		 * reset.
 		 */
-		propagate_e820_map();
+		find_max_pfn();
 		saved_max_pfn = max_pfn;
 #endif
 		e820.nr_map = 0;
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 6f40cb560ea..29010420458 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -730,10 +730,10 @@ void __init setup_arch(char **cmdline_p)
 		efi_init();
 
 	/* update e820 for memory not covered by WB MTRRs */
-	propagate_e820_map();
+	find_max_pfn();
 	mtrr_bp_init();
 	if (mtrr_trim_uncached_memory(max_pfn))
-		propagate_e820_map();
+		find_max_pfn();
 
 	max_low_pfn = setup_memory();
 
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 98b099eeab3..ebbbba33815 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -120,7 +120,7 @@ int __init get_memcfg_numa_flat(void)
 	printk("NUMA - single node, flat memory mode\n");
 
 	/* Run the memory configuration and find the top of memory. */
-	propagate_e820_map();
+	find_max_pfn();
 	node_start_pfn[0] = 0;
 	node_end_pfn[0] = max_pfn;
 	memory_present(0, 0, max_pfn);
diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h
index 7ace82570a3..00fbc60b9d3 100644
--- a/include/asm-x86/e820_32.h
+++ b/include/asm-x86/e820_32.h
@@ -21,7 +21,7 @@
 extern void setup_memory_map(void);
 extern void finish_e820_parsing(void);
 
-extern void propagate_e820_map(void);
+extern void find_max_pfn(void);
 extern void register_bootmem_low_pages(unsigned long max_low_pfn);
 extern void limit_regions(unsigned long long size);
 extern void init_iomem_resources(struct resource *code_resource,
-- 
cgit v1.2.3-70-g09d2


From 2944e16b25e7fb8b5ee0dd9dc7197a0f9e523cfd Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sun, 1 Jun 2008 13:17:38 -0700
Subject: x86: update mptable

make mptable to be consistent with acpi routing, so we could:

1. kexec kernel with acpi=off
2. work around BIOSes where acpi routing is working, but mptable is
   not right, so can use kernel/kexec to start other OSes that don't have
   good acpi support.

command line: update_mptable

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/acpi/boot.c |  22 +++
 arch/x86/kernel/e820.c      |  25 +++
 arch/x86/kernel/mpparse.c   | 401 ++++++++++++++++++++++++++++++++++++++++++--
 arch/x86/kernel/setup_64.c  |   4 +
 drivers/acpi/pci_irq.c      |   5 +
 include/asm-x86/e820.h      |   1 +
 include/asm-x86/mpspec.h    |   4 +
 7 files changed, 445 insertions(+), 17 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 2ddfabae382..f226bdc19f6 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1154,6 +1154,28 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
 	return gsi;
 }
 
+int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
+			u32 gsi, int triggering, int polarity)
+{
+	struct mpc_config_intsrc intsrc;
+	int ioapic;
+
+	/* print the entry should happen on mptable identically */
+	intsrc.mpc_type = MP_INTSRC;
+	intsrc.mpc_irqtype = mp_INT;
+	intsrc.mpc_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
+				(polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
+	intsrc.mpc_srcbus = number;
+	intsrc.mpc_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
+	ioapic = mp_find_ioapic(gsi);
+	intsrc.mpc_dstapic = mp_ioapic_routing[ioapic].apic_id;
+	intsrc.mpc_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base;
+
+	MP_intsrc_info(&intsrc);
+
+	return 0;
+}
+
 /*
  * Parse IOAPIC related entries in MADT
  * returns 0 on success, < 0 on error
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 0cd9132c945..cd2b99e27d4 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -739,3 +739,28 @@ u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
 	return -1UL;
 
 }
+
+/*
+ * pre allocated 4k and reserved it in e820
+ */
+u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
+{
+	u64 size = 0;
+	u64 addr;
+	u64 start;
+
+	start = startt;
+	while (size < sizet)
+		start = find_e820_area_size(start, &size, align);
+
+	if (size < sizet)
+		return 0;
+
+	addr = round_down(start + size - sizet, align);
+	update_memory_range(addr, sizet, E820_RAM, E820_RESERVED);
+	printk(KERN_INFO "update e820 for early_reserve_e820\n");
+	update_e820();
+
+	return addr;
+}
+
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 9f3792d5504..8898aa49079 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -25,6 +25,8 @@
 #include <asm/proto.h>
 #include <asm/acpi.h>
 #include <asm/bios_ebda.h>
+#include <asm/e820.h>
+#include <asm/trampoline.h>
 
 #include <mach_apic.h>
 #ifdef CONFIG_X86_32
@@ -161,20 +163,81 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m)
 	nr_ioapics++;
 }
 
-static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
+static void print_MP_intsrc_info(struct mpc_config_intsrc *m)
 {
-	printk(KERN_INFO "Int: type %d, pol %d, trig %d, bus %02x,"
+	printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x,"
 		" IRQ %02x, APIC ID %x, APIC INT %02x\n",
 		m->mpc_irqtype, m->mpc_irqflag & 3,
 		(m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
 		m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
-	mp_irqs[mp_irq_entries].mp_dstapic = m->mpc_dstapic;
-	mp_irqs[mp_irq_entries].mp_type = m->mpc_type;
-	mp_irqs[mp_irq_entries].mp_irqtype = m->mpc_irqtype;
-	mp_irqs[mp_irq_entries].mp_irqflag = m->mpc_irqflag;
-	mp_irqs[mp_irq_entries].mp_srcbus = m->mpc_srcbus;
-	mp_irqs[mp_irq_entries].mp_srcbusirq = m->mpc_srcbusirq;
-	mp_irqs[mp_irq_entries].mp_dstirq = m->mpc_dstirq;
+}
+
+static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq)
+{
+	printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x,"
+		" IRQ %02x, APIC ID %x, APIC INT %02x\n",
+		mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3,
+		(mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus,
+		mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq);
+}
+
+static void assign_to_mp_irq(struct mpc_config_intsrc *m,
+				    struct mp_config_intsrc *mp_irq)
+{
+	mp_irq->mp_dstapic = m->mpc_dstapic;
+	mp_irq->mp_type = m->mpc_type;
+	mp_irq->mp_irqtype = m->mpc_irqtype;
+	mp_irq->mp_irqflag = m->mpc_irqflag;
+	mp_irq->mp_srcbus = m->mpc_srcbus;
+	mp_irq->mp_srcbusirq = m->mpc_srcbusirq;
+	mp_irq->mp_dstirq = m->mpc_dstirq;
+}
+
+static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq,
+					struct mpc_config_intsrc *m)
+{
+	m->mpc_dstapic = mp_irq->mp_dstapic;
+	m->mpc_type = mp_irq->mp_type;
+	m->mpc_irqtype = mp_irq->mp_irqtype;
+	m->mpc_irqflag = mp_irq->mp_irqflag;
+	m->mpc_srcbus = mp_irq->mp_srcbus;
+	m->mpc_srcbusirq = mp_irq->mp_srcbusirq;
+	m->mpc_dstirq = mp_irq->mp_dstirq;
+}
+
+static int mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq,
+					struct mpc_config_intsrc *m)
+{
+	if (mp_irq->mp_dstapic != m->mpc_dstapic)
+		return 1;
+	if (mp_irq->mp_type != m->mpc_type)
+		return 2;
+	if (mp_irq->mp_irqtype != m->mpc_irqtype)
+		return 3;
+	if (mp_irq->mp_irqflag != m->mpc_irqflag)
+		return 4;
+	if (mp_irq->mp_srcbus != m->mpc_srcbus)
+		return 5;
+	if (mp_irq->mp_srcbusirq != m->mpc_srcbusirq)
+		return 6;
+	if (mp_irq->mp_dstirq != m->mpc_dstirq)
+		return 7;
+
+	return 0;
+}
+
+void MP_intsrc_info(struct mpc_config_intsrc *m)
+{
+	int i;
+
+	print_MP_intsrc_info(m);
+
+	for (i = 0; i < mp_irq_entries; i++) {
+		if (!mp_irq_mpc_intsrc_cmp(&mp_irqs[i], m))
+			return;
+	}
+
+	assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
 	if (++mp_irq_entries == MAX_IRQ_SOURCES)
 		panic("Max # of irq sources exceeded!!\n");
 }
@@ -268,12 +331,9 @@ static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
  * Read/parse the MPC
  */
 
-static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
+static int __init smp_check_mpc(struct mp_config_table *mpc, char *oem,
+				char *str)
 {
-	char str[16];
-	char oem[10];
-	int count = sizeof(*mpc);
-	unsigned char *mpt = ((unsigned char *)mpc) + count;
 
 	if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) {
 		printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n",
@@ -301,13 +361,28 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
 	memcpy(str, mpc->mpc_productid, 12);
 	str[12] = 0;
 
-#ifdef CONFIG_X86_32
-	mps_oem_check(mpc, oem, str);
-#endif
 	printk(KERN_INFO "MPTABLE: Product ID: %s\n", str);
 
 	printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic);
 
+	return 1;
+}
+
+static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
+{
+	char str[16];
+	char oem[10];
+
+	int count = sizeof(*mpc);
+	unsigned char *mpt = ((unsigned char *)mpc) + count;
+
+	if (!smp_check_mpc(mpc, oem, str))
+		return 0;
+
+#ifdef CONFIG_X86_32
+	mps_oem_check(mpc, oem, str);
+#endif
+
 	/* save the local APIC address, it might be non-default */
 	if (!acpi_lapic)
 		mp_lapic_addr = mpc->mpc_lapic;
@@ -785,3 +860,295 @@ void __init find_smp_config(void)
 {
 	__find_smp_config(1);
 }
+
+#ifdef CONFIG_X86_IO_APIC
+static u8 __initdata irq_used[MAX_IRQ_SOURCES];
+
+static int  __init get_MP_intsrc_index(struct mpc_config_intsrc *m)
+{
+	int i;
+
+	if (m->mpc_irqtype != mp_INT)
+		return 0;
+
+	if (m->mpc_irqflag != 0x0f)
+		return 0;
+
+	/* not legacy */
+
+	for (i = 0; i < mp_irq_entries; i++) {
+		if (mp_irqs[i].mp_irqtype != mp_INT)
+			continue;
+
+		if (mp_irqs[i].mp_irqflag != 0x0f)
+			continue;
+
+		if (mp_irqs[i].mp_srcbus != m->mpc_srcbus)
+			continue;
+		if (mp_irqs[i].mp_srcbusirq != m->mpc_srcbusirq)
+			continue;
+		if (irq_used[i]) {
+			/* already claimed */
+			return -2;
+		}
+		irq_used[i] = 1;
+		return i;
+	}
+
+	/* not found */
+	return -1;
+}
+
+#define SPARE_SLOT_NUM 20
+
+static struct mpc_config_intsrc __initdata *m_spare[SPARE_SLOT_NUM];
+#endif
+
+static int  __init replace_intsrc_all(struct mp_config_table *mpc,
+					unsigned long mpc_new_phys,
+					unsigned long mpc_new_length)
+{
+#ifdef CONFIG_X86_IO_APIC
+	int i;
+	int nr_m_spare = 0;
+#endif
+
+	int count = sizeof(*mpc);
+	unsigned char *mpt = ((unsigned char *)mpc) + count;
+
+	printk(KERN_INFO "mpc_length %x\n", mpc->mpc_length);
+	while (count < mpc->mpc_length) {
+		switch (*mpt) {
+		case MP_PROCESSOR:
+			{
+				struct mpc_config_processor *m =
+				    (struct mpc_config_processor *)mpt;
+				mpt += sizeof(*m);
+				count += sizeof(*m);
+				break;
+			}
+		case MP_BUS:
+			{
+				struct mpc_config_bus *m =
+				    (struct mpc_config_bus *)mpt;
+				mpt += sizeof(*m);
+				count += sizeof(*m);
+				break;
+			}
+		case MP_IOAPIC:
+			{
+				mpt += sizeof(struct mpc_config_ioapic);
+				count += sizeof(struct mpc_config_ioapic);
+				break;
+			}
+		case MP_INTSRC:
+			{
+#ifdef CONFIG_X86_IO_APIC
+				struct mpc_config_intsrc *m =
+				    (struct mpc_config_intsrc *)mpt;
+
+				printk(KERN_INFO "OLD ");
+				print_MP_intsrc_info(m);
+				i = get_MP_intsrc_index(m);
+				if (i > 0) {
+					assign_to_mpc_intsrc(&mp_irqs[i], m);
+					printk(KERN_INFO "NEW ");
+					print_mp_irq_info(&mp_irqs[i]);
+				} else if (!i) {
+					/* legacy, do nothing */
+				} else if (nr_m_spare < SPARE_SLOT_NUM) {
+					/*
+					 * not found (-1), or duplicated (-2)
+					 * are invalid entries,
+					 * we need to use the slot  later
+					 */
+					m_spare[nr_m_spare] = m;
+					nr_m_spare++;
+				}
+#endif
+				mpt += sizeof(struct mpc_config_intsrc);
+				count += sizeof(struct mpc_config_intsrc);
+				break;
+			}
+		case MP_LINTSRC:
+			{
+				struct mpc_config_lintsrc *m =
+				    (struct mpc_config_lintsrc *)mpt;
+				mpt += sizeof(*m);
+				count += sizeof(*m);
+				break;
+			}
+		default:
+			/* wrong mptable */
+			printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n");
+			printk(KERN_ERR "type %x\n", *mpt);
+			print_hex_dump(KERN_ERR, "  ", DUMP_PREFIX_ADDRESS, 16,
+					1, mpc, mpc->mpc_length, 1);
+			goto out;
+		}
+	}
+
+#ifdef CONFIG_X86_IO_APIC
+	for (i = 0; i < mp_irq_entries; i++) {
+		if (irq_used[i])
+			continue;
+
+		if (mp_irqs[i].mp_irqtype != mp_INT)
+			continue;
+
+		if (mp_irqs[i].mp_irqflag != 0x0f)
+			continue;
+
+		if (nr_m_spare > 0) {
+			printk(KERN_INFO "*NEW* found ");
+			nr_m_spare--;
+			assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]);
+			m_spare[nr_m_spare] = NULL;
+		} else {
+			struct mpc_config_intsrc *m =
+			    (struct mpc_config_intsrc *)mpt;
+			count += sizeof(struct mpc_config_intsrc);
+			if (!mpc_new_phys) {
+				printk(KERN_INFO "No spare slots, try to append...take your risk, new mpc_length %x\n", count);
+			} else {
+				if (count <= mpc_new_length)
+					printk(KERN_INFO "No spare slots, try to append..., new mpc_length %x\n", count);
+				else {
+					printk(KERN_ERR "mpc_new_length %lx is too small\n", mpc_new_length);
+					goto out;
+				}
+			}
+			assign_to_mpc_intsrc(&mp_irqs[i], m);
+			mpc->mpc_length = count;
+			mpt += sizeof(struct mpc_config_intsrc);
+		}
+		print_mp_irq_info(&mp_irqs[i]);
+	}
+#endif
+out:
+	/* update checksum */
+	mpc->mpc_checksum = 0;
+	mpc->mpc_checksum -= mpf_checksum((unsigned char *)mpc,
+					   mpc->mpc_length);
+
+	return 0;
+}
+
+int __initdata enable_update_mptable;
+
+static int __init update_mptable_setup(char *str)
+{
+	enable_update_mptable = 1;
+	return 0;
+}
+early_param("update_mptable", update_mptable_setup);
+
+static unsigned long __initdata mpc_new_phys;
+static unsigned long mpc_new_length __initdata = 4096;
+
+/* alloc_mptable or alloc_mptable=4k */
+static int __initdata alloc_mptable;
+static int __init parse_alloc_mptable_opt(char *p)
+{
+	enable_update_mptable = 1;
+	alloc_mptable = 1;
+	if (!p)
+		return 0;
+	mpc_new_length = memparse(p, &p);
+	return 0;
+}
+early_param("alloc_mptable", parse_alloc_mptable_opt);
+
+void __init early_reserve_e820_mpc_new(void)
+{
+	if (enable_update_mptable && alloc_mptable) {
+		u64 startt = 0;
+#ifdef CONFIG_X86_TRAMPOLINE
+		startt = TRAMPOLINE_BASE;
+#endif
+		mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4);
+	}
+}
+
+static int __init update_mp_table(void)
+{
+	char str[16];
+	char oem[10];
+	struct intel_mp_floating *mpf;
+	struct mp_config_table *mpc;
+	struct mp_config_table *mpc_new;
+
+	if (!enable_update_mptable)
+		return 0;
+
+	mpf = mpf_found;
+	if (!mpf)
+		return 0;
+
+	/*
+	 * Now see if we need to go further.
+	 */
+	if (mpf->mpf_feature1 != 0)
+		return 0;
+
+	if (!mpf->mpf_physptr)
+		return 0;
+
+	mpc = phys_to_virt(mpf->mpf_physptr);
+
+	if (!smp_check_mpc(mpc, oem, str))
+		return 0;
+
+	printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf));
+	printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr);
+
+	if (mpc_new_phys && mpc->mpc_length > mpc_new_length) {
+		mpc_new_phys = 0;
+		printk(KERN_INFO "mpc_new_length is %ld, please use alloc_mptable=8k\n",
+			 mpc_new_length);
+	}
+
+	if (!mpc_new_phys) {
+		unsigned char old, new;
+		/* check if we can change the postion */
+		mpc->mpc_checksum = 0;
+		old = mpf_checksum((unsigned char *)mpc, mpc->mpc_length);
+		mpc->mpc_checksum = 0xff;
+		new = mpf_checksum((unsigned char *)mpc, mpc->mpc_length);
+		if (old == new) {
+			printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n");
+			return 0;
+		}
+		printk(KERN_INFO "use in-positon replacing\n");
+	} else {
+		mpf->mpf_physptr = mpc_new_phys;
+		mpc_new = phys_to_virt(mpc_new_phys);
+		memcpy(mpc_new, mpc, mpc->mpc_length);
+		mpc = mpc_new;
+		/* check if we can modify that */
+		if (mpc_new_phys - mpf->mpf_physptr) {
+			struct intel_mp_floating *mpf_new;
+			/* steal 16 bytes from [0, 1k) */
+			printk(KERN_INFO "mpf new: %x\n", 0x400 - 16);
+			mpf_new = phys_to_virt(0x400 - 16);
+			memcpy(mpf_new, mpf, 16);
+			mpf = mpf_new;
+			mpf->mpf_physptr = mpc_new_phys;
+		}
+		mpf->mpf_checksum = 0;
+		mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16);
+		printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr);
+	}
+
+	/*
+	 * only replace the one with mp_INT and
+	 *	 MP_IRQ_TRIGGER_LEVEL|MP_IRQ_POLARITY_LOW,
+	 * already in mp_irqs , stored by ... and mp_config_acpi_gsi,
+	 * may need pci=routeirq for all coverage
+	 */
+	replace_intsrc_all(mpc, mpc_new_phys, mpc_new_length);
+
+	return 0;
+}
+
+late_initcall(update_mp_table);
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 89e6cca5d69..978a0d637f3 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -56,6 +56,7 @@
 #include <asm/desc.h>
 #include <video/edid.h>
 #include <asm/e820.h>
+#include <asm/mpspec.h>
 #include <asm/dma.h>
 #include <asm/gart.h>
 #include <asm/mpspec.h>
@@ -381,6 +382,9 @@ void __init setup_arch(char **cmdline_p)
 	 * we are rounding upwards:
 	 */
 	end_pfn = e820_end_of_ram();
+
+	/* pre allocte 4k for mptable mpc */
+	early_reserve_e820_mpc_new();
 	/* update e820 for memory not covered by WB MTRRs */
 	mtrr_bp_init();
 	if (mtrr_trim_uncached_memory(end_pfn)) {
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index 89022a74fae..e556f30c7c1 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -570,6 +570,11 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
 	       (triggering == ACPI_LEVEL_SENSITIVE) ? "level" : "edge",
 	       (polarity == ACPI_ACTIVE_LOW) ? "low" : "high", dev->irq);
 
+#ifdef CONFIG_X86
+	mp_config_acpi_gsi(dev->bus->number, dev->devfn, dev->pin, irq,
+				 triggering, polarity);
+#endif
+
 	return 0;
 }
 
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index 4266a2c5f2e..ee8fe4c5da4 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -84,6 +84,7 @@ extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align);
 extern void reserve_early(u64 start, u64 end, char *name);
 extern void free_early(u64 start, u64 end);
 extern void early_res_to_bootmem(u64 start, u64 end);
+extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h
index b785ddd8d76..6a34d6dfd04 100644
--- a/include/asm-x86/mpspec.h
+++ b/include/asm-x86/mpspec.h
@@ -39,6 +39,7 @@ extern unsigned long mp_lapic_addr;
 
 extern void find_smp_config(void);
 extern void get_smp_config(void);
+extern void early_reserve_e820_mpc_new(void);
 
 void __cpuinit generic_processor_info(int apicid, int version);
 #ifdef CONFIG_ACPI
@@ -47,6 +48,9 @@ extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
 				   u32 gsi);
 extern void mp_config_acpi_legacy_irqs(void);
 extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low);
+extern void MP_intsrc_info(struct mpc_config_intsrc *m);
+extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
+				u32 gsi, int triggering, int polarity);
 #endif /* CONFIG_ACPI */
 
 #define PHYSID_ARRAY_SIZE	BITS_TO_LONGS(MAX_APICS)
-- 
cgit v1.2.3-70-g09d2


From 835fc943f34139ed062f1ac194b52ed3b7123d88 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 3 Jun 2008 14:42:06 +0200
Subject: x86: mp build fix

fix:

 drivers/built-in.o: In function `acpi_pci_irq_enable':
 : undefined reference to `mp_config_acpi_gsi'

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/mpspec.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h
index 6a34d6dfd04..dbbf363dafb 100644
--- a/include/asm-x86/mpspec.h
+++ b/include/asm-x86/mpspec.h
@@ -49,8 +49,17 @@ extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
 extern void mp_config_acpi_legacy_irqs(void);
 extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low);
 extern void MP_intsrc_info(struct mpc_config_intsrc *m);
+#ifdef CONFIG_X86_IO_APIC
 extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
 				u32 gsi, int triggering, int polarity);
+#else
+static inline int
+mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
+		   u32 gsi, int triggering, int polarity)
+{
+	return 0;
+}
+#endif
 #endif /* CONFIG_ACPI */
 
 #define PHYSID_ARRAY_SIZE	BITS_TO_LONGS(MAX_APICS)
-- 
cgit v1.2.3-70-g09d2


From ab530e1f781da4d704892daab2bdd568f473687d Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 3 Jun 2008 10:25:54 -0700
Subject: x86: early check if a system is numaq

so we could fall back to one node numa.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/mpparse.c  | 14 +++++++++++---
 arch/x86/kernel/numaq_32.c | 24 +++++++++++++++++++-----
 include/asm-x86/mpspec.h   |  4 ++--
 include/asm-x86/numaq.h    |  1 +
 4 files changed, 33 insertions(+), 10 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 8898aa49079..b4a950da2f9 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -70,7 +70,10 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
 		return;
 	}
 #ifdef CONFIG_X86_NUMAQ
-	apicid = mpc_apic_id(m, translation_table[mpc_record]);
+	if (found_numaq)
+		apicid = mpc_apic_id(m, translation_table[mpc_record]);
+	else
+		apicid = m->mpc_apicid;
 #else
 	apicid = m->mpc_apicid;
 #endif
@@ -91,7 +94,8 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
 	str[6] = 0;
 
 #ifdef CONFIG_X86_NUMAQ
-	mpc_oem_bus_info(m, str, translation_table[mpc_record]);
+	if (found_numaq)
+		mpc_oem_bus_info(m, str, translation_table[mpc_record]);
 #else
 	printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str);
 #endif
@@ -112,7 +116,8 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
 #endif
 	} else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
 #ifdef CONFIG_X86_NUMAQ
-		mpc_oem_pci_bus(m, translation_table[mpc_record]);
+		if (found_numaq)
+			mpc_oem_pci_bus(m, translation_table[mpc_record]);
 #endif
 		clear_bit(m->mpc_busid, mp_bus_not_pci);
 #if defined(CONFIG_EISA) || defined (CONFIG_MCA)
@@ -321,6 +326,9 @@ static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
 {
 	if (strncmp(oem, "IBM NUMA", 8))
 		printk("Warning!  May not be a NUMA-Q system!\n");
+	else
+		found_numaq = 1;
+
 	if (mpc->mpc_oemptr)
 		smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr,
 				 mpc->mpc_oemsize);
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index e65281b1634..922be66668b 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -31,9 +31,12 @@
 #include <asm/numaq.h>
 #include <asm/topology.h>
 #include <asm/processor.h>
+#include <asm/mpspec.h>
 
 #define	MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
 
+int found_numaq;
+
 /*
  * Function: smp_dump_qct()
  *
@@ -67,13 +70,24 @@ static void __init smp_dump_qct(void)
 	}
 }
 
-/*
- * Unlike Summit, we don't really care to let the NUMA-Q
- * fall back to flat mode.  Don't compile for NUMA-Q
- * unless you really need it!
- */
+static __init void early_check_numaq(void)
+{
+	/*
+	 * Find possible boot-time SMP configuration:
+	 */
+	early_find_smp_config();
+	/*
+	 * get boot-time SMP configuration:
+	 */
+	if (smp_found_config)
+		early_get_smp_config();
+}
+
 int __init get_memcfg_numaq(void)
 {
+	early_check_numaq();
+	if (!found_numaq)
+		return 0;
 	smp_dump_qct();
 	return 1;
 }
diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h
index dbbf363dafb..f38b68e8de5 100644
--- a/include/asm-x86/mpspec.h
+++ b/include/asm-x86/mpspec.h
@@ -21,11 +21,11 @@ extern int pic_mode;
 /* Each PCI slot may be a combo card with its own bus.  4 IRQ pins per slot. */
 #define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4)
 
+#endif
+
 extern void early_find_smp_config(void);
 extern void early_get_smp_config(void);
 
-#endif
-
 #if defined(CONFIG_MCA) || defined(CONFIG_EISA)
 extern int mp_bus_id_to_type[MAX_MP_BUSSES];
 #endif
diff --git a/include/asm-x86/numaq.h b/include/asm-x86/numaq.h
index 94b86c31239..739d16484b8 100644
--- a/include/asm-x86/numaq.h
+++ b/include/asm-x86/numaq.h
@@ -28,6 +28,7 @@
 
 #ifdef CONFIG_X86_NUMAQ
 
+extern int found_numaq;
 extern int get_memcfg_numaq(void);
 
 /*
-- 
cgit v1.2.3-70-g09d2


From ee0c80fadfa56bf4f9d90c1c023429a6bd8edd69 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 3 Jun 2008 19:34:00 -0700
Subject: x86: move e820_register_active() to e820.c

to prepare 32-bit to use it.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/e820.c    | 109 ++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/e820_64.c |  97 -----------------------------------------
 include/asm-x86/e820.h    |  11 +++++
 include/asm-x86/e820_64.h |   5 ---
 4 files changed, 120 insertions(+), 102 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index cd2b99e27d4..c140f731743 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -764,3 +764,112 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
 	return addr;
 }
 
+#ifdef CONFIG_X86_32
+# ifdef CONFIG_X86_PAE
+#  define MAX_ARCH_PFN		(1ULL<<(36-PAGE_SHIFT))
+# else
+#  define MAX_ARCH_PFN		(1ULL<<(32-PAGE_SHIFT))
+# endif
+#else /* CONFIG_X86_32 */
+# define MAX_ARCH_PFN MAXMEM<<PAGE_SHIFT
+#endif
+
+/*
+ * Last pfn which the user wants to use.
+ */
+unsigned long __initdata end_user_pfn = MAX_ARCH_PFN;
+
+/*
+ * Find the highest page frame number we have available
+ */
+unsigned long __init e820_end_of_ram(void)
+{
+	unsigned long last_pfn;
+	unsigned long max_arch_pfn = MAX_ARCH_PFN;
+
+	last_pfn = find_max_pfn_with_active_regions();
+
+	if (last_pfn > max_arch_pfn)
+		last_pfn = max_arch_pfn;
+	if (last_pfn > end_user_pfn)
+		last_pfn = end_user_pfn;
+
+	printk(KERN_INFO "last_pfn = %lu max_arch_pfn = %lu\n",
+			 last_pfn, max_arch_pfn);
+	return last_pfn;
+}
+
+/*
+ * Finds an active region in the address range from start_pfn to last_pfn and
+ * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
+ */
+int __init e820_find_active_region(const struct e820entry *ei,
+				  unsigned long start_pfn,
+				  unsigned long last_pfn,
+				  unsigned long *ei_startpfn,
+				  unsigned long *ei_endpfn)
+{
+	u64 align = PAGE_SIZE;
+
+	*ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT;
+	*ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT;
+
+	/* Skip map entries smaller than a page */
+	if (*ei_startpfn >= *ei_endpfn)
+		return 0;
+
+	/* Skip if map is outside the node */
+	if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
+				    *ei_startpfn >= last_pfn)
+		return 0;
+
+	/* Check for overlaps */
+	if (*ei_startpfn < start_pfn)
+		*ei_startpfn = start_pfn;
+	if (*ei_endpfn > last_pfn)
+		*ei_endpfn = last_pfn;
+
+	/* Obey end_user_pfn to save on memmap */
+	if (*ei_startpfn >= end_user_pfn)
+		return 0;
+	if (*ei_endpfn > end_user_pfn)
+		*ei_endpfn = end_user_pfn;
+
+	return 1;
+}
+
+/* Walk the e820 map and register active regions within a node */
+void __init e820_register_active_regions(int nid, unsigned long start_pfn,
+					 unsigned long last_pfn)
+{
+	unsigned long ei_startpfn;
+	unsigned long ei_endpfn;
+	int i;
+
+	for (i = 0; i < e820.nr_map; i++)
+		if (e820_find_active_region(&e820.map[i],
+					    start_pfn, last_pfn,
+					    &ei_startpfn, &ei_endpfn))
+			add_active_range(nid, ei_startpfn, ei_endpfn);
+}
+
+/*
+ * Find the hole size (in bytes) in the memory range.
+ * @start: starting address of the memory range to scan
+ * @end: ending address of the memory range to scan
+ */
+u64 __init e820_hole_size(u64 start, u64 end)
+{
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long last_pfn = end >> PAGE_SHIFT;
+	unsigned long ei_startpfn, ei_endpfn, ram = 0;
+	int i;
+
+	for (i = 0; i < e820.nr_map; i++) {
+		if (e820_find_active_region(&e820.map[i],
+					    start_pfn, last_pfn,
+					    &ei_startpfn, &ei_endpfn))
+			ram += ei_endpfn - ei_startpfn;
+	}
+	return end - start - ((u64)ram << PAGE_SHIFT);
+}
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index a11bec20f65..0afee2ca0bf 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -41,29 +41,6 @@ unsigned long end_pfn;
  */
 unsigned long max_pfn_mapped;
 
-/*
- * Last pfn which the user wants to use.
- */
-static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
-
-/*
- * Find the highest page frame number we have available
- */
-unsigned long __init e820_end_of_ram(void)
-{
-	unsigned long last_pfn;
-
-	last_pfn = find_max_pfn_with_active_regions();
-
-	if (last_pfn > MAXMEM>>PAGE_SHIFT)
-		last_pfn = MAXMEM>>PAGE_SHIFT;
-	if (last_pfn > end_user_pfn)
-		last_pfn = end_user_pfn;
-
-	printk(KERN_INFO "last_pfn = %lu\n", last_pfn);
-	return last_pfn;
-}
-
 /*
  * Mark e820 reserved areas as busy for the resource manager.
  */
@@ -88,80 +65,6 @@ void __init e820_reserve_resources(void)
 	}
 }
 
-/*
- * Finds an active region in the address range from start_pfn to last_pfn and
- * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
- */
-static int __init e820_find_active_region(const struct e820entry *ei,
-					  unsigned long start_pfn,
-					  unsigned long last_pfn,
-					  unsigned long *ei_startpfn,
-					  unsigned long *ei_endpfn)
-{
-	*ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
-	*ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE) >> PAGE_SHIFT;
-
-	/* Skip map entries smaller than a page */
-	if (*ei_startpfn >= *ei_endpfn)
-		return 0;
-
-	/* Skip if map is outside the node */
-	if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
-				    *ei_startpfn >= last_pfn)
-		return 0;
-
-	/* Check for overlaps */
-	if (*ei_startpfn < start_pfn)
-		*ei_startpfn = start_pfn;
-	if (*ei_endpfn > last_pfn)
-		*ei_endpfn = last_pfn;
-
-	/* Obey end_user_pfn to save on memmap */
-	if (*ei_startpfn >= end_user_pfn)
-		return 0;
-	if (*ei_endpfn > end_user_pfn)
-		*ei_endpfn = end_user_pfn;
-
-	return 1;
-}
-
-/* Walk the e820 map and register active regions within a node */
-void __init
-e820_register_active_regions(int nid, unsigned long start_pfn,
-							unsigned long last_pfn)
-{
-	unsigned long ei_startpfn;
-	unsigned long ei_endpfn;
-	int i;
-
-	for (i = 0; i < e820.nr_map; i++)
-		if (e820_find_active_region(&e820.map[i],
-					    start_pfn, last_pfn,
-					    &ei_startpfn, &ei_endpfn))
-			add_active_range(nid, ei_startpfn, ei_endpfn);
-}
-
-/*
- * Find the hole size (in bytes) in the memory range.
- * @start: starting address of the memory range to scan
- * @end: ending address of the memory range to scan
- */
-unsigned long __init e820_hole_size(unsigned long start, unsigned long end)
-{
-	unsigned long start_pfn = start >> PAGE_SHIFT;
-	unsigned long last_pfn = end >> PAGE_SHIFT;
-	unsigned long ei_startpfn, ei_endpfn, ram = 0;
-	int i;
-
-	for (i = 0; i < e820.nr_map; i++) {
-		if (e820_find_active_region(&e820.map[i],
-					    start_pfn, last_pfn,
-					    &ei_startpfn, &ei_endpfn))
-			ram += ei_endpfn - ei_startpfn;
-	}
-	return end - start - (ram << PAGE_SHIFT);
-}
-
 static void early_panic(char *msg)
 {
 	early_printk(msg);
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index ee8fe4c5da4..44ed9c0a4df 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -79,6 +79,8 @@ static inline void e820_mark_nosave_regions(unsigned long limit_pfn)
 }
 #endif
 
+extern unsigned long end_user_pfn;
+
 extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align);
 extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align);
 extern void reserve_early(u64 start, u64 end, char *name);
@@ -86,6 +88,15 @@ extern void free_early(u64 start, u64 end);
 extern void early_res_to_bootmem(u64 start, u64 end);
 extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
 
+extern unsigned long e820_end_of_ram(void);
+extern int e820_find_active_region(const struct e820entry *ei,
+				  unsigned long start_pfn,
+				  unsigned long last_pfn,
+				  unsigned long *ei_startpfn,
+				  unsigned long *ei_endpfn);
+extern void e820_register_active_regions(int nid, unsigned long start_pfn,
+					 unsigned long end_pfn);
+extern u64 e820_hole_size(u64 start, u64 end);
 #endif /* __ASSEMBLY__ */
 
 #define ISA_START_ADDRESS	0xa0000
diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h
index 917963ccab6..368585daaa4 100644
--- a/include/asm-x86/e820_64.h
+++ b/include/asm-x86/e820_64.h
@@ -16,16 +16,11 @@
 #ifndef __ASSEMBLY__
 extern void setup_memory_region(void);
 extern void contig_e820_setup(void);
-extern unsigned long e820_end_of_ram(void);
 extern void e820_reserve_resources(void);
 extern int e820_any_non_reserved(unsigned long start, unsigned long end);
 extern int is_memory_any_valid(unsigned long start, unsigned long end);
 extern int e820_all_non_reserved(unsigned long start, unsigned long end);
 extern int is_memory_all_valid(unsigned long start, unsigned long end);
-extern unsigned long e820_hole_size(unsigned long start, unsigned long end);
-
-extern void e820_register_active_regions(int nid, unsigned long start_pfn,
-					 unsigned long end_pfn);
 
 extern void finish_e820_parsing(void);
 
-- 
cgit v1.2.3-70-g09d2


From 7b2a0a6c4866cac146dcb0433e6984eb19a81335 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 3 Jun 2008 19:35:04 -0700
Subject: x86: make 32-bit use e820_register_active_regions()

this way 32-bit is more similar to 64-bit, and smarter e820 and numa.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/e820_32.c  | 68 ++--------------------------------------------
 arch/x86/kernel/numaq_32.c |  3 ++
 arch/x86/kernel/setup_32.c | 24 ++++++++++++----
 arch/x86/kernel/srat_32.c  |  4 ++-
 arch/x86/mm/discontig_32.c | 19 ++++---------
 include/asm-x86/e820_32.h  |  2 --
 6 files changed, 33 insertions(+), 87 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
index 0c025d04fc2..e8a3b968c9f 100644
--- a/arch/x86/kernel/e820_32.c
+++ b/arch/x86/kernel/e820_32.c
@@ -207,69 +207,6 @@ void __init init_iomem_resources(struct resource *code_resource,
 	}
 }
 
-/*
- * Find the highest page frame number we have available
- */
-void __init find_max_pfn(void)
-{
-	int i;
-
-	max_pfn = 0;
-
-	for (i = 0; i < e820.nr_map; i++) {
-		unsigned long start, end;
-		/* RAM? */
-		if (e820.map[i].type != E820_RAM)
-			continue;
-		start = PFN_UP(e820.map[i].addr);
-		end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
-		if (start >= end)
-			continue;
-		if (end > max_pfn)
-			max_pfn = end;
-	}
-}
-
-/*
- * Register fully available low RAM pages with the bootmem allocator.
- */
-void __init register_bootmem_low_pages(unsigned long max_low_pfn)
-{
-	int i;
-
-	for (i = 0; i < e820.nr_map; i++) {
-		unsigned long curr_pfn, last_pfn, size;
-		/*
-		 * Reserve usable low memory
-		 */
-		if (e820.map[i].type != E820_RAM)
-			continue;
-		/*
-		 * We are rounding up the start address of usable memory:
-		 */
-		curr_pfn = PFN_UP(e820.map[i].addr);
-		if (curr_pfn >= max_low_pfn)
-			continue;
-		/*
-		 * ... and at the end of the usable range downwards:
-		 */
-		last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
-
-		if (last_pfn > max_low_pfn)
-			last_pfn = max_low_pfn;
-
-		/*
-		 * .. finally, did all the rounding and playing
-		 * around just make the area go away?
-		 */
-		if (last_pfn <= curr_pfn)
-			continue;
-
-		size = last_pfn - curr_pfn;
-		free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
-	}
-}
-
 void __init limit_regions(unsigned long long size)
 {
 	unsigned long long current_addr;
@@ -360,8 +297,9 @@ static int __init parse_memmap(char *arg)
 		 * size before original memory map is
 		 * reset.
 		 */
-		find_max_pfn();
-		saved_max_pfn = max_pfn;
+		e820_register_active_regions(0, 0, -1UL);
+		saved_max_pfn = e820_end_of_ram();
+		remove_all_active_ranges();
 #endif
 		e820.nr_map = 0;
 		user_defined_memmap = 1;
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index 922be66668b..27b908254fb 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -32,6 +32,7 @@
 #include <asm/topology.h>
 #include <asm/processor.h>
 #include <asm/mpspec.h>
+#include <asm/e820.h>
 
 #define	MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
 
@@ -61,6 +62,8 @@ static void __init smp_dump_qct(void)
 			node_end_pfn[node] = MB_TO_PAGES(
 				eq->hi_shrd_mem_start + eq->hi_shrd_mem_size);
 
+			e820_register_active_regions(node, node_start_pfn[node],
+							node_end_pfn[node]);
 			memory_present(node,
 				node_start_pfn[node], node_end_pfn[node]);
 			node_remap_size[node] = node_memmap_size_bytes(node,
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index ccf3595202f..0ec6480aaa2 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -405,11 +405,12 @@ static void __init zone_sizes_init(void)
 	max_zone_pfns[ZONE_DMA] =
 		virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
 	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
+	remove_all_active_ranges();
 #ifdef CONFIG_HIGHMEM
 	max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
-	add_active_range(0, 0, highend_pfn);
+	e820_register_active_regions(0, 0, highend_pfn);
 #else
-	add_active_range(0, 0, max_low_pfn);
+	e820_register_active_regions(0, 0, max_low_pfn);
 #endif
 
 	free_area_init_nodes(max_zone_pfns);
@@ -582,6 +583,7 @@ static void __init relocate_initrd(void)
 
 void __init setup_bootmem_allocator(void)
 {
+	int i;
 	unsigned long bootmap_size, bootmap;
 	/*
 	 * Initialize the boot-time allocator (with low memory only):
@@ -603,7 +605,8 @@ void __init setup_bootmem_allocator(void)
 		 min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT);
 	printk(KERN_INFO "  bootmap %08lx - %08lx\n",
 		 bootmap, bootmap + bootmap_size);
-	register_bootmem_low_pages(max_low_pfn);
+	for_each_online_node(i)
+		free_bootmem_with_active_regions(i, max_low_pfn);
 	early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
 
 #ifdef CONFIG_ACPI_SLEEP
@@ -733,11 +736,20 @@ void __init setup_arch(char **cmdline_p)
 	if (efi_enabled)
 		efi_init();
 
+	e820_register_active_regions(0, 0, -1UL);
+	/*
+	 * partially used pages are not usable - thus
+	 * we are rounding upwards:
+	 */
+	max_pfn = e820_end_of_ram();
+
 	/* update e820 for memory not covered by WB MTRRs */
-	find_max_pfn();
 	mtrr_bp_init();
-	if (mtrr_trim_uncached_memory(max_pfn))
-		find_max_pfn();
+	if (mtrr_trim_uncached_memory(max_pfn)) {
+		remove_all_active_ranges();
+		e820_register_active_regions(0, 0, -1UL);
+		max_pfn = e820_end_of_ram();
+	}
 
 	max_low_pfn = setup_memory();
 
diff --git a/arch/x86/kernel/srat_32.c b/arch/x86/kernel/srat_32.c
index 88971ee166d..32d8b114293 100644
--- a/arch/x86/kernel/srat_32.c
+++ b/arch/x86/kernel/srat_32.c
@@ -31,6 +31,7 @@
 #include <asm/srat.h>
 #include <asm/topology.h>
 #include <asm/smp.h>
+#include <asm/e820.h>
 
 /*
  * proximity macros and definitions
@@ -244,7 +245,8 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
 		printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
 		       j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
 		node_read_chunk(chunk->nid, chunk);
-		add_active_range(chunk->nid, chunk->start_pfn, chunk->end_pfn);
+		e820_register_active_regions(chunk->nid, chunk->start_pfn,
+					     min(chunk->end_pfn, max_pfn));
 	}
  
 	for_each_online_node(nid) {
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 7ced26ab9ae..a89ccf3d4c1 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -120,10 +120,9 @@ int __init get_memcfg_numa_flat(void)
 {
 	printk("NUMA - single node, flat memory mode\n");
 
-	/* Run the memory configuration and find the top of memory. */
-	find_max_pfn();
 	node_start_pfn[0] = 0;
 	node_end_pfn[0] = max_pfn;
+	e820_register_active_regions(0, 0, max_pfn);
 	memory_present(0, 0, max_pfn);
 	node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn);
 
@@ -337,6 +336,11 @@ unsigned long __init setup_memory(void)
 	 * this space and use it to adjust the boundary between ZONE_NORMAL
 	 * and ZONE_HIGHMEM.
 	 */
+
+	/* call find_max_low_pfn at first, it could update max_pfn */
+	system_max_low_pfn = max_low_pfn = find_max_low_pfn();
+
+	remove_all_active_ranges();
 	get_memcfg_numa();
 
 	kva_pages = round_up(calculate_numa_remap_pages(), PTRS_PER_PTE);
@@ -344,7 +348,6 @@ unsigned long __init setup_memory(void)
 	/* partially used pages are not usable - thus round upwards */
 	system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end);
 
-	system_max_low_pfn = max_low_pfn = find_max_low_pfn();
 	kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE);
 	do {
 		kva_start_pfn = find_e820_area(kva_target_pfn<<PAGE_SHIFT,
@@ -402,7 +405,6 @@ unsigned long __init setup_memory(void)
 
 void __init zone_sizes_init(void)
 {
-	int nid;
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
 	max_zone_pfns[ZONE_DMA] =
@@ -412,15 +414,6 @@ void __init zone_sizes_init(void)
 	max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
 #endif
 
-	/* If SRAT has not registered memory, register it now */
-	if (find_max_pfn_with_active_regions() == 0) {
-		for_each_online_node(nid) {
-			if (node_has_online_mem(nid))
-				add_active_range(nid, node_start_pfn[nid],
-							node_end_pfn[nid]);
-		}
-	}
-
 	free_area_init_nodes(max_zone_pfns);
 	return;
 }
diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h
index 00fbc60b9d3..212b74c10ef 100644
--- a/include/asm-x86/e820_32.h
+++ b/include/asm-x86/e820_32.h
@@ -21,8 +21,6 @@
 extern void setup_memory_map(void);
 extern void finish_e820_parsing(void);
 
-extern void find_max_pfn(void);
-extern void register_bootmem_low_pages(unsigned long max_low_pfn);
 extern void limit_regions(unsigned long long size);
 extern void init_iomem_resources(struct resource *code_resource,
 				 struct resource *data_resource,
-- 
cgit v1.2.3-70-g09d2


From d0ec2c6f2c2f0478b34ae78b3e65f60a561ac807 Mon Sep 17 00:00:00 2001
From: "Huang, Ying" <ying.huang@intel.com>
Date: Mon, 2 Jun 2008 14:26:18 +0800
Subject: x86: reserve highmem pages via reserve_early

This patch makes early reserved highmem pages become reserved
pages. This can be used for highmem pages allocated by bootloader such
as EFI memory map, linked list of setup_data, etc.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Cc: andi@firstfloor.org
Cc: mingo@redhat.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/e820.c | 11 +++++++++++
 arch/x86/mm/init_32.c  |  3 ++-
 include/asm-x86/e820.h |  1 +
 3 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index ac5e9ebf70e..a706e9057ba 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -612,6 +612,17 @@ void __init free_early(u64 start, u64 end)
 	early_res[j - 1].end = 0;
 }
 
+int __init page_is_reserved_early(unsigned long pagenr)
+{
+	u64 start = (u64)pagenr << PAGE_SHIFT;
+	int i;
+	struct early_res *r;
+
+	i = find_overlapped_early(start, start + PAGE_SIZE);
+	r = &early_res[i];
+	return (i < MAX_EARLY_RES && r->end);
+}
+
 void __init early_res_to_bootmem(u64 start, u64 end)
 {
 	int i;
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index ec30d10154b..0e7bb5e8167 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -289,7 +289,8 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
 
 void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
 {
-	if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
+	if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn)) &&
+	    !page_is_reserved_early(pfn)) {
 		ClearPageReserved(page);
 		init_page_count(page);
 		__free_page(page);
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index 44ed9c0a4df..8aa32323a18 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -86,6 +86,7 @@ extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align);
 extern void reserve_early(u64 start, u64 end, char *name);
 extern void free_early(u64 start, u64 end);
 extern void early_res_to_bootmem(u64 start, u64 end);
+extern int page_is_reserved_early(unsigned long pagenr);
 extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
 
 extern unsigned long e820_end_of_ram(void);
-- 
cgit v1.2.3-70-g09d2


From ecacf09f7d26b2317e8b1d59fa40f62081fad0bb Mon Sep 17 00:00:00 2001
From: "Huang, Ying" <ying.huang@intel.com>
Date: Mon, 2 Jun 2008 14:26:21 +0800
Subject: x86: reserve EFI memory map with reserve_early

This patch reserves the EFI memory map with reserve_early(). Because EFI
memory map is allocated by bootloader, if it is not reserved by
reserved_early(), it may be overwritten through address returned by
find_e820_area().

Signed-off-by: Huang Ying <ying.huang@intel.com>
Cc: andi@firstfloor.org
Cc: mingo@redhat.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/efi.c      | 33 ++++++++++++++++++++-------------
 arch/x86/kernel/efi_64.c   |  6 ------
 arch/x86/kernel/setup_32.c |  5 ++++-
 arch/x86/kernel/setup_64.c |  7 +++----
 include/asm-x86/efi.h      |  2 +-
 5 files changed, 28 insertions(+), 25 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 4a1a26d5931..d5c7fcdd186 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -238,6 +238,23 @@ static void __init add_efi_memmap(void)
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 }
 
+void __init efi_reserve_early(void)
+{
+	unsigned long pmap;
+
+	pmap = boot_params.efi_info.efi_memmap;
+#ifdef CONFIG_X86_64
+	pmap += (__u64)boot_params.efi_info.efi_memmap_hi << 32;
+#endif
+	memmap.phys_map = (void *)pmap;
+	memmap.nr_map = boot_params.efi_info.efi_memmap_size /
+		boot_params.efi_info.efi_memdesc_size;
+	memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
+	memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
+	reserve_early(pmap, pmap + memmap.nr_map * memmap.desc_size,
+		      "EFI memmap");
+}
+
 #if EFI_DEBUG
 static void __init print_efi_memmap(void)
 {
@@ -267,21 +284,11 @@ void __init efi_init(void)
 	int i = 0;
 	void *tmp;
 
-#ifdef CONFIG_X86_32
 	efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
-	memmap.phys_map = (void *)boot_params.efi_info.efi_memmap;
-#else
-	efi_phys.systab = (efi_system_table_t *)
-		(boot_params.efi_info.efi_systab |
-		 ((__u64)boot_params.efi_info.efi_systab_hi<<32));
-	memmap.phys_map = (void *)
-		(boot_params.efi_info.efi_memmap |
-		 ((__u64)boot_params.efi_info.efi_memmap_hi<<32));
+#ifdef CONFIG_X86_64
+	efi_phys.systab = (void *)efi_phys.systab +
+		((__u64)boot_params.efi_info.efi_systab_hi<<32);
 #endif
-	memmap.nr_map = boot_params.efi_info.efi_memmap_size /
-		boot_params.efi_info.efi_memdesc_size;
-	memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
-	memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
 
 	efi.systab = early_ioremap((unsigned long)efi_phys.systab,
 				   sizeof(efi_system_table_t));
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index 21a7b759687..652c5287215 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -97,12 +97,6 @@ void __init efi_call_phys_epilog(void)
 	early_runtime_code_mapping_set_exec(0);
 }
 
-void __init efi_reserve_bootmem(void)
-{
-	reserve_bootmem_generic((unsigned long)memmap.phys_map,
-				memmap.nr_map * memmap.desc_size);
-}
-
 void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size)
 {
 	static unsigned pages_mapped __initdata;
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 0ec6480aaa2..2960cbecfa5 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -67,6 +67,7 @@
 #include <asm/bios_ebda.h>
 #include <asm/cacheflush.h>
 #include <asm/processor.h>
+#include <asm/efi.h>
 
 /* This value is set up by the early boot code to point to the value
    immediately after the boot time page tables.  It contains a *physical*
@@ -683,8 +684,10 @@ void __init setup_arch(char **cmdline_p)
 
 #ifdef CONFIG_EFI
 	if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
-		     "EL32", 4))
+		     "EL32", 4)) {
 		efi_enabled = 1;
+		efi_reserve_early();
+	}
 #endif
 
 	ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 2599b274420..078c02f6f5f 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -330,8 +330,10 @@ void __init setup_arch(char **cmdline_p)
 #endif
 #ifdef CONFIG_EFI
 	if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
-		     "EL64", 4))
+		     "EL64", 4)) {
 		efi_enabled = 1;
+		efi_reserve_early();
+	}
 #endif
 
 	ARCH_SETUP
@@ -457,9 +459,6 @@ void __init setup_arch(char **cmdline_p)
        acpi_reserve_bootmem();
 #endif
 
-	if (efi_enabled)
-		efi_reserve_bootmem();
-
 #ifdef CONFIG_X86_MPPARSE
        /*
 	* Find and reserve possible boot-time SMP configuration:
diff --git a/include/asm-x86/efi.h b/include/asm-x86/efi.h
index d53004b855c..7ed2bd7a7f5 100644
--- a/include/asm-x86/efi.h
+++ b/include/asm-x86/efi.h
@@ -90,7 +90,7 @@ extern void *efi_ioremap(unsigned long addr, unsigned long size);
 
 #endif /* CONFIG_X86_32 */
 
-extern void efi_reserve_bootmem(void);
+extern void efi_reserve_early(void);
 extern void efi_call_phys_prelog(void);
 extern void efi_call_phys_epilog(void);
 
-- 
cgit v1.2.3-70-g09d2


From 0c51a965ed3c44dd50497e74492a015680e49899 Mon Sep 17 00:00:00 2001
From: "Huang, Ying" <ying.huang@intel.com>
Date: Mon, 2 Jun 2008 14:26:23 +0800
Subject: x86: extract common part of head32.c and head64.c into head.c

This patch extracts the common part of head32.c and head64.c into head.c.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Cc: andi@firstfloor.org
Cc: mingo@redhat.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Makefile           |  1 +
 arch/x86/kernel/Makefile    |  2 +-
 arch/x86/kernel/head.c      | 55 +++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/head32.c    | 50 -----------------------------------------
 arch/x86/kernel/head64.c    | 50 -----------------------------------------
 include/asm-x86/bios_ebda.h |  2 ++
 6 files changed, 59 insertions(+), 101 deletions(-)
 create mode 100644 arch/x86/kernel/head.c

(limited to 'include/asm-x86')

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 3cff3c894cf..30b046bcccb 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -160,6 +160,7 @@ KBUILD_AFLAGS += $(mflags-y)
 
 head-y := arch/x86/kernel/head_$(BITS).o
 head-y += arch/x86/kernel/head$(BITS).o
+head-y += arch/x86/kernel/head.o
 head-y += arch/x86/kernel/init_task.o
 
 libs-y  += arch/x86/lib/
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5369a4e36f1..7e01ce39c83 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the linux kernel.
 #
 
-extra-y                := head_$(BITS).o head$(BITS).o init_task.o vmlinux.lds
+extra-y                := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinux.lds
 
 CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
 
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
new file mode 100644
index 00000000000..e0d0ce58979
--- /dev/null
+++ b/arch/x86/kernel/head.c
@@ -0,0 +1,55 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+
+#include <asm/setup.h>
+#include <asm/bios_ebda.h>
+
+#define BIOS_LOWMEM_KILOBYTES 0x413
+
+/*
+ * The BIOS places the EBDA/XBDA at the top of conventional
+ * memory, and usually decreases the reported amount of
+ * conventional memory (int 0x12) too. This also contains a
+ * workaround for Dell systems that neglect to reserve EBDA.
+ * The same workaround also avoids a problem with the AMD768MPX
+ * chipset: reserve a page before VGA to prevent PCI prefetch
+ * into it (errata #56). Usually the page is reserved anyways,
+ * unless you have no PS/2 mouse plugged in.
+ */
+void __init reserve_ebda_region(void)
+{
+	unsigned int lowmem, ebda_addr;
+
+	/* To determine the position of the EBDA and the */
+	/* end of conventional memory, we need to look at */
+	/* the BIOS data area. In a paravirtual environment */
+	/* that area is absent. We'll just have to assume */
+	/* that the paravirt case can handle memory setup */
+	/* correctly, without our help. */
+	if (paravirt_enabled())
+		return;
+
+	/* end of low (conventional) memory */
+	lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
+	lowmem <<= 10;
+
+	/* start of EBDA area */
+	ebda_addr = get_bios_ebda();
+
+	/* Fixup: bios puts an EBDA in the top 64K segment */
+	/* of conventional memory, but does not adjust lowmem. */
+	if ((lowmem - ebda_addr) <= 0x10000)
+		lowmem = ebda_addr;
+
+	/* Fixup: bios does not report an EBDA at all. */
+	/* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
+	if ((ebda_addr == 0) && (lowmem >= 0x9f000))
+		lowmem = 0x9f000;
+
+	/* Paranoia: should never happen, but... */
+	if ((lowmem == 0) || (lowmem >= 0x100000))
+		lowmem = 0x9f000;
+
+	/* reserve all memory between lowmem and the 1MB mark */
+	reserve_early(lowmem, 0x100000, "BIOS reserved");
+}
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 0b6cb9fba71..fa1d25dd83e 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -13,56 +13,6 @@
 #include <asm/e820.h>
 #include <asm/bios_ebda.h>
 
-#define BIOS_LOWMEM_KILOBYTES 0x413
-
-/*
- * The BIOS places the EBDA/XBDA at the top of conventional
- * memory, and usually decreases the reported amount of
- * conventional memory (int 0x12) too. This also contains a
- * workaround for Dell systems that neglect to reserve EBDA.
- * The same workaround also avoids a problem with the AMD768MPX
- * chipset: reserve a page before VGA to prevent PCI prefetch
- * into it (errata #56). Usually the page is reserved anyways,
- * unless you have no PS/2 mouse plugged in.
- */
-static void __init reserve_ebda_region(void)
-{
-	unsigned int lowmem, ebda_addr;
-
-	/* To determine the position of the EBDA and the */
-	/* end of conventional memory, we need to look at */
-	/* the BIOS data area. In a paravirtual environment */
-	/* that area is absent. We'll just have to assume */
-	/* that the paravirt case can handle memory setup */
-	/* correctly, without our help. */
-	if (paravirt_enabled())
-		return;
-
-	/* end of low (conventional) memory */
-	lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
-	lowmem <<= 10;
-
-	/* start of EBDA area */
-	ebda_addr = get_bios_ebda();
-
-	/* Fixup: bios puts an EBDA in the top 64K segment */
-	/* of conventional memory, but does not adjust lowmem. */
-	if ((lowmem - ebda_addr) <= 0x10000)
-		lowmem = ebda_addr;
-
-	/* Fixup: bios does not report an EBDA at all. */
-	/* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
-	if ((ebda_addr == 0) && (lowmem >= 0x9f000))
-		lowmem = 0x9f000;
-
-	/* Paranoia: should never happen, but... */
-	if ((lowmem == 0) || (lowmem >= 0x100000))
-		lowmem = 0x9f000;
-
-	/* reserve all memory between lowmem and the 1MB mark */
-	reserve_early(lowmem, 0x100000, "BIOS reserved");
-}
-
 void __init i386_start_kernel(void)
 {
 	reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index e25c57b8aa8..f1773c8ee21 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -51,56 +51,6 @@ static void __init copy_bootdata(char *real_mode_data)
 	}
 }
 
-#define BIOS_LOWMEM_KILOBYTES 0x413
-
-/*
- * The BIOS places the EBDA/XBDA at the top of conventional
- * memory, and usually decreases the reported amount of
- * conventional memory (int 0x12) too. This also contains a
- * workaround for Dell systems that neglect to reserve EBDA.
- * The same workaround also avoids a problem with the AMD768MPX
- * chipset: reserve a page before VGA to prevent PCI prefetch
- * into it (errata #56). Usually the page is reserved anyways,
- * unless you have no PS/2 mouse plugged in.
- */
-static void __init reserve_ebda_region(void)
-{
-	unsigned int lowmem, ebda_addr;
-
-	/* To determine the position of the EBDA and the */
-	/* end of conventional memory, we need to look at */
-	/* the BIOS data area. In a paravirtual environment */
-	/* that area is absent. We'll just have to assume */
-	/* that the paravirt case can handle memory setup */
-	/* correctly, without our help. */
-	if (paravirt_enabled())
-		return;
-
-	/* end of low (conventional) memory */
-	lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
-	lowmem <<= 10;
-
-	/* start of EBDA area */
-	ebda_addr = get_bios_ebda();
-
-	/* Fixup: bios puts an EBDA in the top 64K segment */
-	/* of conventional memory, but does not adjust lowmem. */
-	if ((lowmem - ebda_addr) <= 0x10000)
-		lowmem = ebda_addr;
-
-	/* Fixup: bios does not report an EBDA at all. */
-	/* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
-	if ((ebda_addr == 0) && (lowmem >= 0x9f000))
-		lowmem = 0x9f000;
-
-	/* Paranoia: should never happen, but... */
-	if ((lowmem == 0) || (lowmem >= 0x100000))
-		lowmem = 0x9f000;
-
-	/* reserve all memory between lowmem and the 1MB mark */
-	reserve_early(lowmem, 0x100000, "BIOS reserved");
-}
-
 static void __init reserve_setup_data(void)
 {
 	struct setup_data *data;
diff --git a/include/asm-x86/bios_ebda.h b/include/asm-x86/bios_ebda.h
index b4a46b7be79..0033e50c13b 100644
--- a/include/asm-x86/bios_ebda.h
+++ b/include/asm-x86/bios_ebda.h
@@ -14,4 +14,6 @@ static inline unsigned int get_bios_ebda(void)
 	return address;	/* 0 means none */
 }
 
+void reserve_ebda_region(void);
+
 #endif /* _MACH_BIOS_EBDA_H */
-- 
cgit v1.2.3-70-g09d2


From c45a707dbe35cb9aa6490223e5b1129fa3583948 Mon Sep 17 00:00:00 2001
From: "Huang, Ying" <ying.huang@intel.com>
Date: Mon, 2 Jun 2008 14:26:25 +0800
Subject: x86: linked list of setup_data for i386

This patch adds linked list of struct setup_data supported for i386.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Cc: andi@firstfloor.org
Cc: mingo@redhat.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/head.c      | 18 ++++++++++++++++++
 arch/x86/kernel/head64.c    | 18 ------------------
 arch/x86/kernel/setup.c     | 22 ++++++++++++++++++++++
 arch/x86/kernel/setup_32.c  |  3 +++
 arch/x86/kernel/setup_64.c  | 22 ----------------------
 include/asm-x86/bootparam.h |  3 +++
 6 files changed, 46 insertions(+), 40 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
index e0d0ce58979..a727c0b9819 100644
--- a/arch/x86/kernel/head.c
+++ b/arch/x86/kernel/head.c
@@ -53,3 +53,21 @@ void __init reserve_ebda_region(void)
 	/* reserve all memory between lowmem and the 1MB mark */
 	reserve_early(lowmem, 0x100000, "BIOS reserved");
 }
+
+void __init reserve_setup_data(void)
+{
+	struct setup_data *data;
+	u64 pa_data;
+	char buf[32];
+
+	if (boot_params.hdr.version < 0x0209)
+		return;
+	pa_data = boot_params.hdr.setup_data;
+	while (pa_data) {
+		data = early_ioremap(pa_data, sizeof(*data));
+		sprintf(buf, "setup data %x", data->type);
+		reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
+		pa_data = data->next;
+		early_iounmap(data, sizeof(*data));
+	}
+}
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index f1773c8ee21..5fbed459ff3 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -51,24 +51,6 @@ static void __init copy_bootdata(char *real_mode_data)
 	}
 }
 
-static void __init reserve_setup_data(void)
-{
-	struct setup_data *data;
-	unsigned long pa_data;
-	char buf[32];
-
-	if (boot_params.hdr.version < 0x0209)
-		return;
-	pa_data = boot_params.hdr.setup_data;
-	while (pa_data) {
-		data = early_ioremap(pa_data, sizeof(*data));
-		sprintf(buf, "setup data %x", data->type);
-		reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
-		pa_data = data->next;
-		early_iounmap(data, sizeof(*data));
-	}
-}
-
 void __init x86_64_start_kernel(char * real_mode_data)
 {
 	int i;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 6f80b852a19..0a281f2c715 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -137,3 +137,25 @@ void __init setup_per_cpu_areas(void)
 }
 
 #endif
+
+void __init parse_setup_data(void)
+{
+	struct setup_data *data;
+	u64 pa_data;
+
+	if (boot_params.hdr.version < 0x0209)
+		return;
+	pa_data = boot_params.hdr.setup_data;
+	while (pa_data) {
+		data = early_ioremap(pa_data, PAGE_SIZE);
+		switch (data->type) {
+		default:
+			break;
+		}
+#ifndef CONFIG_DEBUG_BOOT_PARAMS
+		free_early(pa_data, pa_data+sizeof(*data)+data->len);
+#endif
+		pa_data = data->next;
+		early_iounmap(data, PAGE_SIZE);
+	}
+}
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 2960cbecfa5..ee1ccdbd710 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -681,6 +681,7 @@ void __init setup_arch(char **cmdline_p)
 	pre_setup_arch_hook();
 	early_cpu_init();
 	early_ioremap_init();
+	reserve_setup_data();
 
 #ifdef CONFIG_EFI
 	if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
@@ -729,6 +730,8 @@ void __init setup_arch(char **cmdline_p)
 	bss_resource.start = virt_to_phys(&__bss_start);
 	bss_resource.end = virt_to_phys(&__bss_stop)-1;
 
+	parse_setup_data();
+
 	parse_early_param();
 
 	finish_e820_parsing();
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 078c02f6f5f..adf3b04dc58 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -272,28 +272,6 @@ void __attribute__((weak)) __init memory_setup(void)
        machine_specific_memory_setup();
 }
 
-static void __init parse_setup_data(void)
-{
-	struct setup_data *data;
-	unsigned long pa_data;
-
-	if (boot_params.hdr.version < 0x0209)
-		return;
-	pa_data = boot_params.hdr.setup_data;
-	while (pa_data) {
-		data = early_ioremap(pa_data, PAGE_SIZE);
-		switch (data->type) {
-		default:
-			break;
-		}
-#ifndef CONFIG_DEBUG_BOOT_PARAMS
-		free_early(pa_data, pa_data+sizeof(*data)+data->len);
-#endif
-		pa_data = data->next;
-		early_iounmap(data, PAGE_SIZE);
-	}
-}
-
 #ifdef CONFIG_PCI_MMCONFIG
 extern void __cpuinit fam10h_check_enable_mmcfg(void);
 extern void __init check_enable_amd_mmconf_dmi(void);
diff --git a/include/asm-x86/bootparam.h b/include/asm-x86/bootparam.h
index f62f4733606..0a073904168 100644
--- a/include/asm-x86/bootparam.h
+++ b/include/asm-x86/bootparam.h
@@ -106,4 +106,7 @@ struct boot_params {
 	__u8  _pad9[276];				/* 0xeec */
 } __attribute__((packed));
 
+void reserve_setup_data(void);
+void parse_setup_data(void);
+
 #endif /* _ASM_BOOTPARAM_H */
-- 
cgit v1.2.3-70-g09d2


From 3ed3f06295e69700fa808396f7b350bff2b69de0 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Wed, 4 Jun 2008 01:00:47 +0400
Subject: x86: nmi - consolidate nmi_watchdog_default for 32bit mode

64bit mode bootstrap code does set nmi_watchdog to NMI_NONE
by default and doing the same on 32bit mode is safe too.
Such an action saves us from several #ifdef.

Btw, my previous commit

commit 19ec673ced067316b9732bc6d1c4ff4052e5f795
Author: Cyrill Gorcunov <gorcunov@gmail.com>
Date:   Wed May 28 23:00:47 2008 +0400

    x86: nmi - fix incorrect NMI watchdog used by default

did not fix the problem completely, moreover it
introduced additional bug - nmi_watchdog would be
set to either NMI_LOCAL_APIC or NMI_IO_APIC
_regardless_ to boot option if being enabled thru
/proc/sys/kernel/nmi_watchdog. Sorry for that.
Fix it too.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: mingo@redhat.com
Cc: hpa@zytor.com
Cc: macro@linux-mips.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/nmi.c | 14 ++++++++------
 include/asm-x86/nmi.h |  4 +---
 2 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index cbd4fa3c475..27ca8f69b46 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -487,14 +487,16 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
 		return -EIO;
 	}
 
-#ifdef CONFIG_X86_64
 	/* if nmi_watchdog is not set yet, then set it */
 	nmi_watchdog_default();
-#else
-	if (lapic_watchdog_ok())
-		nmi_watchdog = NMI_LOCAL_APIC;
-	else
-		nmi_watchdog = NMI_IO_APIC;
+
+#ifdef CONFIG_X86_32
+	if (nmi_watchdog == NMI_NONE) {
+		if (lapic_watchdog_ok())
+			nmi_watchdog = NMI_LOCAL_APIC;
+		else
+			nmi_watchdog = NMI_IO_APIC;
+	}
 #endif
 
 	if (nmi_watchdog == NMI_LOCAL_APIC) {
diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h
index 972a4f6f799..470bb4aacb7 100644
--- a/include/asm-x86/nmi.h
+++ b/include/asm-x86/nmi.h
@@ -38,12 +38,10 @@ static inline void unset_nmi_pm_callback(struct pm_dev *dev)
 
 #ifdef CONFIG_X86_64
 extern void default_do_nmi(struct pt_regs *);
-extern void nmi_watchdog_default(void);
-#else
-#define nmi_watchdog_default() do { } while (0)
 #endif
 
 extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
+extern void nmi_watchdog_default(void);
 extern int check_nmi_watchdog(void);
 extern int nmi_watchdog_enabled;
 extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
-- 
cgit v1.2.3-70-g09d2


From e0da33646826b66ef933d47ea2fb7a693fd849bf Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sun, 8 Jun 2008 18:29:22 -0700
Subject: x86: introduce max_physical_apicid for bigsmp switching

a multi-socket test-system with 3 or 4 ioapics, when 4 dualcore cpus or
2 quadcore cpus installed, needs to switch to bigsmp or physflat.

CPU apic id is [4,11] instead of [0,7], and we need to check max apic
id instead of cpu numbers.

also add check for 32 bit when acpi is not compiled in or acpi=off.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c      |  5 ++++-
 arch/x86/kernel/apic_64.c      |  3 +++
 arch/x86/kernel/genapic_64.c   |  2 +-
 arch/x86/kernel/mpparse.c      |  5 +++++
 arch/x86/kernel/setup.c        |  1 +
 arch/x86/kernel/setup_32.c     | 11 +++++------
 arch/x86/mach-generic/bigsmp.c |  2 +-
 include/asm-x86/mpspec.h       |  1 +
 8 files changed, 21 insertions(+), 9 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index c304759f083..954d67931a5 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1518,6 +1518,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
 		 */
 		cpu = 0;
 
+	if (apicid > max_physical_apicid)
+		max_physical_apicid = apicid;
+
 	/*
 	 * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
 	 * but we need to work other dependencies like SMP_SUSPEND etc
@@ -1525,7 +1528,7 @@ void __cpuinit generic_processor_info(int apicid, int version)
 	 * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
 	 *       - Ashok Raj <ashok.raj@intel.com>
 	 */
-	if (num_processors > 8) {
+	if (max_physical_apicid >= 8) {
 		switch (boot_cpu_data.x86_vendor) {
 		case X86_VENDOR_INTEL:
 			if (!APIC_XAPIC(version)) {
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 54087f920f2..1872555b98a 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -1093,6 +1093,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
 		 */
 		cpu = 0;
 	}
+	if (apicid > max_physical_apicid)
+		max_physical_apicid = apicid;
+
 	/* are we being called early in kernel startup? */
 	if (x86_cpu_to_apicid_early_ptr) {
 		u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index cbaaf69bedb..1fa8be5bd21 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -51,7 +51,7 @@ void __init setup_apic_routing(void)
 	else
 #endif
 
-	if (num_possible_cpus() <= 8)
+	if (max_physical_apicid < 8)
 		genapic = &apic_flat;
 	else
 		genapic = &apic_physflat;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index b4a950da2f9..7591325e616 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -473,6 +473,11 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
 		++mpc_record;
 #endif
 	}
+
+#ifdef CONFIG_X86_GENERICARCH
+       generic_bigsmp_probe();
+#endif
+
 	setup_apic_routing();
 	if (!num_processors)
 		printk(KERN_ERR "MPTABLE: no processors registered!\n");
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 0a281f2c715..45a5e247d45 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -17,6 +17,7 @@ unsigned int num_processors;
 unsigned disabled_cpus __cpuinitdata;
 /* Processor that is doing the boot up */
 unsigned int boot_cpu_physical_apicid = -1U;
+unsigned int max_physical_apicid;
 EXPORT_SYMBOL(boot_cpu_physical_apicid);
 
 DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index ee1ccdbd710..be1a9900367 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -838,18 +838,17 @@ void __init setup_arch(char **cmdline_p)
 
 #ifdef CONFIG_ACPI
 	acpi_boot_init();
-
+#endif
+#if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS)
+	if (smp_found_config)
+		get_smp_config();
+#endif
 #if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
 	if (def_to_bigsmp)
 		printk(KERN_WARNING "More than 8 CPUs detected and "
 			"CONFIG_X86_PC cannot handle it.\nUse "
 			"CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
 #endif
-#endif
-#if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS)
-	if (smp_found_config)
-		get_smp_config();
-#endif
 
 	e820_setup_gap();
 	e820_mark_nosave_regions(max_low_pfn);
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index 95fc463056d..2a243019aca 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -48,7 +48,7 @@ static const struct dmi_system_id bigsmp_dmi_table[] = {
 static int probe_bigsmp(void)
 {
 	if (def_to_bigsmp)
-	dmi_bigsmp = 1;
+		dmi_bigsmp = 1;
 	else
 		dmi_check_system(bigsmp_dmi_table);
 	return dmi_bigsmp;
diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h
index f38b68e8de5..4451d720ee0 100644
--- a/include/asm-x86/mpspec.h
+++ b/include/asm-x86/mpspec.h
@@ -33,6 +33,7 @@ extern int mp_bus_id_to_type[MAX_MP_BUSSES];
 extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
 
 extern unsigned int boot_cpu_physical_apicid;
+extern unsigned int max_physical_apicid;
 extern int smp_found_config;
 extern int mpc_default_type;
 extern unsigned long mp_lapic_addr;
-- 
cgit v1.2.3-70-g09d2


From d49c4288407b2ffa8cab270cb5bc6882abe969f6 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sun, 8 Jun 2008 18:31:54 -0700
Subject: x86: make generic arch support NUMAQ

... so it could fall back to normal numa and we'd reduce the impact of the
NUMAQ subarch.

NUMAQ depends on GENERICARCH
also decouple genericarch numa from acpi.
also make it fall back to bigsmp if apicid > 8.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig                            | 74 ++++++++++++++---------------
 arch/x86/Makefile                           | 18 -------
 arch/x86/boot/compressed/misc.c             |  4 --
 arch/x86/kernel/acpi/boot.c                 |  4 +-
 arch/x86/kernel/io_apic_32.c                |  9 ++--
 arch/x86/kernel/mpparse.c                   | 73 ++++++++++++++++++++++++++--
 arch/x86/kernel/numaq_32.c                  |  2 -
 arch/x86/kernel/summit_32.c                 |  2 +
 arch/x86/mach-es7000/Makefile               |  1 -
 arch/x86/mach-es7000/es7000plat.c           | 47 ------------------
 arch/x86/mach-generic/Makefile              | 10 ++--
 arch/x86/mach-generic/bigsmp.c              |  2 -
 arch/x86/mach-generic/numaq.c               | 41 ++++++++++++++++
 arch/x86/mach-generic/probe.c               | 15 +++++-
 arch/x86/pci/Makefile_32                    |  5 +-
 arch/x86/pci/numa.c                         | 29 ++---------
 drivers/acpi/Kconfig                        |  1 -
 include/asm-x86/mach-generic/mach_mpparse.h |  7 ++-
 include/asm-x86/mach-numaq/mach_apic.h      | 39 ++++-----------
 include/asm-x86/mach-numaq/mach_mpparse.h   | 11 +----
 include/asm-x86/mmzone_32.h                 | 18 ++-----
 include/asm-x86/mpspec.h                    |  6 +++
 include/asm-x86/numaq.h                     |  5 +-
 include/asm-x86/srat.h                      | 12 +++--
 24 files changed, 219 insertions(+), 216 deletions(-)
 create mode 100644 arch/x86/mach-generic/numaq.c

(limited to 'include/asm-x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9e976150442..8b89810fe3f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -268,36 +268,6 @@ config X86_VOYAGER
 	  If you do not specifically know you have a Voyager based machine,
 	  say N here, otherwise the kernel you build will not be bootable.
 
-config X86_NUMAQ
-	bool "NUMAQ (IBM/Sequent)"
-	depends on SMP && X86_32
-	select NUMA
-	help
-	  This option is used for getting Linux to run on a (IBM/Sequent) NUMA
-	  multiquad box. This changes the way that processors are bootstrapped,
-	  and uses Clustered Logical APIC addressing mode instead of Flat Logical.
-	  You will need a new lynxer.elf file to flash your firmware with - send
-	  email to <Martin.Bligh@us.ibm.com>.
-
-config X86_SUMMIT
-	bool "Summit/EXA (IBM x440)"
-	depends on X86_32 && SMP
-	help
-	  This option is needed for IBM systems that use the Summit/EXA chipset.
-	  In particular, it is needed for the x440.
-
-	  If you don't have one of these computers, you should say N here.
-	  If you want to build a NUMA kernel, you must select ACPI.
-
-config X86_BIGSMP
-	bool "Support for other sub-arch SMP systems with more than 8 CPUs"
-	depends on X86_32 && SMP
-	help
-	  This option is needed for the systems that have more than 8 CPUs
-	  and if the system is not of any sub-arch type above.
-
-	  If you don't have such a system, you should say N here.
-
 config X86_VISWS
 	bool "SGI 320/540 (Visual Workstation)"
 	depends on X86_32
@@ -311,12 +281,33 @@ config X86_VISWS
 	  and vice versa. See <file:Documentation/sgi-visws.txt> for details.
 
 config X86_GENERICARCH
-       bool "Generic architecture (Summit, bigsmp, ES7000, default)"
+       bool "Generic architecture"
 	depends on X86_32
        help
-          This option compiles in the Summit, bigsmp, ES7000, default subarchitectures.
-	  It is intended for a generic binary kernel.
-	  If you want a NUMA kernel, select ACPI.   We need SRAT for NUMA.
+          This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default
+	  subarchitectures.  It is intended for a generic binary kernel.
+	  if you select them all, kernel will probe it one by one. and will
+	  fallback to default.
+
+if X86_GENERICARCH
+
+config X86_NUMAQ
+	bool "NUMAQ (IBM/Sequent)"
+	depends on SMP && X86_32
+	select NUMA
+	help
+	  This option is used for getting Linux to run on a NUMAQ (IBM/Sequent)
+	  NUMA multiquad box. This changes the way that processors are
+	  bootstrapped, and uses Clustered Logical APIC addressing mode instead
+	  of Flat Logical.  You will need a new lynxer.elf file to flash your
+	  firmware with - send email to <Martin.Bligh@us.ibm.com>.
+
+config X86_SUMMIT
+	bool "Summit/EXA (IBM x440)"
+	depends on X86_32 && SMP
+	help
+	  This option is needed for IBM systems that use the Summit/EXA chipset.
+	  In particular, it is needed for the x440.
 
 config X86_ES7000
 	bool "Support for Unisys ES7000 IA32 series"
@@ -324,8 +315,15 @@ config X86_ES7000
 	help
 	  Support for Unisys ES7000 systems.  Say 'Y' here if this kernel is
 	  supposed to run on an IA32-based Unisys ES7000 system.
-	  Only choose this option if you have such a system, otherwise you
-	  should say N here.
+
+config X86_BIGSMP
+	bool "Support for big SMP systems with more than 8 CPUs"
+	depends on X86_32 && SMP
+	help
+	  This option is needed for the systems that have more than 8 CPUs
+	  and if the system is not of any sub-arch type above.
+
+endif
 
 config X86_RDC321X
 	bool "RDC R-321x SoC"
@@ -913,9 +911,9 @@ config X86_PAE
 config NUMA
 	bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)"
 	depends on SMP
-	depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || (X86_SUMMIT || X86_GENERICARCH) && ACPI) && EXPERIMENTAL)
+	depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_GENERICARCH || X86_SUMMIT && ACPI) && EXPERIMENTAL)
 	default n if X86_PC
-	default y if (X86_NUMAQ || X86_SUMMIT)
+	default y if (X86_NUMAQ || X86_SUMMIT || X86_GENERICARCH)
 	help
 	  Enable NUMA (Non Uniform Memory Access) support.
 	  The kernel will try to allocate memory used by a CPU on the
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 30b046bcccb..d6650131659 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -117,29 +117,11 @@ mcore-$(CONFIG_X86_VOYAGER)	:= arch/x86/mach-voyager/
 mflags-$(CONFIG_X86_VISWS)	:= -Iinclude/asm-x86/mach-visws
 mcore-$(CONFIG_X86_VISWS)	:= arch/x86/mach-visws/
 
-# NUMAQ subarch support
-mflags-$(CONFIG_X86_NUMAQ)	:= -Iinclude/asm-x86/mach-numaq
-mcore-$(CONFIG_X86_NUMAQ)	:= arch/x86/mach-default/
-
-# BIGSMP subarch support
-mflags-$(CONFIG_X86_BIGSMP)	:= -Iinclude/asm-x86/mach-bigsmp
-mcore-$(CONFIG_X86_BIGSMP)	:= arch/x86/mach-default/
-
-#Summit subarch support
-mflags-$(CONFIG_X86_SUMMIT)	:= -Iinclude/asm-x86/mach-summit
-mcore-$(CONFIG_X86_SUMMIT)	:= arch/x86/mach-default/
-
 # generic subarchitecture
 mflags-$(CONFIG_X86_GENERICARCH):= -Iinclude/asm-x86/mach-generic
 fcore-$(CONFIG_X86_GENERICARCH)	+= arch/x86/mach-generic/
 mcore-$(CONFIG_X86_GENERICARCH)	:= arch/x86/mach-default/
 
-
-# ES7000 subarch support
-mflags-$(CONFIG_X86_ES7000)	:= -Iinclude/asm-x86/mach-es7000
-fcore-$(CONFIG_X86_ES7000)	:= arch/x86/mach-es7000/
-mcore-$(CONFIG_X86_ES7000)	:= arch/x86/mach-default/
-
 # RDC R-321x subarch support
 mflags-$(CONFIG_X86_RDC321X)	:= -Iinclude/asm-x86/mach-rdc321x
 mcore-$(CONFIG_X86_RDC321X)	:= arch/x86/mach-default/
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 90456cee47c..ba0be6a25ff 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -221,10 +221,6 @@ static char *vidmem;
 static int vidport;
 static int lines, cols;
 
-#ifdef CONFIG_X86_NUMAQ
-void *xquad_portio;
-#endif
-
 #include "../../../../lib/inflate.c"
 
 static void *malloc(int size)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index f226bdc19f6..7dc2130046d 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -848,7 +848,7 @@ static int __init acpi_parse_madt_lapic_entries(void)
 #ifdef	CONFIG_X86_IO_APIC
 #define MP_ISA_BUS		0
 
-#if defined(CONFIG_X86_ES7000) || defined(CONFIG_X86_GENERICARCH)
+#ifdef CONFIG_X86_ES7000
 extern int es7000_plat;
 #endif
 
@@ -997,7 +997,7 @@ void __init mp_config_acpi_legacy_irqs(void)
 	set_bit(MP_ISA_BUS, mp_bus_not_pci);
 	Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
 
-#if defined(CONFIG_X86_ES7000) || defined(CONFIG_X86_GENERICARCH)
+#ifdef CONFIG_X86_ES7000
 	/*
 	 * Older generations of ES7000 have no legacy identity mappings
 	 */
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 2285b81ad1d..97e62a01f1e 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -1722,7 +1722,6 @@ void disable_IO_APIC(void)
  * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
  */
 
-#ifndef CONFIG_X86_NUMAQ
 static void __init setup_ioapic_ids_from_mpc(void)
 {
 	union IO_APIC_reg_00 reg_00;
@@ -1732,6 +1731,11 @@ static void __init setup_ioapic_ids_from_mpc(void)
 	unsigned char old_id;
 	unsigned long flags;
 
+#ifdef CONFIG_X86_NUMAQ
+	if (found_numaq)
+		return;
+#endif
+
 	/*
 	 * Don't check I/O APIC IDs for xAPIC systems.  They have
 	 * no meaning without the serial APIC bus.
@@ -1828,9 +1832,6 @@ static void __init setup_ioapic_ids_from_mpc(void)
 			apic_printk(APIC_VERBOSE, " ok.\n");
 	}
 }
-#else
-static void __init setup_ioapic_ids_from_mpc(void) { }
-#endif
 
 int no_timer_check __initdata;
 
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 7591325e616..1cc7a4b8643 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -49,15 +49,73 @@ static int __init mpf_checksum(unsigned char *mp, int len)
 }
 
 #ifdef CONFIG_X86_NUMAQ
+int found_numaq;
 /*
  * Have to match translation table entries to main table entries by counter
  * hence the mpc_record variable .... can't see a less disgusting way of
  * doing this ....
  */
+struct mpc_config_translation {
+	unsigned char mpc_type;
+	unsigned char trans_len;
+	unsigned char trans_type;
+	unsigned char trans_quad;
+	unsigned char trans_global;
+	unsigned char trans_local;
+	unsigned short trans_reserved;
+};
+
 
 static int mpc_record;
 static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY]
     __cpuinitdata;
+
+static inline int generate_logical_apicid(int quad, int phys_apicid)
+{
+	return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
+}
+
+
+static inline int mpc_apic_id(struct mpc_config_processor *m,
+			struct mpc_config_translation *translation_record)
+{
+	int quad = translation_record->trans_quad;
+	int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid);
+
+	printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
+	       m->mpc_apicid,
+	       (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
+	       (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
+	       m->mpc_apicver, quad, logical_apicid);
+	return logical_apicid;
+}
+
+int mp_bus_id_to_node[MAX_MP_BUSSES];
+
+int mp_bus_id_to_local[MAX_MP_BUSSES];
+
+static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name,
+	struct mpc_config_translation *translation)
+{
+	int quad = translation->trans_quad;
+	int local = translation->trans_local;
+
+	mp_bus_id_to_node[m->mpc_busid] = quad;
+	mp_bus_id_to_local[m->mpc_busid] = local;
+	printk(KERN_INFO "Bus #%d is %s (node %d)\n",
+	       m->mpc_busid, name, quad);
+}
+
+int quad_local_to_mp_bus_id [NR_CPUS/4][4];
+static void mpc_oem_pci_bus(struct mpc_config_bus *m,
+	struct mpc_config_translation *translation)
+{
+	int quad = translation->trans_quad;
+	int local = translation->trans_local;
+
+	quad_local_to_mp_bus_id[quad][local] = m->mpc_busid;
+}
+
 #endif
 
 static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
@@ -321,11 +379,11 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
 	}
 }
 
-static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
+void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
 				 char *productid)
 {
 	if (strncmp(oem, "IBM NUMA", 8))
-		printk("Warning!  May not be a NUMA-Q system!\n");
+		printk("Warning!  Not a NUMA-Q system!\n");
 	else
 		found_numaq = 1;
 
@@ -388,7 +446,16 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
 		return 0;
 
 #ifdef CONFIG_X86_32
-	mps_oem_check(mpc, oem, str);
+	/*
+	 * need to make sure summit and es7000's mps_oem_check is safe to be
+	 * called early via genericarch 's mps_oem_check
+	 */
+	if (early) {
+#ifdef CONFIG_X86_NUMAQ
+		numaq_mps_oem_check(mpc, oem, str);
+#endif
+	} else
+		mps_oem_check(mpc, oem, str);
 #endif
 
 	/* save the local APIC address, it might be non-default */
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index 27b908254fb..f0f1de1c4a1 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -36,8 +36,6 @@
 
 #define	MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
 
-int found_numaq;
-
 /*
  * Function: smp_dump_qct()
  *
diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c
index ae751094eba..d67ce5f044b 100644
--- a/arch/x86/kernel/summit_32.c
+++ b/arch/x86/kernel/summit_32.c
@@ -36,7 +36,9 @@ static struct rio_table_hdr *rio_table_hdr __initdata;
 static struct scal_detail   *scal_devs[MAX_NUMNODES] __initdata;
 static struct rio_detail    *rio_devs[MAX_NUMNODES*4] __initdata;
 
+#ifndef CONFIG_X86_NUMAQ
 static int mp_bus_id_to_node[MAX_MP_BUSSES] __initdata;
+#endif
 
 static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
 {
diff --git a/arch/x86/mach-es7000/Makefile b/arch/x86/mach-es7000/Makefile
index 69dd4da218d..3ef8b43b62f 100644
--- a/arch/x86/mach-es7000/Makefile
+++ b/arch/x86/mach-es7000/Makefile
@@ -3,4 +3,3 @@
 #
 
 obj-$(CONFIG_X86_ES7000)	:= es7000plat.o
-obj-$(CONFIG_X86_GENERICARCH)	:= es7000plat.o
diff --git a/arch/x86/mach-es7000/es7000plat.c b/arch/x86/mach-es7000/es7000plat.c
index a41c77a4722..4354ce80488 100644
--- a/arch/x86/mach-es7000/es7000plat.c
+++ b/arch/x86/mach-es7000/es7000plat.c
@@ -177,53 +177,6 @@ find_unisys_acpi_oem_table(unsigned long *oem_addr)
 }
 #endif
 
-/*
- * This file also gets compiled if CONFIG_X86_GENERICARCH is set. Generic
- * arch already has got following function definitions (asm-generic/es7000.c)
- * hence no need to define these for that case.
- */
-#ifndef CONFIG_X86_GENERICARCH
-void es7000_sw_apic(void);
-void __init enable_apic_mode(void)
-{
-	es7000_sw_apic();
-	return;
-}
-
-__init int mps_oem_check(struct mp_config_table *mpc, char *oem,
-		char *productid)
-{
-	if (mpc->mpc_oemptr) {
-		struct mp_config_oemtable *oem_table =
-			(struct mp_config_oemtable *)mpc->mpc_oemptr;
-		if (!strncmp(oem, "UNISYS", 6))
-			return parse_unisys_oem((char *)oem_table);
-	}
-	return 0;
-}
-#ifdef CONFIG_ACPI
-/* Hook from generic ACPI tables.c */
-int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
-	unsigned long oem_addr;
-	if (!find_unisys_acpi_oem_table(&oem_addr)) {
-		if (es7000_check_dsdt())
-			return parse_unisys_oem((char *)oem_addr);
-		else {
-			setup_unisys();
-			return 1;
-		}
-	}
-	return 0;
-}
-#else
-int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
-	return 0;
-}
-#endif
-#endif /* COFIG_X86_GENERICARCH */
-
 static void
 es7000_spin(int n)
 {
diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile
index 19d6d407737..0dbd7803a1d 100644
--- a/arch/x86/mach-generic/Makefile
+++ b/arch/x86/mach-generic/Makefile
@@ -2,7 +2,11 @@
 # Makefile for the generic architecture
 #
 
-EXTRA_CFLAGS	:= -Iarch/x86/kernel
+EXTRA_CFLAGS			:= -Iarch/x86/kernel
 
-obj-y		:= probe.o summit.o bigsmp.o es7000.o default.o 
-obj-y		+= ../../x86/mach-es7000/
+obj-y				:= probe.o default.o
+obj-$(CONFIG_X86_NUMAQ)		+= numaq.o
+obj-$(CONFIG_X86_SUMMIT)	+= summit.o
+obj-$(CONFIG_X86_BIGSMP)	+= bigsmp.o
+obj-$(CONFIG_X86_ES7000)	+= es7000.o
+obj-$(CONFIG_X86_ES7000)	+= ../../x86/mach-es7000/
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index 2a243019aca..59d77171455 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -23,10 +23,8 @@ static int dmi_bigsmp; /* can be set by dmi scanners */
 
 static int hp_ht_bigsmp(const struct dmi_system_id *d)
 {
-#ifdef CONFIG_X86_GENERICARCH
 	printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident);
 	dmi_bigsmp = 1;
-#endif
 	return 0;
 }
 
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c
new file mode 100644
index 00000000000..8091e68764c
--- /dev/null
+++ b/arch/x86/mach-generic/numaq.c
@@ -0,0 +1,41 @@
+/*
+ * APIC driver for the IBM NUMAQ chipset.
+ */
+#define APIC_DEFINITION 1
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/smp.h>
+#include <asm/mpspec.h>
+#include <asm/genapic.h>
+#include <asm/fixmap.h>
+#include <asm/apicdef.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <asm/mach-numaq/mach_apic.h>
+#include <asm/mach-numaq/mach_apicdef.h>
+#include <asm/mach-numaq/mach_ipi.h>
+#include <asm/mach-numaq/mach_mpparse.h>
+#include <asm/mach-numaq/mach_wakecpu.h>
+#include <asm/numaq.h>
+
+static int mps_oem_check(struct mp_config_table *mpc, char *oem,
+		char *productid)
+{
+	numaq_mps_oem_check(mpc, oem, productid);
+	return found_numaq;
+}
+
+static int probe_numaq(void)
+{
+	/* already know from get_memcfg_numaq() */
+	return found_numaq;
+}
+
+/* Hook from generic ACPI tables.c */
+static int acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+	return 0;
+}
+
+struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq);
diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c
index c5ae751b994..ba18dec4855 100644
--- a/arch/x86/mach-generic/probe.c
+++ b/arch/x86/mach-generic/probe.c
@@ -16,6 +16,7 @@
 #include <asm/apicdef.h>
 #include <asm/genapic.h>
 
+extern struct genapic apic_numaq;
 extern struct genapic apic_summit;
 extern struct genapic apic_bigsmp;
 extern struct genapic apic_es7000;
@@ -24,9 +25,18 @@ extern struct genapic apic_default;
 struct genapic *genapic = &apic_default;
 
 static struct genapic *apic_probe[] __initdata = {
+#ifdef CONFIG_X86_NUMAQ
+	&apic_numaq,
+#endif
+#ifdef CONFIG_X86_SUMMIT
 	&apic_summit,
+#endif
+#ifdef CONFIG_X86_BIGSMP
 	&apic_bigsmp,
+#endif
+#ifdef CONFIG_X86_ES7000
 	&apic_es7000,
+#endif
 	&apic_default,	/* must be last */
 	NULL,
 };
@@ -54,6 +64,7 @@ early_param("apic", parse_apic);
 
 void __init generic_bigsmp_probe(void)
 {
+#if CONFIG_X86_BIGSMP
 	/*
 	 * This routine is used to switch to bigsmp mode when
 	 * - There is no apic= option specified by the user
@@ -67,6 +78,7 @@ void __init generic_bigsmp_probe(void)
 			printk(KERN_INFO "Overriding APIC driver with %s\n",
 			       genapic->name);
 		}
+#endif
 }
 
 void __init generic_apic_probe(void)
@@ -88,7 +100,8 @@ void __init generic_apic_probe(void)
 
 /* These functions can switch the APIC even after the initial ->probe() */
 
-int __init mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid)
+int __init mps_oem_check(struct mp_config_table *mpc, char *oem,
+				 char *productid)
 {
 	int i;
 	for (i = 0; apic_probe[i]; ++i) {
diff --git a/arch/x86/pci/Makefile_32 b/arch/x86/pci/Makefile_32
index 89ec35d00ef..962d96c0495 100644
--- a/arch/x86/pci/Makefile_32
+++ b/arch/x86/pci/Makefile_32
@@ -13,10 +13,11 @@ pci-y				:= fixup.o
 pci-$(CONFIG_ACPI)		+= acpi.o
 pci-y				+= legacy.o irq.o
 
-# Careful: VISWS and NUMAQ overrule the pci-y above. The colons are
+# Careful: VISWS overrule the pci-y above. The colons are
 # therefor correct. This needs a proper fix by distangling the code.
 pci-$(CONFIG_X86_VISWS)		:= visws.o fixup.o
-pci-$(CONFIG_X86_NUMAQ)		:= numa.o irq.o
+
+pci-$(CONFIG_X86_NUMAQ)		+= numa.o
 
 # Necessary for NUMAQ as well
 pci-$(CONFIG_NUMA)		+= mp_bus_to_node.o
diff --git a/arch/x86/pci/numa.c b/arch/x86/pci/numa.c
index d9afbae5092..99f1ecd485b 100644
--- a/arch/x86/pci/numa.c
+++ b/arch/x86/pci/numa.c
@@ -6,45 +6,21 @@
 #include <linux/init.h>
 #include <linux/nodemask.h>
 #include <mach_apic.h>
+#include <asm/mpspec.h>
 #include "pci.h"
 
 #define XQUAD_PORTIO_BASE 0xfe400000
 #define XQUAD_PORTIO_QUAD 0x40000  /* 256k per quad. */
 
-int mp_bus_id_to_node[MAX_MP_BUSSES];
 #define BUS2QUAD(global) (mp_bus_id_to_node[global])
 
-int mp_bus_id_to_local[MAX_MP_BUSSES];
 #define BUS2LOCAL(global) (mp_bus_id_to_local[global])
 
-void mpc_oem_bus_info(struct mpc_config_bus *m, char *name,
-	struct mpc_config_translation *translation)
-{
-	int quad = translation->trans_quad;
-	int local = translation->trans_local;
-
-	mp_bus_id_to_node[m->mpc_busid] = quad;
-	mp_bus_id_to_local[m->mpc_busid] = local;
-	printk(KERN_INFO "Bus #%d is %s (node %d)\n",
-	       m->mpc_busid, name, quad);
-}
-
-int quad_local_to_mp_bus_id [NR_CPUS/4][4];
 #define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local])
-void mpc_oem_pci_bus(struct mpc_config_bus *m,
-	struct mpc_config_translation *translation)
-{
-	int quad = translation->trans_quad;
-	int local = translation->trans_local;
-
-	quad_local_to_mp_bus_id[quad][local] = m->mpc_busid;
-}
 
 /* Where the IO area was mapped on multiquad, always 0 otherwise */
 void *xquad_portio;
-#ifdef CONFIG_X86_NUMAQ
 EXPORT_SYMBOL(xquad_portio);
-#endif
 
 #define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port)
 
@@ -179,6 +155,9 @@ static int __init pci_numa_init(void)
 {
 	int quad;
 
+	if (!found_numaq)
+		return 0;
+
 	raw_pci_ops = &pci_direct_conf1_mq;
 
 	if (pcibios_scanned++)
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index c52fca83326..860f15f36ce 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -4,7 +4,6 @@
 
 menuconfig ACPI
 	bool "ACPI (Advanced Configuration and Power Interface) Support"
-	depends on !X86_NUMAQ
 	depends on !X86_VISWS
 	depends on !IA64_HP_SIM
 	depends on IA64 || X86
diff --git a/include/asm-x86/mach-generic/mach_mpparse.h b/include/asm-x86/mach-generic/mach_mpparse.h
index 0d0b5ba2e9d..586cadbf378 100644
--- a/include/asm-x86/mach-generic/mach_mpparse.h
+++ b/include/asm-x86/mach-generic/mach_mpparse.h
@@ -1,7 +1,10 @@
 #ifndef _MACH_MPPARSE_H
 #define _MACH_MPPARSE_H 1
 
-int mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid); 
-int acpi_madt_oem_check(char *oem_id, char *oem_table_id); 
+
+extern int mps_oem_check(struct mp_config_table *mpc, char *oem,
+			 char *productid);
+
+extern int acpi_madt_oem_check(char *oem_id, char *oem_table_id);
 
 #endif
diff --git a/include/asm-x86/mach-numaq/mach_apic.h b/include/asm-x86/mach-numaq/mach_apic.h
index 75a56e5afbe..d802465e026 100644
--- a/include/asm-x86/mach-numaq/mach_apic.h
+++ b/include/asm-x86/mach-numaq/mach_apic.h
@@ -20,8 +20,14 @@ static inline cpumask_t target_cpus(void)
 #define INT_DELIVERY_MODE dest_LowestPrio
 #define INT_DEST_MODE 0     /* physical delivery on LOCAL quad */
  
-#define check_apicid_used(bitmap, apicid) physid_isset(apicid, bitmap)
-#define check_apicid_present(bit) physid_isset(bit, phys_cpu_present_map)
+static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
+{
+	return physid_isset(apicid, bitmap);
+}
+static inline unsigned long check_apicid_present(int bit)
+{
+	return physid_isset(bit, phys_cpu_present_map);
+}
 #define apicid_cluster(apicid) (apicid & 0xF0)
 
 static inline int apic_id_registered(void)
@@ -77,11 +83,6 @@ static inline int cpu_present_to_apicid(int mps_cpu)
 		return BAD_APICID;
 }
 
-static inline int generate_logical_apicid(int quad, int phys_apicid)
-{
-	return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
-}
-
 static inline int apicid_to_node(int logical_apicid) 
 {
 	return logical_apicid >> 4;
@@ -95,30 +96,6 @@ static inline physid_mask_t apicid_to_cpu_present(int logical_apicid)
 	return physid_mask_of_physid(cpu + 4*node);
 }
 
-struct mpc_config_translation {
-	unsigned char mpc_type;
-	unsigned char trans_len;
-	unsigned char trans_type;
-	unsigned char trans_quad;
-	unsigned char trans_global;
-	unsigned char trans_local;
-	unsigned short trans_reserved;
-};
-
-static inline int mpc_apic_id(struct mpc_config_processor *m, 
-			struct mpc_config_translation *translation_record)
-{
-	int quad = translation_record->trans_quad;
-	int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid);
-
-	printk("Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
-	       m->mpc_apicid,
-	       (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
-	       (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
-	       m->mpc_apicver, quad, logical_apicid);
-	return logical_apicid;
-}
-
 extern void *xquad_portio;
 
 static inline void setup_portio_remap(void)
diff --git a/include/asm-x86/mach-numaq/mach_mpparse.h b/include/asm-x86/mach-numaq/mach_mpparse.h
index 459b1240118..626aef6b155 100644
--- a/include/asm-x86/mach-numaq/mach_mpparse.h
+++ b/include/asm-x86/mach-numaq/mach_mpparse.h
@@ -1,14 +1,7 @@
 #ifndef __ASM_MACH_MPPARSE_H
 #define __ASM_MACH_MPPARSE_H
 
-extern void mpc_oem_bus_info(struct mpc_config_bus *m, char *name,
-			     struct mpc_config_translation *translation);
-extern void mpc_oem_pci_bus(struct mpc_config_bus *m,
-	struct mpc_config_translation *translation);
-
-/* Hook from generic ACPI tables.c */
-static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
-}
+extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
+				char *productid);
 
 #endif /* __ASM_MACH_MPPARSE_H */
diff --git a/include/asm-x86/mmzone_32.h b/include/asm-x86/mmzone_32.h
index ab0012888c4..b2298a22756 100644
--- a/include/asm-x86/mmzone_32.h
+++ b/include/asm-x86/mmzone_32.h
@@ -12,11 +12,9 @@
 extern struct pglist_data *node_data[];
 #define NODE_DATA(nid)	(node_data[nid])
 
-#ifdef CONFIG_X86_NUMAQ
-	#include <asm/numaq.h>
-#elif defined(CONFIG_ACPI_SRAT)/* summit or generic arch */
-	#include <asm/srat.h>
-#endif
+#include <asm/numaq.h>
+/* summit or generic arch */
+#include <asm/srat.h>
 
 extern int get_memcfg_numa_flat(void);
 /*
@@ -26,14 +24,11 @@ extern int get_memcfg_numa_flat(void);
  */
 static inline void get_memcfg_numa(void)
 {
-#ifdef CONFIG_X86_NUMAQ
+
 	if (get_memcfg_numaq())
 		return;
-#elif defined(CONFIG_ACPI_SRAT)
 	if (get_memcfg_from_srat())
 		return;
-#endif
-
 	get_memcfg_numa_flat();
 }
 
@@ -42,7 +37,6 @@ extern int early_pfn_to_nid(unsigned long pfn);
 #else /* !CONFIG_NUMA */
 
 #define get_memcfg_numa get_memcfg_numa_flat
-#define get_zholes_size(n) (0)
 
 #endif /* CONFIG_NUMA */
 
@@ -83,9 +77,6 @@ static inline int pfn_to_nid(unsigned long pfn)
 	__pgdat->node_start_pfn + __pgdat->node_spanned_pages;		\
 })
 
-#ifdef CONFIG_X86_NUMAQ            /* we have contiguous memory on NUMA-Q */
-#define pfn_valid(pfn)          ((pfn) < num_physpages)
-#else
 static inline int pfn_valid(int pfn)
 {
 	int nid = pfn_to_nid(pfn);
@@ -94,7 +85,6 @@ static inline int pfn_valid(int pfn)
 		return (pfn < node_end_pfn(nid));
 	return 0;
 }
-#endif /* CONFIG_X86_NUMAQ */
 
 #endif /* CONFIG_DISCONTIGMEM */
 
diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h
index 4451d720ee0..6e9c9588b1f 100644
--- a/include/asm-x86/mpspec.h
+++ b/include/asm-x86/mpspec.h
@@ -13,6 +13,12 @@ extern int apic_version[MAX_APICS];
 extern u8 apicid_2_node[];
 extern int pic_mode;
 
+#ifdef CONFIG_X86_NUMAQ
+extern int mp_bus_id_to_node[MAX_MP_BUSSES];
+extern int mp_bus_id_to_local[MAX_MP_BUSSES];
+extern int quad_local_to_mp_bus_id [NR_CPUS/4][4];
+#endif
+
 #define MAX_APICID 256
 
 #else
diff --git a/include/asm-x86/numaq.h b/include/asm-x86/numaq.h
index 739d16484b8..ef068d2465d 100644
--- a/include/asm-x86/numaq.h
+++ b/include/asm-x86/numaq.h
@@ -157,9 +157,10 @@ struct sys_cfg_data {
 	struct		eachquadmem eq[MAX_NUMNODES];	/* indexed by quad id */
 };
 
-static inline unsigned long *get_zholes_size(int nid)
+#else
+static inline int get_memcfg_numaq(void)
 {
-	return NULL;
+	return 0;
 }
 #endif /* CONFIG_X86_NUMAQ */
 #endif /* NUMAQ_H */
diff --git a/include/asm-x86/srat.h b/include/asm-x86/srat.h
index f4bba131d06..456fe0b5a92 100644
--- a/include/asm-x86/srat.h
+++ b/include/asm-x86/srat.h
@@ -27,11 +27,13 @@
 #ifndef _ASM_SRAT_H_
 #define _ASM_SRAT_H_
 
-#ifndef CONFIG_ACPI_SRAT
-#error CONFIG_ACPI_SRAT not defined, and srat.h header has been included
-#endif
-
+#ifdef CONFIG_ACPI_SRAT
 extern int get_memcfg_from_srat(void);
-extern unsigned long *get_zholes_size(int);
+#else
+static inline int get_memcfg_from_srat(void)
+{
+	return 0;
+}
+#endif
 
 #endif /* _ASM_SRAT_H_ */
-- 
cgit v1.2.3-70-g09d2


From af1cf204ba2fd8135933a2e4df523fb1112dc0e2 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 25 May 2008 21:16:06 +0200
Subject: x86, mpparse: build fix

fix:

  LD      .tmp_vmlinux1
  arch/x86/kernel/built-in.o: In function `setup_arch':
  : undefined reference to `early_reserve_e820_mpc_new'

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/mpspec.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h
index 6e9c9588b1f..b8ba37496e2 100644
--- a/include/asm-x86/mpspec.h
+++ b/include/asm-x86/mpspec.h
@@ -46,7 +46,11 @@ extern unsigned long mp_lapic_addr;
 
 extern void find_smp_config(void);
 extern void get_smp_config(void);
+#ifdef CONFIG_X86_MPPARSE
 extern void early_reserve_e820_mpc_new(void);
+#else
+static inline void early_reserve_e820_mpc_new(void) { }
+#endif
 
 void __cpuinit generic_processor_info(int apicid, int version);
 #ifdef CONFIG_ACPI
-- 
cgit v1.2.3-70-g09d2


From ce8e37cdbdb34a9faeade22e0e6440f0d04560f5 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Fri, 6 Jun 2008 10:21:39 +0100
Subject: x86: set PAE PHYSICAL_MASK_SHIFT to 44 bits.

When a 64-bit x86 processor runs in 32-bit PAE mode, a pte can
potentially have the same number of physical address bits as the
64-bit host ("Enhanced Legacy PAE Paging").  This means, in theory,
we could have up to 52 bits of physical address in a pte.

The 32-bit kernel uses a 32-bit unsigned long to represent a pfn.
This means that it can only represent physical addresses up to 32+12=44
bits wide.  Rather than widening pfns everywhere, just set 2^44 as the
Linux x86_32-PAE architectural limit for physical address size.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/page_32.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index 424e82f8ae2..ccf0ba3c3ab 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h
@@ -14,7 +14,8 @@
 #define __PAGE_OFFSET		_AC(CONFIG_PAGE_OFFSET, UL)
 
 #ifdef CONFIG_X86_PAE
-#define __PHYSICAL_MASK_SHIFT	36
+/* 44=32+12, the limit we can fit into an unsigned long pfn */
+#define __PHYSICAL_MASK_SHIFT	44
 #define __VIRTUAL_MASK_SHIFT	32
 #define PAGETABLE_LEVELS	3
 
-- 
cgit v1.2.3-70-g09d2


From 9e26d84273541a8c6c2efb705457ca8e6245fb73 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Fri, 6 Jun 2008 12:01:13 +0200
Subject: fix build bug in "x86: add PCI extended config space access for AMD
 Barcelona"

Also much less code now.

Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/amd.c    |  2 --
 arch/x86/kernel/cpu/amd_64.c |  1 +
 arch/x86/kernel/cpu/cpu.h    |  5 +++++
 arch/x86/kernel/setup.c      |  2 +-
 arch/x86/kernel/setup.h      | 26 --------------------------
 arch/x86/kernel/setup_64.c   |  2 --
 include/asm-x86/mmconfig.h   |  6 ------
 7 files changed, 7 insertions(+), 37 deletions(-)
 delete mode 100644 arch/x86/kernel/setup.h

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 656b40aed64..a38d54f4ff2 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -4,10 +4,8 @@
 #include <asm/io.h>
 #include <asm/processor.h>
 #include <asm/apic.h>
-#include <asm/mmconfig.h>
 
 #include <mach_apic.h>
-#include "../setup.h"
 #include "cpu.h"
 
 /*
diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c
index 180097e9921..626fc21f027 100644
--- a/arch/x86/kernel/cpu/amd_64.c
+++ b/arch/x86/kernel/cpu/amd_64.c
@@ -6,6 +6,7 @@
 #include <asm/cacheflush.h>
 
 #include <mach_apic.h>
+#include "cpu.h"
 
 extern int __cpuinit get_model_name(struct cpuinfo_x86 *c);
 extern void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c);
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 783691b2a73..f5d5bb1b554 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -1,3 +1,4 @@
+#ifdef CONFIG_X86_32
 
 struct cpu_model_info {
 	int vendor;
@@ -36,3 +37,7 @@ extern struct cpu_vendor_dev __x86cpuvendor_start[], __x86cpuvendor_end[];
 
 extern int get_model_name(struct cpuinfo_x86 *c);
 extern void display_cacheinfo(struct cpuinfo_x86 *c);
+
+#endif /* CONFIG_X86_32 */
+
+extern void __cpuinit amd_enable_pci_ext_cfg(struct cpuinfo_x86 *c);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d8f17123401..20e14dbef10 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -136,6 +136,7 @@ void __init setup_per_cpu_areas(void)
 	setup_cpumask_of_cpu();
 }
 
+#endif
 #define ENABLE_CF8_EXT_CFG      (1ULL << 46)
 
 void __cpuinit amd_enable_pci_ext_cfg(struct cpuinfo_x86 *c)
@@ -149,4 +150,3 @@ void __cpuinit amd_enable_pci_ext_cfg(struct cpuinfo_x86 *c)
 	set_cpu_cap(c, X86_FEATURE_PCI_EXT_CFG);
 }
 
-#endif
diff --git a/arch/x86/kernel/setup.h b/arch/x86/kernel/setup.h
deleted file mode 100644
index 66cc2c7f961..00000000000
--- a/arch/x86/kernel/setup.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Internal declarations for shared x86 setup code.
- *
- * Copyright (c) 2008 Advanced Micro Devices, Inc.
- * Contributed by Robert Richter <robert.richter@amd.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- * 02111-1307 USA
- */
-
-#ifndef _ARCH_X86_KERNEL_SETUP_H
-
-extern void __cpuinit amd_enable_pci_ext_cfg(struct cpuinfo_x86 *c);
-
-#endif	/* _ARCH_X86_KERNEL_SETUP_H */
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 215bd67e5b6..25afdd80a3c 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -73,8 +73,6 @@
 #include <asm/pat.h>
 #include <asm/mmconfig.h>
 
-#include "setup.h"
-
 #include <mach_apic.h>
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
diff --git a/include/asm-x86/mmconfig.h b/include/asm-x86/mmconfig.h
index 691798fbee1..95beda07c6f 100644
--- a/include/asm-x86/mmconfig.h
+++ b/include/asm-x86/mmconfig.h
@@ -9,10 +9,4 @@ static inline void fam10h_check_enable_mmcfg(void) { }
 static inline void check_enable_amd_mmconf_dmi(void) { }
 #endif
 
-#if defined(CONFIG_SMP) && defined(CONFIG_X86_64)
-extern void __cpuinit amd_enable_pci_ext_cfg(struct cpuinfo_x86 *c);
-#else
-static inline void amd_enable_pci_ext_cfg(struct cpuinfo_x86 *c) { }
-#endif
-
 #endif
-- 
cgit v1.2.3-70-g09d2


From aa83f3f2cfc74d66d01b1d2eb1485ea1103a0f4e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 9 Jun 2008 17:11:13 +0200
Subject: x86: cleanup C1E enabled detection

Rename the "MSR_K8_ENABLE_C1E" MSR to INT_PENDING_MSG, which is the
name in the data sheet as well. Move the C1E mask to the header file.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/amd.c    | 5 ++---
 arch/x86/kernel/cpu/amd_64.c | 5 ++---
 include/asm-x86/msr-index.h  | 4 +++-
 3 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index a38d54f4ff2..30b5055be35 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -25,7 +25,6 @@ extern void vide(void);
 __asm__(".align 4\nvide: ret");
 
 #ifdef CONFIG_X86_LOCAL_APIC
-#define ENABLE_C1E_MASK         0x18000000
 #define CPUID_PROCESSOR_SIGNATURE       1
 #define CPUID_XFAM              0x0ff00000
 #define CPUID_XFAM_K8           0x00000000
@@ -45,8 +44,8 @@ static __cpuinit int amd_apic_timer_broken(void)
 			break;
 	case CPUID_XFAM_10H:
 	case CPUID_XFAM_11H:
-		rdmsr(MSR_K8_ENABLE_C1E, lo, hi);
-		if (lo & ENABLE_C1E_MASK) {
+		rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
+		if (lo & K8_INTP_C1E_ACTIVE_MASK) {
 			if (smp_processor_id() != boot_cpu_physical_apicid)
 				printk(KERN_INFO "AMD C1E detected late. "
 				       "	Force timer broadcast.\n");
diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c
index 626fc21f027..6eef3c79d15 100644
--- a/arch/x86/kernel/cpu/amd_64.c
+++ b/arch/x86/kernel/cpu/amd_64.c
@@ -110,7 +110,6 @@ static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c)
 #endif
 }
 
-#define ENABLE_C1E_MASK		0x18000000
 #define CPUID_PROCESSOR_SIGNATURE	1
 #define CPUID_XFAM		0x0ff00000
 #define CPUID_XFAM_K8		0x00000000
@@ -130,8 +129,8 @@ static __cpuinit int amd_apic_timer_broken(void)
 			break;
 	case CPUID_XFAM_10H:
 	case CPUID_XFAM_11H:
-		rdmsr(MSR_K8_ENABLE_C1E, lo, hi);
-		if (lo & ENABLE_C1E_MASK)
+		rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
+		if (lo & K8_INTP_C1E_ACTIVE_MASK)
 			return 1;
 		break;
 	default:
diff --git a/include/asm-x86/msr-index.h b/include/asm-x86/msr-index.h
index 09413ad39d3..44bce773012 100644
--- a/include/asm-x86/msr-index.h
+++ b/include/asm-x86/msr-index.h
@@ -111,7 +111,9 @@
 #define MSR_K8_TOP_MEM2			0xc001001d
 #define MSR_K8_SYSCFG			0xc0010010
 #define MSR_K8_HWCR			0xc0010015
-#define MSR_K8_ENABLE_C1E		0xc0010055
+#define MSR_K8_INT_PENDING_MSG		0xc0010055
+/* C1E active bits in int pending message */
+#define K8_INTP_C1E_ACTIVE_MASK		0x18000000
 #define MSR_K8_TSEG_ADDR		0xc0010112
 #define K8_MTRRFIXRANGE_DRAM_ENABLE	0x00040000 /* MtrrFixDramEn bit    */
 #define K8_MTRRFIXRANGE_DRAM_MODIFY	0x00080000 /* MtrrFixDramModEn bit */
-- 
cgit v1.2.3-70-g09d2


From e7891c733f9b26c851edde50cf886a30bd133dbd Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Thu, 22 May 2008 14:35:21 -0700
Subject: PCI/x86: write_pci_config_byte fix offset

also add write_pci_config_16

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/x86/pci/early.c         | 9 ++++++++-
 include/asm-x86/pci-direct.h | 1 +
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c
index 42df4b6606d..8e2821e8dac 100644
--- a/arch/x86/pci/early.c
+++ b/arch/x86/pci/early.c
@@ -49,7 +49,14 @@ void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val)
 {
 	PDprintk("%x writing to %x: %x\n", slot, offset, val);
 	outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
-	outb(val, 0xcfc);
+	outb(val, 0xcfc + (offset&3));
+}
+
+void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val)
+{
+	PDprintk("%x writing to %x: %x\n", slot, offset, val);
+	outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
+	outw(val, 0xcfc + (offset&2));
 }
 
 int early_pci_allowed(void)
diff --git a/include/asm-x86/pci-direct.h b/include/asm-x86/pci-direct.h
index 5b21485be57..7bd40b4de75 100644
--- a/include/asm-x86/pci-direct.h
+++ b/include/asm-x86/pci-direct.h
@@ -11,6 +11,7 @@ extern u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset);
 extern u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset);
 extern void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, u32 val);
 extern void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val);
+extern void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val);
 
 extern int early_pci_allowed(void);
 
-- 
cgit v1.2.3-70-g09d2


From e3f2baebf4209b5927e23fa65d5977d31db936b3 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Thu, 22 May 2008 14:35:11 -0700
Subject: PCI/x86: early dump pci conf space v2

Allows us to dump PCI space before any kernel changes have been made.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/x86/kernel/setup_64.c   |  5 +++++
 arch/x86/pci/common.c        |  4 ++++
 arch/x86/pci/early.c         | 51 ++++++++++++++++++++++++++++++++++++++++++++
 include/asm-x86/pci-direct.h |  3 +++
 4 files changed, 63 insertions(+)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 6dff1286ad8..524b6850b2c 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -361,6 +361,11 @@ void __init setup_arch(char **cmdline_p)
 
 	parse_early_param();
 
+#ifdef CONFIG_PCI
+	if (pci_early_dump_regs)
+		early_dump_pci_devices();
+#endif
+
 #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
 	if (init_ohci1394_dma_early)
 		init_ohci1394_dma_on_all_controllers();
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 3a5261bdff5..d19fd07bafd 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -20,6 +20,7 @@
 unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
 				PCI_PROBE_MMCONF;
 
+unsigned int pci_early_dump_regs;
 static int pci_bf_sort;
 int pci_routeirq;
 int pcibios_last_bus = -1;
@@ -511,6 +512,9 @@ char * __devinit  pcibios_setup(char *str)
 	} else if (!strcmp(str, "use_crs")) {
 		pci_probe |= PCI_USE__CRS;
 		return NULL;
+	} else if (!strcmp(str, "earlydump")) {
+		pci_early_dump_regs = 1;
+		return NULL;
 	} else if (!strcmp(str, "routeirq")) {
 		pci_routeirq = 1;
 		return NULL;
diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c
index 8e2821e8dac..858dbe3399f 100644
--- a/arch/x86/pci/early.c
+++ b/arch/x86/pci/early.c
@@ -64,3 +64,54 @@ int early_pci_allowed(void)
 	return (pci_probe & (PCI_PROBE_CONF1|PCI_PROBE_NOEARLY)) ==
 			PCI_PROBE_CONF1;
 }
+
+void early_dump_pci_device(u8 bus, u8 slot, u8 func)
+{
+	int i;
+	int j;
+	u32 val;
+
+	printk("PCI: %02x:%02x:%02x", bus, slot, func);
+
+	for (i = 0; i < 256; i += 4) {
+		if (!(i & 0x0f))
+			printk("\n%04x:",i);
+
+		val = read_pci_config(bus, slot, func, i);
+		for (j = 0; j < 4; j++) {
+			printk(" %02x", val & 0xff);
+			val >>= 8;
+		}
+	}
+	printk("\n");
+}
+
+void early_dump_pci_devices(void)
+{
+	unsigned bus, slot, func;
+
+	if (!early_pci_allowed())
+		return;
+
+	for (bus = 0; bus < 256; bus++) {
+		for (slot = 0; slot < 32; slot++) {
+			for (func = 0; func < 8; func++) {
+				u32 class;
+				u8 type;
+				class = read_pci_config(bus, slot, func,
+							PCI_CLASS_REVISION);
+				if (class == 0xffffffff)
+					break;
+
+				early_dump_pci_device(bus, slot, func);
+
+				/* No multi-function device? */
+				type = read_pci_config_byte(bus, slot, func,
+							       PCI_HEADER_TYPE);
+				if (!(type & 0x80))
+					break;
+			}
+		}
+	}
+}
+
diff --git a/include/asm-x86/pci-direct.h b/include/asm-x86/pci-direct.h
index 7bd40b4de75..80c775d9fe2 100644
--- a/include/asm-x86/pci-direct.h
+++ b/include/asm-x86/pci-direct.h
@@ -15,4 +15,7 @@ extern void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val);
 
 extern int early_pci_allowed(void);
 
+extern unsigned int pci_early_dump_regs;
+extern void early_dump_pci_device(u8 bus, u8 slot, u8 func);
+extern void early_dump_pci_devices(void);
 #endif
-- 
cgit v1.2.3-70-g09d2


From cd7a4e936d345ab4cb49d68192d90bd4e4c58458 Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Tue, 10 Jun 2008 16:05:39 +0200
Subject: x86: PAT: fixed checkpatch errors (and whitespaces)

x86: PAT: fixed checkpatch errors (and whitespaces)

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/addon_cpuid_features.c |  2 --
 arch/x86/mm/pat.c                          | 29 ++++++++++++++---------------
 include/asm-x86/pat.h                      |  6 ++----
 3 files changed, 16 insertions(+), 21 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index 2df461f06a5..84a8220a607 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -1,9 +1,7 @@
-
 /*
  *	Routines to indentify additional cpu features that are scattered in
  *	cpuid space.
  */
-
 #include <linux/cpu.h>
 
 #include <asm/pat.h>
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 65105b1195a..a8b69bb2697 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -66,7 +66,7 @@ enum {
 	PAT_UC_MINUS = 7,	/* UC, but can be overriden by MTRR */
 };
 
-#define PAT(x,y)	((u64)PAT_ ## y << ((x)*8))
+#define PAT(x, y)	((u64)PAT_ ## y << ((x)*8))
 
 void pat_init(void)
 {
@@ -100,8 +100,8 @@ void pat_init(void)
 	 *      011 UC		_PAGE_CACHE_UC
 	 * PAT bit unused
 	 */
-	pat = PAT(0,WB) | PAT(1,WC) | PAT(2,UC_MINUS) | PAT(3,UC) |
-	      PAT(4,WB) | PAT(5,WC) | PAT(6,UC_MINUS) | PAT(7,UC);
+	pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
+	      PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
 
 	/* Boot CPU check */
 	if (!boot_pat_state)
@@ -117,11 +117,11 @@ void pat_init(void)
 static char *cattr_name(unsigned long flags)
 {
 	switch (flags & _PAGE_CACHE_MASK) {
-		case _PAGE_CACHE_UC:		return "uncached";
-		case _PAGE_CACHE_UC_MINUS:	return "uncached-minus";
-		case _PAGE_CACHE_WB:		return "write-back";
-		case _PAGE_CACHE_WC:		return "write-combining";
-		default:			return "broken";
+	case _PAGE_CACHE_UC:		return "uncached";
+	case _PAGE_CACHE_UC_MINUS:	return "uncached-minus";
+	case _PAGE_CACHE_WB:		return "write-back";
+	case _PAGE_CACHE_WC:		return "write-combining";
+	default:			return "broken";
 	}
 }
 
@@ -536,11 +536,11 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 	 * we maintain the tradition of paranoia in this code.
 	 */
 	if (!pat_wc_enabled &&
-	    ! ( boot_cpu_has(X86_FEATURE_MTRR) ||
-		boot_cpu_has(X86_FEATURE_K6_MTRR) ||
-		boot_cpu_has(X86_FEATURE_CYRIX_ARR) ||
-		boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) &&
-	   (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
+	    !(boot_cpu_has(X86_FEATURE_MTRR) ||
+	      boot_cpu_has(X86_FEATURE_K6_MTRR) ||
+	      boot_cpu_has(X86_FEATURE_CYRIX_ARR) ||
+	      boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) &&
+	    (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
 		flags = _PAGE_CACHE_UC;
 	}
 #endif
@@ -562,7 +562,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 		return 0;
 
 	if (pfn <= max_pfn_mapped &&
-            ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
+	    ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
 		free_memtype(offset, offset + size);
 		printk(KERN_INFO
 		"%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n",
@@ -600,4 +600,3 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
 
 	free_memtype(addr, addr + size);
 }
-
diff --git a/include/asm-x86/pat.h b/include/asm-x86/pat.h
index 88f60cc6a22..6fa9710ae09 100644
--- a/include/asm-x86/pat.h
+++ b/include/asm-x86/pat.h
@@ -1,6 +1,5 @@
-
 #ifndef _ASM_PAT_H
-#define _ASM_PAT_H 1
+#define _ASM_PAT_H
 
 #include <linux/types.h>
 
@@ -8,7 +7,7 @@
 extern int pat_wc_enabled;
 extern void validate_pat_support(struct cpuinfo_x86 *c);
 #else
-static const int pat_wc_enabled = 0;
+static const int pat_wc_enabled;
 static inline void validate_pat_support(struct cpuinfo_x86 *c) { }
 #endif
 
@@ -21,4 +20,3 @@ extern int free_memtype(u64 start, u64 end);
 extern void pat_disable(char *reason);
 
 #endif
-
-- 
cgit v1.2.3-70-g09d2


From 499f8f84b8324ba27d756e03f373fa16eeed9ccc Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Tue, 10 Jun 2008 16:06:21 +0200
Subject: x86: rename pat_wc_enabled to pat_enabled

BTW, what does pat_wc_enabled stand for? Does it mean
"write-combining"?

Currently it is used to globally switch on or off PAT support.
Thus I renamed it to pat_enabled.
I think this increases readability (and hope that I didn't miss
something).

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/ioremap.c  |  4 ++--
 arch/x86/mm/pageattr.c |  2 +-
 arch/x86/mm/pat.c      | 16 ++++++++--------
 arch/x86/pci/i386.c    |  4 ++--
 include/asm-x86/pat.h  |  4 ++--
 5 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 71bb3159031..ddeafed1171 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -261,7 +261,7 @@ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
 {
 	/*
 	 * Ideally, this should be:
-	 *	pat_wc_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS;
+	 *	pat_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS;
 	 *
 	 * Till we fix all X drivers to use ioremap_wc(), we will use
 	 * UC MINUS.
@@ -285,7 +285,7 @@ EXPORT_SYMBOL(ioremap_nocache);
  */
 void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size)
 {
-	if (pat_wc_enabled)
+	if (pat_enabled)
 		return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
 					__builtin_return_address(0));
 	else
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 60bcb5b6a37..6916fe4bf0c 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -805,7 +805,7 @@ int _set_memory_wc(unsigned long addr, int numpages)
 
 int set_memory_wc(unsigned long addr, int numpages)
 {
-	if (!pat_wc_enabled)
+	if (!pat_enabled)
 		return set_memory_uc(addr, numpages);
 
 	if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index a8b69bb2697..4beccea0897 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -26,11 +26,11 @@
 #include <asm/io.h>
 
 #ifdef CONFIG_X86_PAT
-int __read_mostly pat_wc_enabled = 1;
+int __read_mostly pat_enabled = 1;
 
 void __cpuinit pat_disable(char *reason)
 {
-	pat_wc_enabled = 0;
+	pat_enabled = 0;
 	printk(KERN_INFO "%s\n", reason);
 }
 
@@ -72,7 +72,7 @@ void pat_init(void)
 {
 	u64 pat;
 
-	if (!pat_wc_enabled)
+	if (!pat_enabled)
 		return;
 
 	/* Paranoia check. */
@@ -225,8 +225,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 	unsigned long actual_type;
 	int err = 0;
 
-	/* Only track when pat_wc_enabled */
-	if (!pat_wc_enabled) {
+	/* Only track when pat_enabled */
+	if (!pat_enabled) {
 		/* This is identical to page table setting without PAT */
 		if (ret_type) {
 			if (req_type == -1) {
@@ -440,8 +440,8 @@ int free_memtype(u64 start, u64 end)
 	struct memtype *ml;
 	int err = -EINVAL;
 
-	/* Only track when pat_wc_enabled */
-	if (!pat_wc_enabled) {
+	/* Only track when pat_enabled */
+	if (!pat_enabled) {
 		return 0;
 	}
 
@@ -535,7 +535,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 	 * caching for the high addresses through the KEN pin, but
 	 * we maintain the tradition of paranoia in this code.
 	 */
-	if (!pat_wc_enabled &&
+	if (!pat_enabled &&
 	    !(boot_cpu_has(X86_FEATURE_MTRR) ||
 	      boot_cpu_has(X86_FEATURE_K6_MTRR) ||
 	      boot_cpu_has(X86_FEATURE_CYRIX_ARR) ||
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 10fb308fded..6ccd7a108cd 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -299,9 +299,9 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 		return -EINVAL;
 
 	prot = pgprot_val(vma->vm_page_prot);
-	if (pat_wc_enabled && write_combine)
+	if (pat_enabled && write_combine)
 		prot |= _PAGE_CACHE_WC;
-	else if (pat_wc_enabled || boot_cpu_data.x86 > 3)
+	else if (pat_enabled || boot_cpu_data.x86 > 3)
 		/*
 		 * ioremap() and ioremap_nocache() defaults to UC MINUS for now.
 		 * To avoid attribute conflicts, request UC MINUS here
diff --git a/include/asm-x86/pat.h b/include/asm-x86/pat.h
index 6fa9710ae09..7edc4730721 100644
--- a/include/asm-x86/pat.h
+++ b/include/asm-x86/pat.h
@@ -4,10 +4,10 @@
 #include <linux/types.h>
 
 #ifdef CONFIG_X86_PAT
-extern int pat_wc_enabled;
+extern int pat_enabled;
 extern void validate_pat_support(struct cpuinfo_x86 *c);
 #else
-static const int pat_wc_enabled;
+static const int pat_enabled;
 static inline void validate_pat_support(struct cpuinfo_x86 *c) { }
 #endif
 
-- 
cgit v1.2.3-70-g09d2


From bf07dc864902b3e788de5ab50dc62d5677da90f2 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Thu, 12 Jun 2008 10:27:08 +0200
Subject: x86: remove obsolete PM definitions from NMI header

Remove obsolete and no longer used PM-related definitions from
include/asm-x86/nmi.h.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/nmi.h | 21 ---------------------
 1 file changed, 21 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h
index 8455bf36d8d..621bd090c1f 100644
--- a/include/asm-x86/nmi.h
+++ b/include/asm-x86/nmi.h
@@ -15,27 +15,6 @@
  */
 int do_nmi_callback(struct pt_regs *regs, int cpu);
 
-#ifdef CONFIG_PM
-
-/** Replace the PM callback routine for NMI. */
-struct pm_dev *set_nmi_pm_callback(pm_callback callback);
-
-/** Unset the PM callback routine back to the default. */
-void unset_nmi_pm_callback(struct pm_dev *dev);
-
-#else
-
-static inline struct pm_dev *set_nmi_pm_callback(pm_callback callback)
-{
-	return 0;
-}
-
-static inline void unset_nmi_pm_callback(struct pm_dev *dev)
-{
-}
-
-#endif /* CONFIG_PM */
-
 #ifdef CONFIG_X86_64
 extern void default_do_nmi(struct pt_regs *);
 extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
-- 
cgit v1.2.3-70-g09d2


From 6703f6d10dcd3316e03641a5ecaa6c8a04374d98 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Tue, 10 Jun 2008 00:10:48 +0200
Subject: x86, gart: add resume handling

If GART IOMMU is used on an AMD64 system, the northbridge registers
related to it should be restored during resume so that memory is not
corrupted.  Make gart_resume() handle that as appropriate.

Ref. http://lkml.org/lkml/2008/5/25/96 and the following thread.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/aperture_64.c |  2 ++
 arch/x86/kernel/pci-gart_64.c | 57 +++++++++++++++++++++++++++++++++++++++----
 include/asm-x86/gart.h        |  1 +
 3 files changed, 55 insertions(+), 5 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index eb20f168c0f..3409abb231a 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -496,4 +496,6 @@ out:
 			write_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE, aper_alloc >> 25);
 		}
 	}
+
+	set_up_gart_resume(aper_order, aper_alloc);
 }
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 3710097f02e..f505c389035 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -549,14 +549,63 @@ static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
 	return aper_base;
 }
 
+static void enable_gart_translations(void)
+{
+	int i;
+
+	for (i = 0; i < num_k8_northbridges; i++) {
+		struct pci_dev *dev = k8_northbridges[i];
+
+		enable_gart_translation(dev, __pa(agp_gatt_table));
+	}
+}
+
+/*
+ * If fix_up_north_bridges is set, the north bridges have to be fixed up on
+ * resume in the same way as they are handled in gart_iommu_hole_init().
+ */
+static bool fix_up_north_bridges;
+static u32 aperture_order;
+static u32 aperture_alloc;
+
+void set_up_gart_resume(u32 aper_order, u32 aper_alloc)
+{
+	fix_up_north_bridges = true;
+	aperture_order = aper_order;
+	aperture_alloc = aper_alloc;
+}
+
 static int gart_resume(struct sys_device *dev)
 {
+	printk(KERN_INFO "PCI-DMA: Resuming GART IOMMU\n");
+
+	if (fix_up_north_bridges) {
+		int i;
+
+		printk(KERN_INFO "PCI-DMA: Restoring GART aperture settings\n");
+
+		for (i = 0; i < num_k8_northbridges; i++) {
+			struct pci_dev *dev = k8_northbridges[i];
+
+			/*
+			 * Don't enable translations just yet.  That is the next
+			 * step.  Restore the pre-suspend aperture settings.
+			 */
+			pci_write_config_dword(dev, AMD64_GARTAPERTURECTL,
+						aperture_order << 1);
+			pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE,
+						aperture_alloc >> 25);
+		}
+	}
+
+	enable_gart_translations();
+
 	return 0;
 }
 
 static int gart_suspend(struct sys_device *dev, pm_message_t state)
 {
-	return -EINVAL;
+	return 0;
 }
 
 static struct sysdev_class gart_sysdev_class = {
@@ -614,16 +663,14 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 	memset(gatt, 0, gatt_size);
 	agp_gatt_table = gatt;
 
-	for (i = 0; i < num_k8_northbridges; i++) {
-		dev = k8_northbridges[i];
-		enable_gart_translation(dev, __pa(gatt));
-	}
+	enable_gart_translations();
 
 	error = sysdev_class_register(&gart_sysdev_class);
 	if (!error)
 		error = sysdev_register(&device_gart);
 	if (error)
 		panic("Could not register gart_sysdev -- would corrupt data on next suspend");
+
 	flush_gart();
 
 	printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n",
diff --git a/include/asm-x86/gart.h b/include/asm-x86/gart.h
index c818b96f936..eeca2f51fd8 100644
--- a/include/asm-x86/gart.h
+++ b/include/asm-x86/gart.h
@@ -14,6 +14,7 @@ extern void gart_iommu_shutdown(void);
 extern void __init gart_parse_options(char *);
 extern void early_gart_iommu_check(void);
 extern void gart_iommu_hole_init(void);
+extern void set_up_gart_resume(u32, u32);
 extern int fallback_aper_order;
 extern int fallback_aper_force;
 extern int gart_iommu_aperture;
-- 
cgit v1.2.3-70-g09d2


From e6e07d8a2d2989c1f42287131308aa2fde253631 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 16 Jun 2008 16:08:17 -0700
Subject: x86: make asm/asm.h work for asm code.

This is useful for unifying some pieces of asm code.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 include/asm-x86/asm.h | 40 ++++++++++++++++++----------------------
 1 file changed, 18 insertions(+), 22 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/asm.h b/include/asm-x86/asm.h
index 90dec0c2364..70939820c55 100644
--- a/include/asm-x86/asm.h
+++ b/include/asm-x86/asm.h
@@ -1,33 +1,29 @@
 #ifndef _ASM_X86_ASM_H
 #define _ASM_X86_ASM_H
 
-#ifdef CONFIG_X86_32
-/* 32 bits */
-
-# define _ASM_PTR	" .long "
-# define _ASM_ALIGN	" .balign 4 "
-# define _ASM_MOV_UL	" movl "
-
-# define _ASM_INC	" incl "
-# define _ASM_DEC	" decl "
-# define _ASM_ADD	" addl "
-# define _ASM_SUB	" subl "
-# define _ASM_XADD	" xaddl "
+#ifdef __ASSEMBLY__
+# define __ASM_FORM(x)	x
+#else
+# define __ASM_FORM(x)	" " #x " "
+#endif
 
+#ifdef CONFIG_X86_32
+# define __ASM_SEL(a,b) __ASM_FORM(a)
 #else
-/* 64 bits */
+# define __ASM_SEL(a,b) __ASM_FORM(b)
+#endif
 
-# define _ASM_PTR	" .quad "
-# define _ASM_ALIGN	" .balign 8 "
-# define _ASM_MOV_UL	" movq "
+#define __ASM_SIZE(inst)	__ASM_SEL(inst##l, inst##q)
 
-# define _ASM_INC	" incq "
-# define _ASM_DEC	" decq "
-# define _ASM_ADD	" addq "
-# define _ASM_SUB	" subq "
-# define _ASM_XADD	" xaddq "
+#define _ASM_PTR	__ASM_SEL(.long, .quad)
+#define _ASM_ALIGN	__ASM_SEL(.balign 4, .balign 8)
+#define _ASM_MOV_UL	__ASM_SIZE(mov)
 
-#endif /* CONFIG_X86_32 */
+#define _ASM_INC	__ASM_SIZE(inc)
+#define _ASM_DEC	__ASM_SIZE(dec)
+#define _ASM_ADD	__ASM_SIZE(add)
+#define _ASM_SUB	__ASM_SIZE(sub)
+#define _ASM_XADD	__ASM_SIZE(xadd)
 
 /* Exception table entry */
 # define _ASM_EXTABLE(from,to) \
-- 
cgit v1.2.3-70-g09d2


From 0db125c467afcbcc229abb1a87bc36ef72777dc2 Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Tue, 10 Jun 2008 23:45:45 +0200
Subject: x86: more header fixes

Summary: Add missing include guards for some x86 headers.

This has only had the most rudimentary testing, but is hopefully obviously
correct.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/seccomp_64.h | 1 +
 include/asm-x86/suspend_32.h | 5 +++++
 include/asm-x86/xor_32.h     | 5 +++++
 include/asm-x86/xor_64.h     | 5 +++++
 4 files changed, 16 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/seccomp_64.h b/include/asm-x86/seccomp_64.h
index 553af65a228..76cfe69aa63 100644
--- a/include/asm-x86/seccomp_64.h
+++ b/include/asm-x86/seccomp_64.h
@@ -1,4 +1,5 @@
 #ifndef _ASM_SECCOMP_H
+#define _ASM_SECCOMP_H
 
 #include <linux/thread_info.h>
 
diff --git a/include/asm-x86/suspend_32.h b/include/asm-x86/suspend_32.h
index 24e1c080aa8..8675c6782a7 100644
--- a/include/asm-x86/suspend_32.h
+++ b/include/asm-x86/suspend_32.h
@@ -3,6 +3,9 @@
  * Based on code
  * Copyright 2001 Patrick Mochel <mochel@osdl.org>
  */
+#ifndef __ASM_X86_32_SUSPEND_H
+#define __ASM_X86_32_SUSPEND_H
+
 #include <asm/desc.h>
 #include <asm/i387.h>
 
@@ -44,3 +47,5 @@ static inline void acpi_save_register_state(unsigned long return_point)
 /* routines for saving/restoring kernel state */
 extern int acpi_save_state_mem(void);
 #endif
+
+#endif /* __ASM_X86_32_SUSPEND_H */
diff --git a/include/asm-x86/xor_32.h b/include/asm-x86/xor_32.h
index 067b5c1835a..921b4584044 100644
--- a/include/asm-x86/xor_32.h
+++ b/include/asm-x86/xor_32.h
@@ -1,3 +1,6 @@
+#ifndef ASM_X86__XOR_32_H
+#define ASM_X86__XOR_32_H
+
 /*
  * Optimized RAID-5 checksumming functions for MMX and SSE.
  *
@@ -881,3 +884,5 @@ do {							\
    deals with a load to a line that is being prefetched.  */
 #define XOR_SELECT_TEMPLATE(FASTEST)			\
 	(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
+
+#endif /* ASM_X86__XOR_32_H */
diff --git a/include/asm-x86/xor_64.h b/include/asm-x86/xor_64.h
index 24957e39ac8..2d3a18de295 100644
--- a/include/asm-x86/xor_64.h
+++ b/include/asm-x86/xor_64.h
@@ -1,3 +1,6 @@
+#ifndef ASM_X86__XOR_64_H
+#define ASM_X86__XOR_64_H
+
 /*
  * Optimized RAID-5 checksumming functions for MMX and SSE.
  *
@@ -354,3 +357,5 @@ do {						\
    We may also be able to load into the L1 only depending on how the cpu
    deals with a load to a line that is being prefetched.  */
 #define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_sse)
+
+#endif /* ASM_X86__XOR_64_H */
-- 
cgit v1.2.3-70-g09d2


From 1a750e0cd7a30c478723ecfa1df685efcdd38a90 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 18 Jun 2008 21:03:26 -0700
Subject: x86, bitops: make constant-bit set/clear_bit ops faster

On Wed, 18 Jun 2008, Linus Torvalds wrote:
>
> And yes, the "lock andl" should be noticeably faster than the xchgl.

I dunno. Here's a untested (!!) patch that turns constant-bit
set/clear_bit ops into byte mask ops (lock orb/andb).

It's not exactly pretty. The reason for using the byte versions is that a
locked op is serialized in the memory pipeline anyway, so there are no
forwarding issues (that could slow down things when we access things with
different sizes), and the byte ops are a lot smaller than 32-bit and
particularly 64-bit ops (big constants, and the 64-bit ops need the REX
prefix byte too).

[ Side note: I wonder if we should turn the "test_bit()" C version into a
  "char *" version too.. It could actually help with alias analysis, since
  char pointers can alias anything. So it might be the RightThing(tm) to
  do for multiple reasons. I dunno. It's a separate issue. ]

It does actually shrink the kernel image a bit (a couple of hundred bytes
on the text segment for my everything-compiled-in image), and while it's
totally untested the (admittedly few) code generation points I looked at
seemed sane. And "lock orb" should be noticeably faster than "lock bts".

If somebody wants to play with it, go wild. I didn't do "change_bit()",
because nobody sane uses that thing anyway. I guarantee nothing. And if it
breaks, nobody saw me do anything.  You can't prove this email wasn't sent
by somebody who is good at forging smtp.

This does require a gcc that is recent enough for "__builtin_constant_p()"
to work in an inline function, but I suspect our kernel requirements are
already higher than that. And if you do have an old gcc that is supported,
the worst that would happen is that the optimization doesn't trigger.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/bitops.h | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h
index 7d2494bdc66..ab7635a4acd 100644
--- a/include/asm-x86/bitops.h
+++ b/include/asm-x86/bitops.h
@@ -23,11 +23,22 @@
 #if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
 /* Technically wrong, but this avoids compilation errors on some gcc
    versions. */
-#define ADDR "=m" (*(volatile long *) addr)
+#define BITOP_ADDR(x) "=m" (*(volatile long *) (x))
 #else
-#define ADDR "+m" (*(volatile long *) addr)
+#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
 #endif
 
+#define ADDR BITOP_ADDR(addr)
+
+/*
+ * We do the locked ops that don't return the old value as
+ * a mask operation on a byte.
+ */
+#define IS_IMMEDIATE(nr) \
+	(__builtin_constant_p(nr))
+#define CONST_MASK_ADDR BITOP_ADDR(addr + (nr>>3))
+#define CONST_MASK (1 << (nr & 7))
+
 /**
  * set_bit - Atomically set a bit in memory
  * @nr: the bit to set
@@ -43,11 +54,15 @@
  * Note that @nr may be almost arbitrarily large; this function is not
  * restricted to acting on a single-word quantity.
  */
-static inline void set_bit(int nr, volatile unsigned long *addr)
+static inline void set_bit(unsigned int nr, volatile unsigned long *addr)
 {
-	asm volatile(LOCK_PREFIX "bts %1,%0" : ADDR : "Ir" (nr) : "memory");
+	if (IS_IMMEDIATE(nr))
+		asm volatile(LOCK_PREFIX "orb %1,%0" : CONST_MASK_ADDR : "i" (CONST_MASK) : "memory");
+	else
+		asm volatile(LOCK_PREFIX "bts %1,%0" : ADDR : "Ir" (nr) : "memory");
 }
 
+
 /**
  * __set_bit - Set a bit in memory
  * @nr: the bit to set
@@ -74,7 +89,10 @@ static inline void __set_bit(int nr, volatile unsigned long *addr)
  */
 static inline void clear_bit(int nr, volatile unsigned long *addr)
 {
-	asm volatile(LOCK_PREFIX "btr %1,%0" : ADDR : "Ir" (nr));
+	if (IS_IMMEDIATE(nr))
+		asm volatile(LOCK_PREFIX "andb %1,%0" : CONST_MASK_ADDR : "i" (~CONST_MASK));
+	else
+		asm volatile(LOCK_PREFIX "btr %1,%0" : ADDR : "Ir" (nr));
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 5f0120b5786f5dbe097a946a2eb5d745ebc2b7ed Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Wed, 18 Jun 2008 12:42:11 +0100
Subject: x86-64: remove unnecessary ptregs call stubs

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Cc: "Andi Kleen" <andi@firstfloor.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/ia32entry.S   | 6 ++----
 arch/x86/kernel/entry_64.S  | 1 -
 include/asm-x86/unistd_64.h | 2 +-
 3 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index b5e329da166..3aefbce2de4 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -370,13 +370,11 @@ quiet_ni_syscall:
 	PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi
 	PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi
 	PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx
-	PTREGSCALL stub32_sigsuspend, sys32_sigsuspend, %rcx
 	PTREGSCALL stub32_execve, sys32_execve, %rcx
 	PTREGSCALL stub32_fork, sys_fork, %rdi
 	PTREGSCALL stub32_clone, sys32_clone, %rdx
 	PTREGSCALL stub32_vfork, sys_vfork, %rdi
 	PTREGSCALL stub32_iopl, sys_iopl, %rsi
-	PTREGSCALL stub32_rt_sigsuspend, sys_rt_sigsuspend, %rdx
 
 ENTRY(ia32_ptregs_common)
 	popq %r11
@@ -476,7 +474,7 @@ ia32_sys_call_table:
 	.quad sys_ssetmask
 	.quad sys_setreuid16	/* 70 */
 	.quad sys_setregid16
-	.quad stub32_sigsuspend
+	.quad sys32_sigsuspend
 	.quad compat_sys_sigpending
 	.quad sys_sethostname
 	.quad compat_sys_setrlimit	/* 75 */
@@ -583,7 +581,7 @@ ia32_sys_call_table:
 	.quad sys32_rt_sigpending
 	.quad compat_sys_rt_sigtimedwait
 	.quad sys32_rt_sigqueueinfo
-	.quad stub32_rt_sigsuspend
+	.quad sys_rt_sigsuspend
 	.quad sys32_pread		/* 180 */
 	.quad sys32_pwrite
 	.quad sys_chown16
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1f1751dfb2f..5cf0aa993f4 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -420,7 +420,6 @@ END(\label)
 	PTREGSCALL stub_clone, sys_clone, %r8
 	PTREGSCALL stub_fork, sys_fork, %rdi
 	PTREGSCALL stub_vfork, sys_vfork, %rdi
-	PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
 	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
 	PTREGSCALL stub_iopl, sys_iopl, %rsi
 
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index fe26e36d0f5..9c1a4a3470d 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -290,7 +290,7 @@ __SYSCALL(__NR_rt_sigtimedwait, sys_rt_sigtimedwait)
 #define __NR_rt_sigqueueinfo			129
 __SYSCALL(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo)
 #define __NR_rt_sigsuspend			130
-__SYSCALL(__NR_rt_sigsuspend, stub_rt_sigsuspend)
+__SYSCALL(__NR_rt_sigsuspend, sys_rt_sigsuspend)
 #define __NR_sigaltstack			131
 __SYSCALL(__NR_sigaltstack, stub_sigaltstack)
 #define __NR_utime				132
-- 
cgit v1.2.3-70-g09d2


From 7dbceaf9bb68919651901b101f44edd5391ee489 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 20 Jun 2008 07:28:24 +0200
Subject: x86, bitops: make constant-bit set/clear_bit ops faster, adapt, clean
 up

fix integration bug introduced by "x86: bitops take an unsigned long *"
which turned "(void *) + x" into "(long *) + x".

small cleanups to make it more apparent which value get propagated where.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/bitops.h | 36 ++++++++++++++++++++++--------------
 1 file changed, 22 insertions(+), 14 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h
index ab7635a4acd..6c505481971 100644
--- a/include/asm-x86/bitops.h
+++ b/include/asm-x86/bitops.h
@@ -28,16 +28,15 @@
 #define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
 #endif
 
-#define ADDR BITOP_ADDR(addr)
+#define ADDR				BITOP_ADDR(addr)
 
 /*
  * We do the locked ops that don't return the old value as
  * a mask operation on a byte.
  */
-#define IS_IMMEDIATE(nr) \
-	(__builtin_constant_p(nr))
-#define CONST_MASK_ADDR BITOP_ADDR(addr + (nr>>3))
-#define CONST_MASK (1 << (nr & 7))
+#define IS_IMMEDIATE(nr)		(__builtin_constant_p(nr))
+#define CONST_MASK_ADDR(nr, addr)	BITOP_ADDR((void *)(addr) + ((nr)>>3))
+#define CONST_MASK(nr)			(1 << ((nr) & 7))
 
 /**
  * set_bit - Atomically set a bit in memory
@@ -56,13 +55,17 @@
  */
 static inline void set_bit(unsigned int nr, volatile unsigned long *addr)
 {
-	if (IS_IMMEDIATE(nr))
-		asm volatile(LOCK_PREFIX "orb %1,%0" : CONST_MASK_ADDR : "i" (CONST_MASK) : "memory");
-	else
-		asm volatile(LOCK_PREFIX "bts %1,%0" : ADDR : "Ir" (nr) : "memory");
+	if (IS_IMMEDIATE(nr)) {
+		asm volatile(LOCK_PREFIX "orb %1,%0"
+			: CONST_MASK_ADDR(nr, addr)
+			: "i" (CONST_MASK(nr))
+			: "memory");
+	} else {
+		asm volatile(LOCK_PREFIX "bts %1,%0"
+			: BITOP_ADDR(addr) : "Ir" (nr) : "memory");
+	}
 }
 
-
 /**
  * __set_bit - Set a bit in memory
  * @nr: the bit to set
@@ -89,10 +92,15 @@ static inline void __set_bit(int nr, volatile unsigned long *addr)
  */
 static inline void clear_bit(int nr, volatile unsigned long *addr)
 {
-	if (IS_IMMEDIATE(nr))
-		asm volatile(LOCK_PREFIX "andb %1,%0" : CONST_MASK_ADDR : "i" (~CONST_MASK));
-	else
-		asm volatile(LOCK_PREFIX "btr %1,%0" : ADDR : "Ir" (nr));
+	if (IS_IMMEDIATE(nr)) {
+		asm volatile(LOCK_PREFIX "andb %1,%0"
+			: CONST_MASK_ADDR(nr, addr)
+			: "i" (~CONST_MASK(nr)));
+	} else {
+		asm volatile(LOCK_PREFIX "btr %1,%0"
+			: BITOP_ADDR(addr)
+			: "Ir" (nr));
+	}
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 944256e00a5466ae6b7a11fdb3a47d092f2f62c1 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 17 Jun 2008 11:41:49 -0700
Subject: x86: unify asm-x86/fixmap*.h

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/fixmap.h    | 43 +++++++++++++++++++++++++++++++++++++++++++
 include/asm-x86/fixmap_32.h | 42 ------------------------------------------
 include/asm-x86/fixmap_64.h | 37 -------------------------------------
 3 files changed, 43 insertions(+), 79 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/fixmap.h b/include/asm-x86/fixmap.h
index 5bd206973dc..01b0f98f992 100644
--- a/include/asm-x86/fixmap.h
+++ b/include/asm-x86/fixmap.h
@@ -7,7 +7,50 @@
 # include "fixmap_64.h"
 #endif
 
+extern void __set_fixmap(enum fixed_addresses idx,
+			 unsigned long phys, pgprot_t flags);
+#define set_fixmap(idx, phys)				\
+	__set_fixmap(idx, phys, PAGE_KERNEL)
+
+/*
+ * Some hardware wants to get fixmapped without caching.
+ */
+#define set_fixmap_nocache(idx, phys)			\
+	__set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
+
 #define clear_fixmap(idx)			\
 	__set_fixmap(idx, 0, __pgprot(0))
 
+#define __fix_to_virt(x)	(FIXADDR_TOP - ((x) << PAGE_SHIFT))
+#define __virt_to_fix(x)	((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
+
+extern void __this_fixmap_does_not_exist(void);
+
+/*
+ * 'index to address' translation. If anyone tries to use the idx
+ * directly without translation, we catch the bug with a NULL-deference
+ * kernel oops. Illegal ranges of incoming indices are caught too.
+ */
+static __always_inline unsigned long fix_to_virt(const unsigned int idx)
+{
+	/*
+	 * this branch gets completely eliminated after inlining,
+	 * except when someone tries to use fixaddr indices in an
+	 * illegal way. (such as mixing up address types or using
+	 * out-of-range indices).
+	 *
+	 * If it doesn't get removed, the linker will complain
+	 * loudly with a reasonably clear error message..
+	 */
+	if (idx >= __end_of_fixed_addresses)
+		__this_fixmap_does_not_exist();
+
+	return __fix_to_virt(idx);
+}
+
+static inline unsigned long virt_to_fix(const unsigned long vaddr)
+{
+	BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
+	return __virt_to_fix(vaddr);
+}
 #endif
diff --git a/include/asm-x86/fixmap_32.h b/include/asm-x86/fixmap_32.h
index 4b96148e90c..3a94115952f 100644
--- a/include/asm-x86/fixmap_32.h
+++ b/include/asm-x86/fixmap_32.h
@@ -109,17 +109,8 @@ enum fixed_addresses {
 	__end_of_fixed_addresses
 };
 
-extern void __set_fixmap(enum fixed_addresses idx,
-			 unsigned long phys, pgprot_t flags);
 extern void reserve_top_address(unsigned long reserve);
 
-#define set_fixmap(idx, phys)				\
-	__set_fixmap(idx, phys, PAGE_KERNEL)
-/*
- * Some hardware wants to get fixmapped without caching.
- */
-#define set_fixmap_nocache(idx, phys)			\
-	__set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
 
 #define FIXADDR_TOP	((unsigned long)__FIXADDR_TOP)
 
@@ -128,38 +119,5 @@ extern void reserve_top_address(unsigned long reserve);
 #define FIXADDR_START		(FIXADDR_TOP - __FIXADDR_SIZE)
 #define FIXADDR_BOOT_START	(FIXADDR_TOP - __FIXADDR_BOOT_SIZE)
 
-#define __fix_to_virt(x)	(FIXADDR_TOP - ((x) << PAGE_SHIFT))
-#define __virt_to_fix(x)	((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
-
-extern void __this_fixmap_does_not_exist(void);
-
-/*
- * 'index to address' translation. If anyone tries to use the idx
- * directly without tranlation, we catch the bug with a NULL-deference
- * kernel oops. Illegal ranges of incoming indices are caught too.
- */
-static __always_inline unsigned long fix_to_virt(const unsigned int idx)
-{
-	/*
-	 * this branch gets completely eliminated after inlining,
-	 * except when someone tries to use fixaddr indices in an
-	 * illegal way. (such as mixing up address types or using
-	 * out-of-range indices).
-	 *
-	 * If it doesn't get removed, the linker will complain
-	 * loudly with a reasonably clear error message..
-	 */
-	if (idx >= __end_of_fixed_addresses)
-		__this_fixmap_does_not_exist();
-
-	return __fix_to_virt(idx);
-}
-
-static inline unsigned long virt_to_fix(const unsigned long vaddr)
-{
-	BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
-	return __virt_to_fix(vaddr);
-}
-
 #endif /* !__ASSEMBLY__ */
 #endif
diff --git a/include/asm-x86/fixmap_64.h b/include/asm-x86/fixmap_64.h
index 355d26a75a8..626098823a0 100644
--- a/include/asm-x86/fixmap_64.h
+++ b/include/asm-x86/fixmap_64.h
@@ -52,17 +52,6 @@ enum fixed_addresses {
 	__end_of_fixed_addresses
 };
 
-extern void __set_fixmap(enum fixed_addresses idx,
-			 unsigned long phys, pgprot_t flags);
-
-#define set_fixmap(idx, phys)			\
-	__set_fixmap(idx, phys, PAGE_KERNEL)
-/*
- * Some hardware wants to get fixmapped without caching.
- */
-#define set_fixmap_nocache(idx, phys)			\
-	__set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
-
 #define FIXADDR_TOP	(VSYSCALL_END-PAGE_SIZE)
 #define FIXADDR_SIZE	(__end_of_fixed_addresses << PAGE_SHIFT)
 #define FIXADDR_START	(FIXADDR_TOP - FIXADDR_SIZE)
@@ -71,30 +60,4 @@ extern void __set_fixmap(enum fixed_addresses idx,
 #define FIXADDR_USER_START	((unsigned long)VSYSCALL32_VSYSCALL)
 #define FIXADDR_USER_END	(FIXADDR_USER_START + PAGE_SIZE)
 
-#define __fix_to_virt(x)	(FIXADDR_TOP - ((x) << PAGE_SHIFT))
-
-extern void __this_fixmap_does_not_exist(void);
-
-/*
- * 'index to address' translation. If anyone tries to use the idx
- * directly without translation, we catch the bug with a NULL-deference
- * kernel oops. Illegal ranges of incoming indices are caught too.
- */
-static __always_inline unsigned long fix_to_virt(const unsigned int idx)
-{
-	/*
-	 * this branch gets completely eliminated after inlining,
-	 * except when someone tries to use fixaddr indices in an
-	 * illegal way. (such as mixing up address types or using
-	 * out-of-range indices).
-	 *
-	 * If it doesn't get removed, the linker will complain
-	 * loudly with a reasonably clear error message..
-	 */
-	if (idx >= __end_of_fixed_addresses)
-		__this_fixmap_does_not_exist();
-
-	return __fix_to_virt(idx);
-}
-
 #endif
-- 
cgit v1.2.3-70-g09d2


From 7c7e6e07e2a7c0d2d96389f4f0540e44a80ecdaa Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 17 Jun 2008 11:41:54 -0700
Subject: x86: unify __set_fixmap

In both cases, I went with the 32-bit behaviour.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/init_64.c    | 12 ------------
 arch/x86/mm/pgtable.c    | 14 ++++++++++++++
 arch/x86/mm/pgtable_32.c | 14 +-------------
 include/asm-x86/fixmap.h |  2 ++
 4 files changed, 17 insertions(+), 25 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 156e6d7b0e3..e5f53194985 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -213,18 +213,6 @@ void __init cleanup_highmap(void)
 	}
 }
 
-/* NOTE: this is meant to be run only at boot */
-void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
-{
-	unsigned long address = __fix_to_virt(idx);
-
-	if (idx >= __end_of_fixed_addresses) {
-		printk(KERN_ERR "Invalid __set_fixmap\n");
-		return;
-	}
-	set_pte_phys(address, phys, prot);
-}
-
 static unsigned long __initdata table_start;
 static unsigned long __meminitdata table_end;
 
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 50159764f69..3ebebe480b5 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -274,3 +274,17 @@ int ptep_clear_flush_young(struct vm_area_struct *vma,
 
 	return young;
 }
+
+int fixmaps_set;
+
+void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
+{
+	unsigned long address = __fix_to_virt(idx);
+
+	if (idx >= __end_of_fixed_addresses) {
+		BUG();
+		return;
+	}
+	set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
+	fixmaps_set++;
+}
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 369cf065b6a..3f97c3c8728 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -145,18 +145,6 @@ static int fixmaps;
 unsigned long __FIXADDR_TOP = 0xfffff000;
 EXPORT_SYMBOL(__FIXADDR_TOP);
 
-void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
-{
-	unsigned long address = __fix_to_virt(idx);
-
-	if (idx >= __end_of_fixed_addresses) {
-		BUG();
-		return;
-	}
-	set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
-	fixmaps++;
-}
-
 /**
  * reserve_top_address - reserves a hole in the top of kernel address space
  * @reserve - size of hole to reserve
@@ -166,7 +154,7 @@ void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
  */
 void reserve_top_address(unsigned long reserve)
 {
-	BUG_ON(fixmaps > 0);
+	BUG_ON(fixmaps_set > 0);
 	printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
 	       (int)-reserve);
 	__FIXADDR_TOP = -reserve - PAGE_SIZE;
diff --git a/include/asm-x86/fixmap.h b/include/asm-x86/fixmap.h
index 01b0f98f992..934d6b49b53 100644
--- a/include/asm-x86/fixmap.h
+++ b/include/asm-x86/fixmap.h
@@ -7,6 +7,8 @@
 # include "fixmap_64.h"
 #endif
 
+extern int fixmaps_set;
+
 extern void __set_fixmap(enum fixed_addresses idx,
 			 unsigned long phys, pgprot_t flags);
 #define set_fixmap(idx, phys)				\
-- 
cgit v1.2.3-70-g09d2


From d494a96125c99f1e37b1f831b29b42c9b712ee05 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 17 Jun 2008 11:41:59 -0700
Subject: x86: implement set_pte_vaddr

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/init_64.c     | 9 ++++-----
 arch/x86/mm/pgtable.c     | 2 +-
 arch/x86/mm/pgtable_32.c  | 6 +++---
 include/asm-x86/pgtable.h | 3 +++
 4 files changed, 11 insertions(+), 9 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e5f53194985..74fae833512 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -135,15 +135,15 @@ static __init void *spp_getpage(void)
 	return ptr;
 }
 
-static void
-set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot)
+void
+set_pte_vaddr(unsigned long vaddr, pte_t new_pte)
 {
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
-	pte_t *pte, new_pte;
+	pte_t *pte;
 
-	pr_debug("set_pte_phys %lx to %lx\n", vaddr, phys);
+	pr_debug("set_pte_vaddr %lx to %lx\n", vaddr, native_pte_val(new_pte));
 
 	pgd = pgd_offset_k(vaddr);
 	if (pgd_none(*pgd)) {
@@ -170,7 +170,6 @@ set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot)
 			return;
 		}
 	}
-	new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);
 
 	pte = pte_offset_kernel(pmd, vaddr);
 	if (!pte_none(*pte) && pte_val(new_pte) &&
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 3ebebe480b5..7498124e30f 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -285,6 +285,6 @@ void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
 		BUG();
 		return;
 	}
-	set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
+	set_pte_vaddr(address, pfn_pte(phys >> PAGE_SHIFT, flags));
 	fixmaps_set++;
 }
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 3f97c3c8728..0662f345212 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -71,7 +71,7 @@ void show_mem(void)
  * Associate a virtual page frame with a given physical page frame 
  * and protection flags for that frame.
  */ 
-static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
+void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -94,8 +94,8 @@ static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
 		return;
 	}
 	pte = pte_offset_kernel(pmd, vaddr);
-	if (pgprot_val(flags))
-		set_pte_present(&init_mm, vaddr, pte, pfn_pte(pfn, flags));
+	if (pte_val(pteval))
+		set_pte_present(&init_mm, vaddr, pte, pteval);
 	else
 		pte_clear(&init_mm, vaddr, pte);
 
diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 97c271b2910..702f2699c6d 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -318,6 +318,9 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
                               unsigned long size, pgprot_t *vma_prot);
 #endif
 
+/* Install a pte for a particular vaddr in kernel space. */
+void set_pte_vaddr(unsigned long vaddr, pte_t pte);
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else  /* !CONFIG_PARAVIRT */
-- 
cgit v1.2.3-70-g09d2


From aeaaa59c7e15dcfaaf57ce069ef81683067d575d Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 17 Jun 2008 11:42:01 -0700
Subject: x86/paravirt/xen: add set_fixmap pv_mmu_ops

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/paravirt.c |  2 ++
 arch/x86/mm/pgtable.c      |  9 +++++++--
 arch/x86/xen/enlighten.c   | 29 +++++++++++++++++++++++++++++
 include/asm-x86/fixmap.h   | 14 ++++++++++++--
 include/asm-x86/paravirt.h | 13 +++++++++++++
 5 files changed, 63 insertions(+), 4 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 74f0c5ea2a0..cf06670349d 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -416,6 +416,8 @@ struct pv_mmu_ops pv_mmu_ops = {
 		.enter = paravirt_nop,
 		.leave = paravirt_nop,
 	},
+
+	.set_fixmap = native_set_fixmap,
 };
 
 EXPORT_SYMBOL_GPL(pv_time_ops);
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 7498124e30f..e9fb66361fc 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -277,7 +277,7 @@ int ptep_clear_flush_young(struct vm_area_struct *vma,
 
 int fixmaps_set;
 
-void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
+void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
 {
 	unsigned long address = __fix_to_virt(idx);
 
@@ -285,6 +285,11 @@ void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
 		BUG();
 		return;
 	}
-	set_pte_vaddr(address, pfn_pte(phys >> PAGE_SHIFT, flags));
+	set_pte_vaddr(address, pte);
 	fixmaps_set++;
 }
+
+void native_set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
+{
+	__native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags));
+}
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c8a56e457d6..0ad8a64a2e0 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -960,6 +960,33 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
 	return ret;
 }
 
+static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
+{
+	pte_t pte;
+
+	phys >>= PAGE_SHIFT;
+
+	switch (idx) {
+	case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
+#ifdef CONFIG_X86_F00F_BUG
+	case FIX_F00F_IDT:
+#endif
+	case FIX_WP_TEST:
+	case FIX_VDSO:
+#ifdef CONFIG_X86_LOCAL_APIC
+	case FIX_APIC_BASE:	/* maps dummy local APIC */
+#endif
+		pte = pfn_pte(phys, prot);
+		break;
+
+	default:
+		pte = mfn_pte(phys, prot);
+		break;
+	}
+
+	__native_set_fixmap(idx, pte);
+}
+
 static const struct pv_info xen_info __initdata = {
 	.paravirt_enabled = 1,
 	.shared_kernel_pmd = 0,
@@ -1112,6 +1139,8 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 		.enter = paravirt_enter_lazy_mmu,
 		.leave = xen_leave_lazy,
 	},
+
+	.set_fixmap = xen_set_fixmap,
 };
 
 #ifdef CONFIG_SMP
diff --git a/include/asm-x86/fixmap.h b/include/asm-x86/fixmap.h
index 934d6b49b53..44d4f821734 100644
--- a/include/asm-x86/fixmap.h
+++ b/include/asm-x86/fixmap.h
@@ -9,8 +9,18 @@
 
 extern int fixmaps_set;
 
-extern void __set_fixmap(enum fixed_addresses idx,
-			 unsigned long phys, pgprot_t flags);
+void __native_set_fixmap(enum fixed_addresses idx, pte_t pte);
+void native_set_fixmap(enum fixed_addresses idx,
+		       unsigned long phys, pgprot_t flags);
+
+#ifndef CONFIG_PARAVIRT
+static inline void __set_fixmap(enum fixed_addresses idx,
+				unsigned long phys, pgprot_t flags)
+{
+	native_set_fixmap(idx, phys, flags);
+}
+#endif
+
 #define set_fixmap(idx, phys)				\
 	__set_fixmap(idx, phys, PAGE_KERNEL)
 
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 0f13b945e24..af85caf9a79 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -273,6 +273,13 @@ struct pv_mmu_ops {
 #endif
 
 	struct pv_lazy_ops lazy_mode;
+
+	/* dom0 ops */
+
+	/* Sometimes the physical address is a pfn, and sometimes its
+	   an mfn.  We can tell which is which from the index. */
+	void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx,
+			   unsigned long phys, pgprot_t flags);
 };
 
 /* This contains all the paravirt structures: we get a convenient
@@ -1252,6 +1259,12 @@ static inline void arch_flush_lazy_mmu_mode(void)
 	}
 }
 
+static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
+				unsigned long phys, pgprot_t flags)
+{
+	pv_mmu_ops.set_fixmap(idx, phys, flags);
+}
+
 void _paravirt_nop(void);
 #define paravirt_nop	((void *)_paravirt_nop)
 
-- 
cgit v1.2.3-70-g09d2


From 437a0a54eea7b101e8a5b70688009956f6522ed0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 20 Jun 2008 21:50:20 +0200
Subject: x86, bitops: make constant-bit set/clear_bit ops faster, gcc
 workaround

Jeremy Fitzhardinge reported this compiler bug:

Suggestion from Linus: add "r" to the input constraint of the
set_bit()/clear_bit()'s constant 'nr' branch:

Blows up on "gcc version 3.4.4 20050314 (prerelease) (Debian 3.4.3-13)":

 CC      init/main.o
include2/asm/bitops.h: In function `start_kernel':
include2/asm/bitops.h:59: warning: asm operand 1 probably doesn't match constraints
include2/asm/bitops.h:59: warning: asm operand 1 probably doesn't match constraints
include2/asm/bitops.h:59: warning: asm operand 1 probably doesn't match constraints
include2/asm/bitops.h:59: error: impossible constraint in `asm'
include2/asm/bitops.h:59: error: impossible constraint in `asm'
include2/asm/bitops.h:59: error: impossible constraint in `asm'

Reported-by: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/bitops.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h
index 6c505481971..96b1829cea1 100644
--- a/include/asm-x86/bitops.h
+++ b/include/asm-x86/bitops.h
@@ -58,7 +58,7 @@ static inline void set_bit(unsigned int nr, volatile unsigned long *addr)
 	if (IS_IMMEDIATE(nr)) {
 		asm volatile(LOCK_PREFIX "orb %1,%0"
 			: CONST_MASK_ADDR(nr, addr)
-			: "i" (CONST_MASK(nr))
+			: "iq" ((u8)CONST_MASK(nr))
 			: "memory");
 	} else {
 		asm volatile(LOCK_PREFIX "bts %1,%0"
@@ -95,7 +95,7 @@ static inline void clear_bit(int nr, volatile unsigned long *addr)
 	if (IS_IMMEDIATE(nr)) {
 		asm volatile(LOCK_PREFIX "andb %1,%0"
 			: CONST_MASK_ADDR(nr, addr)
-			: "i" (~CONST_MASK(nr)));
+			: "iq" ((u8)~CONST_MASK(nr)));
 	} else {
 		asm volatile(LOCK_PREFIX "btr %1,%0"
 			: BITOP_ADDR(addr)
-- 
cgit v1.2.3-70-g09d2


From 395a59d0f8e86bb39cd700c3d185d30c670bb958 Mon Sep 17 00:00:00 2001
From: Abhishek Sagar <sagar.abhishek@gmail.com>
Date: Sat, 21 Jun 2008 23:47:27 +0530
Subject: ftrace: store mcount address in rec->ip

Record the address of the mcount call-site. Currently all archs except sparc64
record the address of the instruction following the mcount call-site. Some
general cleanups are entailed. Storing mcount addresses in rec->ip enables
looking them up in the kprobe hash table later on to check if they're kprobe'd.

Signed-off-by: Abhishek Sagar <sagar.abhishek@gmail.com>
Cc: davem@davemloft.net
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/arm/kernel/armksyms.c          | 10 +++++-----
 arch/arm/kernel/entry-common.S      |  4 ++++
 arch/arm/kernel/ftrace.c            | 16 +++++++---------
 arch/powerpc/kernel/entry_32.S      |  4 ++++
 arch/powerpc/kernel/entry_64.S      |  5 ++++-
 arch/powerpc/kernel/ftrace.c        | 21 +++++++--------------
 arch/sparc64/kernel/ftrace.c        | 10 ++++++----
 arch/sparc64/kernel/sparc64_ksyms.c |  2 +-
 arch/x86/kernel/entry_32.S          |  4 ++++
 arch/x86/kernel/entry_64.S          |  4 ++++
 arch/x86/kernel/ftrace.c            | 26 +++++++++-----------------
 arch/x86/kernel/i386_ksyms_32.c     |  2 +-
 arch/x86/kernel/x8664_ksyms_64.c    |  2 +-
 include/asm-arm/ftrace.h            | 14 ++++++++++++++
 include/asm-powerpc/ftrace.h        |  8 ++++++++
 include/asm-sparc64/ftrace.h        | 14 ++++++++++++++
 include/asm-x86/ftrace.h            | 14 ++++++++++++++
 include/linux/ftrace.h              |  3 +--
 kernel/trace/ftrace.c               |  3 ++-
 19 files changed, 110 insertions(+), 56 deletions(-)
 create mode 100644 include/asm-arm/ftrace.h
 create mode 100644 include/asm-sparc64/ftrace.h
 create mode 100644 include/asm-x86/ftrace.h

(limited to 'include/asm-x86')

diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index 3b132215cbf..cc7b246e965 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -18,6 +18,7 @@
 #include <asm/io.h>
 #include <asm/system.h>
 #include <asm/uaccess.h>
+#include <asm/ftrace.h>
 
 /*
  * libgcc functions - functions that are used internally by the
@@ -48,11 +49,6 @@ extern void __aeabi_ulcmp(void);
 extern void fpundefinstr(void);
 extern void fp_enter(void);
 
-#ifdef CONFIG_FTRACE
-extern void mcount(void);
-EXPORT_SYMBOL(mcount);
-#endif
-
 /*
  * This has a special calling convention; it doesn't
  * modify any of the usual registers, except for LR.
@@ -186,3 +182,7 @@ EXPORT_SYMBOL(_find_next_bit_be);
 #endif
 
 EXPORT_SYMBOL(copy_page);
+
+#ifdef CONFIG_FTRACE
+EXPORT_SYMBOL(mcount);
+#endif
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 8f79a4789ed..84694e88b42 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -9,6 +9,7 @@
  */
 
 #include <asm/unistd.h>
+#include <asm/ftrace.h>
 #include <asm/arch/entry-macro.S>
 
 #include "entry-header.S"
@@ -104,6 +105,7 @@ ENTRY(ret_from_fork)
 ENTRY(mcount)
 	stmdb sp!, {r0-r3, lr}
 	mov r0, lr
+	sub r0, r0, #MCOUNT_INSN_SIZE
 
 	.globl mcount_call
 mcount_call:
@@ -114,6 +116,7 @@ ENTRY(ftrace_caller)
 	stmdb sp!, {r0-r3, lr}
 	ldr r1, [fp, #-4]
 	mov r0, lr
+	sub r0, r0, #MCOUNT_INSN_SIZE
 
 	.globl ftrace_call
 ftrace_call:
@@ -134,6 +137,7 @@ ENTRY(mcount)
 trace:
 	ldr r1, [fp, #-4]
 	mov r0, lr
+	sub r0, r0, #MCOUNT_INSN_SIZE
 	mov lr, pc
 	mov pc, r2
 	ldmia sp!, {r0-r3, pc}
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index 22f3d6e309f..76d50e6091b 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -12,9 +12,10 @@
  */
 
 #include <linux/ftrace.h>
+
 #include <asm/cacheflush.h>
+#include <asm/ftrace.h>
 
-#define INSN_SIZE      4
 #define PC_OFFSET      8
 #define BL_OPCODE      0xeb000000
 #define BL_OFFSET_MASK 0x00ffffff
@@ -32,10 +33,10 @@ unsigned char *ftrace_call_replace(unsigned long pc, unsigned long addr)
 {
 	long offset;
 
-	offset = (long)addr - (long)(pc - INSN_SIZE + PC_OFFSET);
+	offset = (long)addr - (long)(pc + PC_OFFSET);
 	if (unlikely(offset < -33554432 || offset > 33554428)) {
 		/* Can't generate branches that far (from ARM ARM). Ftrace
-		 * doesn't generate branches outside of core kernel text.
+		 * doesn't generate branches outside of kernel text.
 		 */
 		WARN_ON_ONCE(1);
 		return NULL;
@@ -52,7 +53,6 @@ int ftrace_modify_code(unsigned long pc, unsigned char *old_code,
 
 	old = *(unsigned long *)old_code;
 	new = *(unsigned long *)new_code;
-	pc -= INSN_SIZE;
 
 	__asm__ __volatile__ (
 		"1:  ldr    %1, [%2]  \n"
@@ -77,7 +77,7 @@ int ftrace_modify_code(unsigned long pc, unsigned char *old_code,
 		: "memory");
 
 	if (!err && (replaced == old))
-		flush_icache_range(pc, pc + INSN_SIZE);
+		flush_icache_range(pc, pc + MCOUNT_INSN_SIZE);
 
 	return err;
 }
@@ -89,8 +89,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
 	unsigned char *new;
 
 	pc = (unsigned long)&ftrace_call;
-	pc += INSN_SIZE;
-	memcpy(&old, &ftrace_call, INSN_SIZE);
+	memcpy(&old, &ftrace_call, MCOUNT_INSN_SIZE);
 	new = ftrace_call_replace(pc, (unsigned long)func);
 	ret = ftrace_modify_code(pc, (unsigned char *)&old, new);
 	return ret;
@@ -103,8 +102,7 @@ int ftrace_mcount_set(unsigned long *data)
 	unsigned char *new;
 
 	pc = (unsigned long)&mcount_call;
-	pc += INSN_SIZE;
-	memcpy(&old, &mcount_call, INSN_SIZE);
+	memcpy(&old, &mcount_call, MCOUNT_INSN_SIZE);
 	new = ftrace_call_replace(pc, *addr);
 	*addr = ftrace_modify_code(pc, (unsigned char *)&old, new);
 	return 0;
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 3b1dd29d9f9..7231a708af0 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -30,6 +30,7 @@
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
 #include <asm/unistd.h>
+#include <asm/ftrace.h>
 
 #undef SHOW_SYSCALLS
 #undef SHOW_SYSCALLS_TASK
@@ -1053,6 +1054,7 @@ _GLOBAL(_mcount)
 	stw	r10,40(r1)
 	stw	r3, 44(r1)
 	stw	r5, 8(r1)
+	subi	r3, r3, MCOUNT_INSN_SIZE
 	.globl mcount_call
 mcount_call:
 	bl	ftrace_stub
@@ -1090,6 +1092,7 @@ _GLOBAL(ftrace_caller)
 	stw	r10,40(r1)
 	stw	r3, 44(r1)
 	stw	r5, 8(r1)
+	subi	r3, r3, MCOUNT_INSN_SIZE
 .globl ftrace_call
 ftrace_call:
 	bl	ftrace_stub
@@ -1128,6 +1131,7 @@ _GLOBAL(_mcount)
 	stw	r3, 44(r1)
 	stw	r5, 8(r1)
 
+	subi	r3, r3, MCOUNT_INSN_SIZE
 	LOAD_REG_ADDR(r5, ftrace_trace_function)
 	lwz	r5,0(r5)
 
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 2c4d9e056ea..2f511a969d2 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -31,6 +31,7 @@
 #include <asm/bug.h>
 #include <asm/ptrace.h>
 #include <asm/irqflags.h>
+#include <asm/ftrace.h>
 
 /*
  * System calls.
@@ -879,6 +880,7 @@ _GLOBAL(_mcount)
 	mflr	r3
 	stdu	r1, -112(r1)
 	std	r3, 128(r1)
+	subi	r3, r3, MCOUNT_INSN_SIZE
 	.globl mcount_call
 mcount_call:
 	bl	ftrace_stub
@@ -895,6 +897,7 @@ _GLOBAL(ftrace_caller)
 	stdu	r1, -112(r1)
 	std	r3, 128(r1)
 	ld	r4, 16(r11)
+	subi	r3, r3, MCOUNT_INSN_SIZE
 .globl ftrace_call
 ftrace_call:
 	bl	ftrace_stub
@@ -916,7 +919,7 @@ _GLOBAL(_mcount)
 	std	r3, 128(r1)
 	ld	r4, 16(r11)
 
-
+	subi	r3, r3, MCOUNT_INSN_SIZE
 	LOAD_REG_ADDR(r5,ftrace_trace_function)
 	ld	r5,0(r5)
 	ld	r5,0(r5)
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index e12c593ab9c..3855ceb937b 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -15,8 +15,8 @@
 #include <linux/list.h>
 
 #include <asm/cacheflush.h>
+#include <asm/ftrace.h>
 
-#define CALL_BACK		4
 
 static unsigned int ftrace_nop = 0x60000000;
 
@@ -27,9 +27,10 @@ static unsigned int ftrace_nop = 0x60000000;
 # define GET_ADDR(addr) *(unsigned long *)addr
 #endif
 
+
 static unsigned int notrace ftrace_calc_offset(long ip, long addr)
 {
-	return (int)((addr + CALL_BACK) - ip);
+	return (int)(addr - ip);
 }
 
 notrace unsigned char *ftrace_nop_replace(void)
@@ -76,9 +77,6 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 	unsigned new = *(unsigned *)new_code;
 	int faulted = 0;
 
-	/* move the IP back to the start of the call */
-	ip -= CALL_BACK;
-
 	/*
 	 * Note: Due to modules and __init, code can
 	 *  disappear and change, we need to protect against faulting
@@ -118,12 +116,10 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 notrace int ftrace_update_ftrace_func(ftrace_func_t func)
 {
 	unsigned long ip = (unsigned long)(&ftrace_call);
-	unsigned char old[4], *new;
+	unsigned char old[MCOUNT_INSN_SIZE], *new;
 	int ret;
 
-	ip += CALL_BACK;
-
-	memcpy(old, &ftrace_call, 4);
+	memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
 	new = ftrace_call_replace(ip, (unsigned long)func);
 	ret = ftrace_modify_code(ip, old, new);
 
@@ -134,16 +130,13 @@ notrace int ftrace_mcount_set(unsigned long *data)
 {
 	unsigned long ip = (long)(&mcount_call);
 	unsigned long *addr = data;
-	unsigned char old[4], *new;
-
-	/* ip is at the location, but modify code will subtact this */
-	ip += CALL_BACK;
+	unsigned char old[MCOUNT_INSN_SIZE], *new;
 
 	/*
 	 * Replace the mcount stub with a pointer to the
 	 * ip recorder function.
 	 */
-	memcpy(old, &mcount_call, 4);
+	memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
 	new = ftrace_call_replace(ip, *addr);
 	*addr = ftrace_modify_code(ip, old, new);
 
diff --git a/arch/sparc64/kernel/ftrace.c b/arch/sparc64/kernel/ftrace.c
index c17373195b1..4298d0aee71 100644
--- a/arch/sparc64/kernel/ftrace.c
+++ b/arch/sparc64/kernel/ftrace.c
@@ -5,6 +5,8 @@
 #include <linux/init.h>
 #include <linux/list.h>
 
+#include <asm/ftrace.h>
+
 static const u32 ftrace_nop = 0x01000000;
 
 notrace unsigned char *ftrace_nop_replace(void)
@@ -60,9 +62,9 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 notrace int ftrace_update_ftrace_func(ftrace_func_t func)
 {
 	unsigned long ip = (unsigned long)(&ftrace_call);
-	unsigned char old[4], *new;
+	unsigned char old[MCOUNT_INSN_SIZE], *new;
 
-	memcpy(old, &ftrace_call, 4);
+	memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
 	new = ftrace_call_replace(ip, (unsigned long)func);
 	return ftrace_modify_code(ip, old, new);
 }
@@ -71,13 +73,13 @@ notrace int ftrace_mcount_set(unsigned long *data)
 {
 	unsigned long ip = (long)(&mcount_call);
 	unsigned long *addr = data;
-	unsigned char old[4], *new;
+	unsigned char old[MCOUNT_INSN_SIZE], *new;
 
 	/*
 	 * Replace the mcount stub with a pointer to the
 	 * ip recorder function.
 	 */
-	memcpy(old, &mcount_call, 4);
+	memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
 	new = ftrace_call_replace(ip, *addr);
 	*addr = ftrace_modify_code(ip, old, new);
 
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c
index 8ac0b99f2c5..b80d982a29c 100644
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -53,6 +53,7 @@
 #include <asm/ns87303.h>
 #include <asm/timer.h>
 #include <asm/cpudata.h>
+#include <asm/ftrace.h>
 
 struct poll {
 	int fd;
@@ -112,7 +113,6 @@ EXPORT_SYMBOL(smp_call_function);
 #endif /* CONFIG_SMP */
 
 #if defined(CONFIG_MCOUNT)
-extern void _mcount(void);
 EXPORT_SYMBOL(_mcount);
 #endif
 
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 04ea83ccb97..95e6bbe3665 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -51,6 +51,7 @@
 #include <asm/percpu.h>
 #include <asm/dwarf2.h>
 #include <asm/processor-flags.h>
+#include <asm/ftrace.h>
 #include "irq_vectors.h"
 
 /*
@@ -1118,6 +1119,7 @@ ENTRY(mcount)
 	pushl %ecx
 	pushl %edx
 	movl 0xc(%esp), %eax
+	subl $MCOUNT_INSN_SIZE, %eax
 
 .globl mcount_call
 mcount_call:
@@ -1136,6 +1138,7 @@ ENTRY(ftrace_caller)
 	pushl %edx
 	movl 0xc(%esp), %eax
 	movl 0x4(%ebp), %edx
+	subl $MCOUNT_INSN_SIZE, %eax
 
 .globl ftrace_call
 ftrace_call:
@@ -1166,6 +1169,7 @@ trace:
 	pushl %edx
 	movl 0xc(%esp), %eax
 	movl 0x4(%ebp), %edx
+	subl $MCOUNT_INSN_SIZE, %eax
 
 	call *ftrace_trace_function
 
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index fe25e5febca..b0f7308f78a 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -51,6 +51,7 @@
 #include <asm/page.h>
 #include <asm/irqflags.h>
 #include <asm/paravirt.h>
+#include <asm/ftrace.h>
 
 	.code64
 
@@ -68,6 +69,7 @@ ENTRY(mcount)
 	movq %r9, 48(%rsp)
 
 	movq 0x38(%rsp), %rdi
+	subq $MCOUNT_INSN_SIZE, %rdi
 
 .globl mcount_call
 mcount_call:
@@ -99,6 +101,7 @@ ENTRY(ftrace_caller)
 
 	movq 0x38(%rsp), %rdi
 	movq 8(%rbp), %rsi
+	subq $MCOUNT_INSN_SIZE, %rdi
 
 .globl ftrace_call
 ftrace_call:
@@ -139,6 +142,7 @@ trace:
 
 	movq 0x38(%rsp), %rdi
 	movq 8(%rbp), %rsi
+	subq $MCOUNT_INSN_SIZE, %rdi
 
 	call   *ftrace_trace_function
 
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 55828149e01..ab115cd15fd 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -17,20 +17,21 @@
 #include <linux/list.h>
 
 #include <asm/alternative.h>
+#include <asm/ftrace.h>
 
-#define CALL_BACK		5
 
 /* Long is fine, even if it is only 4 bytes ;-) */
 static long *ftrace_nop;
 
 union ftrace_code_union {
-	char code[5];
+	char code[MCOUNT_INSN_SIZE];
 	struct {
 		char e8;
 		int offset;
 	} __attribute__((packed));
 };
 
+
 static int notrace ftrace_calc_offset(long ip, long addr)
 {
 	return (int)(addr - ip);
@@ -46,7 +47,7 @@ notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
 	static union ftrace_code_union calc;
 
 	calc.e8		= 0xe8;
-	calc.offset	= ftrace_calc_offset(ip, addr);
+	calc.offset	= ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);
 
 	/*
 	 * No locking needed, this must be called via kstop_machine
@@ -65,9 +66,6 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 	unsigned char newch = new_code[4];
 	int faulted = 0;
 
-	/* move the IP back to the start of the call */
-	ip -= CALL_BACK;
-
 	/*
 	 * Note: Due to modules and __init, code can
 	 *  disappear and change, we need to protect against faulting
@@ -102,12 +100,10 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 notrace int ftrace_update_ftrace_func(ftrace_func_t func)
 {
 	unsigned long ip = (unsigned long)(&ftrace_call);
-	unsigned char old[5], *new;
+	unsigned char old[MCOUNT_INSN_SIZE], *new;
 	int ret;
 
-	ip += CALL_BACK;
-
-	memcpy(old, &ftrace_call, 5);
+	memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
 	new = ftrace_call_replace(ip, (unsigned long)func);
 	ret = ftrace_modify_code(ip, old, new);
 
@@ -118,16 +114,13 @@ notrace int ftrace_mcount_set(unsigned long *data)
 {
 	unsigned long ip = (long)(&mcount_call);
 	unsigned long *addr = data;
-	unsigned char old[5], *new;
-
-	/* ip is at the location, but modify code will subtact this */
-	ip += CALL_BACK;
+	unsigned char old[MCOUNT_INSN_SIZE], *new;
 
 	/*
 	 * Replace the mcount stub with a pointer to the
 	 * ip recorder function.
 	 */
-	memcpy(old, &mcount_call, 5);
+	memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
 	new = ftrace_call_replace(ip, *addr);
 	*addr = ftrace_modify_code(ip, old, new);
 
@@ -142,8 +135,7 @@ int __init ftrace_dyn_arch_init(void *data)
 
 	ftrace_mcount_set(data);
 
-	ftrace_nop = (unsigned long *)noptable[CALL_BACK];
+	ftrace_nop = (unsigned long *)noptable[MCOUNT_INSN_SIZE];
 
 	return 0;
 }
-
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index 29999dbb754..dd7ebee446a 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -1,9 +1,9 @@
-#include <linux/ftrace.h>
 #include <linux/module.h>
 
 #include <asm/checksum.h>
 #include <asm/pgtable.h>
 #include <asm/desc.h>
+#include <asm/ftrace.h>
 
 #ifdef CONFIG_FTRACE
 /* mcount is defined in assembly */
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 122885bc5f3..16ff4bf418d 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -1,7 +1,6 @@
 /* Exports for assembly files.
    All C exports should go in the respective C files. */
 
-#include <linux/ftrace.h>
 #include <linux/module.h>
 #include <linux/smp.h>
 
@@ -11,6 +10,7 @@
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
 #include <asm/desc.h>
+#include <asm/ftrace.h>
 
 #ifdef CONFIG_FTRACE
 /* mcount is defined in assembly */
diff --git a/include/asm-arm/ftrace.h b/include/asm-arm/ftrace.h
new file mode 100644
index 00000000000..584ef9a8e5a
--- /dev/null
+++ b/include/asm-arm/ftrace.h
@@ -0,0 +1,14 @@
+#ifndef _ASM_ARM_FTRACE
+#define _ASM_ARM_FTRACE
+
+#ifdef CONFIG_FTRACE
+#define MCOUNT_ADDR		((long)(mcount))
+#define MCOUNT_INSN_SIZE	4 /* sizeof mcount call */
+
+#ifndef __ASSEMBLY__
+extern void mcount(void);
+#endif
+
+#endif
+
+#endif /* _ASM_ARM_FTRACE */
diff --git a/include/asm-powerpc/ftrace.h b/include/asm-powerpc/ftrace.h
index b1bfa704b6e..de921326cca 100644
--- a/include/asm-powerpc/ftrace.h
+++ b/include/asm-powerpc/ftrace.h
@@ -1,6 +1,14 @@
 #ifndef _ASM_POWERPC_FTRACE
 #define _ASM_POWERPC_FTRACE
 
+#ifdef CONFIG_FTRACE
+#define MCOUNT_ADDR		((long)(_mcount))
+#define MCOUNT_INSN_SIZE	4 /* sizeof mcount call */
+
+#ifndef __ASSEMBLY__
 extern void _mcount(void);
+#endif
 
 #endif
+
+#endif /* _ASM_POWERPC_FTRACE */
diff --git a/include/asm-sparc64/ftrace.h b/include/asm-sparc64/ftrace.h
new file mode 100644
index 00000000000..f76a40a338b
--- /dev/null
+++ b/include/asm-sparc64/ftrace.h
@@ -0,0 +1,14 @@
+#ifndef _ASM_SPARC64_FTRACE
+#define _ASM_SPARC64_FTRACE
+
+#ifdef CONFIG_FTRACE
+#define MCOUNT_ADDR		((long)(_mcount))
+#define MCOUNT_INSN_SIZE	4 /* sizeof mcount call */
+
+#ifndef __ASSEMBLY__
+extern void _mcount(void);
+#endif
+
+#endif
+
+#endif /* _ASM_SPARC64_FTRACE */
diff --git a/include/asm-x86/ftrace.h b/include/asm-x86/ftrace.h
new file mode 100644
index 00000000000..c184441133f
--- /dev/null
+++ b/include/asm-x86/ftrace.h
@@ -0,0 +1,14 @@
+#ifndef _ASM_X86_FTRACE
+#define _ASM_SPARC64_FTRACE
+
+#ifdef CONFIG_FTRACE
+#define MCOUNT_ADDR		((long)(mcount))
+#define MCOUNT_INSN_SIZE	5 /* sizeof mcount call */
+
+#ifndef __ASSEMBLY__
+extern void mcount(void);
+#endif
+
+#endif /* CONFIG_FTRACE */
+
+#endif /* _ASM_X86_FTRACE */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 20e14d0093c..366098d591d 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -31,7 +31,6 @@ int unregister_ftrace_function(struct ftrace_ops *ops);
 void clear_ftrace_function(void);
 
 extern void ftrace_stub(unsigned long a0, unsigned long a1);
-extern void mcount(void);
 
 #else /* !CONFIG_FTRACE */
 # define register_ftrace_function(ops) do { } while (0)
@@ -54,7 +53,7 @@ enum {
 
 struct dyn_ftrace {
 	struct hlist_node node;
-	unsigned long	  ip;
+	unsigned long	  ip; /* address of mcount call-site */
 	unsigned long	  flags;
 };
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 0d5bcf69952..f1e9e5c74e6 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -27,6 +27,8 @@
 #include <linux/hash.h>
 #include <linux/list.h>
 
+#include <asm/ftrace.h>
+
 #include "trace.h"
 
 /* ftrace_enabled is a method to turn ftrace on or off */
@@ -329,7 +331,6 @@ ftrace_record_ip(unsigned long ip)
 }
 
 #define FTRACE_ADDR ((long)(ftrace_caller))
-#define MCOUNT_ADDR ((long)(mcount))
 
 static int
 __ftrace_replace_code(struct dyn_ftrace *rec,
-- 
cgit v1.2.3-70-g09d2


From bcc643dc287cb732e96a1685ac130c3ae8b1c960 Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Fri, 20 Jun 2008 21:58:46 +0200
Subject: x86: introduce macro to check whether an address range is in the ISA
 range

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Suresh B Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/ioremap.c  | 2 +-
 arch/x86/mm/pat.c      | 5 ++---
 include/asm-x86/e820.h | 1 +
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 7452eb31ed1..6d9960dd6f3 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -142,7 +142,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
 	/*
 	 * Don't remap the low PCI/ISA area, it's always mapped..
 	 */
-	if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
+	if (is_ISA_range(phys_addr, last_addr))
 		return (__force void __iomem *)phys_to_virt(phys_addr);
 
 	/*
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 227df3ca9bf..5bbc22efe4e 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -215,7 +215,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 	}
 
 	/* Low ISA region is always mapped WB in page table. No need to track */
-	if (start >= ISA_START_ADDRESS && (end - 1) <= ISA_END_ADDRESS) {
+	if (is_ISA_range(start, end - 1)) {
 		if (ret_type)
 			*ret_type = _PAGE_CACHE_WB;
 
@@ -415,9 +415,8 @@ int free_memtype(u64 start, u64 end)
 	}
 
 	/* Low ISA region is always mapped WB. No need to track */
-	if (start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS) {
+	if (is_ISA_range(start, end - 1))
 		return 0;
-	}
 
 	spin_lock(&memtype_lock);
 	list_for_each_entry(ml, &memtype_list, nd) {
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index 7004251fc66..5103d0b2c46 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -24,6 +24,7 @@ struct e820map {
 
 #define ISA_START_ADDRESS	0xa0000
 #define ISA_END_ADDRESS		0x100000
+#define is_ISA_range(s, e) ((s) >= ISA_START_ADDRESS && (e) < ISA_END_ADDRESS)
 
 #define BIOS_BEGIN		0x000a0000
 #define BIOS_END		0x00100000
-- 
cgit v1.2.3-70-g09d2


From b664d6bbeeddc77b93f5fea16006b428054f1cd1 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Tue, 24 Jun 2008 12:31:05 +0200
Subject: x86: add X86_FEATURE_IBS cpu feature

This adds IBS to the cpu feature flags allowing Perfmon and OProfile
to use cpu_has().

Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/cpufeature.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index 0d609c837a4..e2469a78195 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -106,6 +106,7 @@
 /* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
 #define X86_FEATURE_LAHF_LM	(6*32+ 0) /* LAHF/SAHF in long mode */
 #define X86_FEATURE_CMP_LEGACY	(6*32+ 1) /* If yes HyperThreading not valid */
+#define X86_FEATURE_IBS		(6*32+ 10) /* Instruction Based Sampling */
 
 /*
  * Auxiliary flags: Linux defined - For features scattered in various
-- 
cgit v1.2.3-70-g09d2


From 08b882c627aeeeb3cfd3c4354f0d360d7949549d Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 16 Jun 2008 04:30:01 -0700
Subject: paravirt: add hooks for ptep_modify_prot_start/commit

This patch adds paravirt-ops hooks in pv_mmu_ops for ptep_modify_prot_start and
ptep_modify_prot_commit.  This allows the hypervisor-specific backends to
implement these in some more efficient way.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/paravirt.c |  3 +++
 arch/x86/xen/enlighten.c   |  3 +++
 include/asm-x86/paravirt.h | 28 ++++++++++++++++++++++++++++
 3 files changed, 34 insertions(+)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index c98d5468818..f1ab0f72700 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -380,6 +380,9 @@ struct pv_mmu_ops pv_mmu_ops = {
 	.pte_update = paravirt_nop,
 	.pte_update_defer = paravirt_nop,
 
+	.ptep_modify_prot_start = __ptep_modify_prot_start,
+	.ptep_modify_prot_commit = __ptep_modify_prot_commit,
+
 #ifdef CONFIG_HIGHPTE
 	.kmap_atomic_pte = kmap_atomic,
 #endif
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 73fb0c4c150..0b7553cbc52 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1138,6 +1138,9 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.set_pte_at = xen_set_pte_at,
 	.set_pmd = xen_set_pmd_hyper,
 
+	.ptep_modify_prot_start = __ptep_modify_prot_start,
+	.ptep_modify_prot_commit = __ptep_modify_prot_commit,
+
 	.pte_val = xen_pte_val,
 	.pte_flags = native_pte_val,
 	.pgd_val = xen_pgd_val,
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 5ea37a48eec..e9ada314dfc 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -238,6 +238,11 @@ struct pv_mmu_ops {
 	void (*pte_update_defer)(struct mm_struct *mm,
 				 unsigned long addr, pte_t *ptep);
 
+	pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr,
+					pte_t *ptep);
+	void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr,
+					pte_t *ptep, pte_t pte);
+
 	pteval_t (*pte_val)(pte_t);
 	pteval_t (*pte_flags)(pte_t);
 	pte_t (*make_pte)(pteval_t pte);
@@ -1039,6 +1044,29 @@ static inline pgdval_t pgd_val(pgd_t pgd)
 	return ret;
 }
 
+#define  __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
+static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
+					   pte_t *ptep)
+{
+	pteval_t ret;
+
+	ret = PVOP_CALL3(pteval_t, pv_mmu_ops.ptep_modify_prot_start,
+			 mm, addr, ptep);
+
+	return (pte_t) { .pte = ret };
+}
+
+static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
+					   pte_t *ptep, pte_t pte)
+{
+	if (sizeof(pteval_t) > sizeof(long))
+		/* 5 arg words */
+		pv_mmu_ops.ptep_modify_prot_commit(mm, addr, ptep, pte);
+	else
+		PVOP_VCALL4(pv_mmu_ops.ptep_modify_prot_commit,
+			    mm, addr, ptep, pte.pte);
+}
+
 static inline void set_pte(pte_t *ptep, pte_t pte)
 {
 	if (sizeof(pteval_t) > sizeof(long))
-- 
cgit v1.2.3-70-g09d2


From 3b16cf874861436725c43ba0b68bdd799297be7c Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Thu, 26 Jun 2008 11:21:54 +0200
Subject: x86: convert to generic helpers for IPI function calls

This converts x86, x86-64, and xen to use the new helpers for
smp_call_function() and friends, and adds support for
smp_call_function_single().

Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 arch/x86/Kconfig                           |   1 +
 arch/x86/kernel/apic_32.c                  |   4 +
 arch/x86/kernel/entry_64.S                 |   3 +
 arch/x86/kernel/i8259_64.c                 |   4 +
 arch/x86/kernel/smp.c                      | 158 ++++-------------------------
 arch/x86/kernel/smpboot.c                  |   4 +-
 arch/x86/kernel/smpcommon.c                |  56 ----------
 arch/x86/mach-voyager/voyager_smp.c        |  94 ++++-------------
 arch/x86/xen/enlighten.c                   |   4 +-
 arch/x86/xen/mmu.c                         |   2 +-
 arch/x86/xen/smp.c                         | 133 +++++++++---------------
 arch/x86/xen/xen-ops.h                     |   9 +-
 include/asm-x86/hw_irq_32.h                |   1 +
 include/asm-x86/hw_irq_64.h                |   2 +
 include/asm-x86/mach-default/entry_arch.h  |   1 +
 include/asm-x86/mach-default/irq_vectors.h |   1 +
 include/asm-x86/mach-voyager/entry_arch.h  |   2 +-
 include/asm-x86/mach-voyager/irq_vectors.h |   4 +-
 include/asm-x86/smp.h                      |  21 ++--
 include/asm-x86/xen/events.h               |   1 +
 20 files changed, 125 insertions(+), 380 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e0edaaa6920..2f3fbebf51d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -168,6 +168,7 @@ config GENERIC_PENDING_IRQ
 config X86_SMP
 	bool
 	depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64)
+	select USE_GENERIC_SMP_HELPERS
 	default y
 
 config X86_32_SMP
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 4b99b1bdeb6..71017f71f4b 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1358,6 +1358,10 @@ void __init smp_intr_init(void)
 
 	/* IPI for generic function call */
 	set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+
+	/* IPI for single call function */
+	set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
+				call_function_single_interrupt);
 }
 #endif
 
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 556a8df522a..6d1fe270a96 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -711,6 +711,9 @@ END(invalidate_interrupt\num)
 ENTRY(call_function_interrupt)
 	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
 END(call_function_interrupt)
+ENTRY(call_function_single_interrupt)
+	apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
+END(call_function_single_interrupt)
 ENTRY(irq_move_cleanup_interrupt)
 	apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
 END(irq_move_cleanup_interrupt)
diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c
index fa57a156850..00d2ccdc69f 100644
--- a/arch/x86/kernel/i8259_64.c
+++ b/arch/x86/kernel/i8259_64.c
@@ -494,6 +494,10 @@ void __init native_init_IRQ(void)
 	/* IPI for generic function call */
 	set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
 
+	/* IPI for generic single function call */
+	set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
+				call_function_single_interrupt);
+
 	/* Low priority IPI to cleanup after moving an irq */
 	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
 #endif
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 0cb7aadc87c..575aa3d7248 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -121,132 +121,23 @@ static void native_smp_send_reschedule(int cpu)
 	send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
 }
 
-/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
- */
-static DEFINE_SPINLOCK(call_lock);
-
-struct call_data_struct {
-	void (*func) (void *info);
-	void *info;
-	atomic_t started;
-	atomic_t finished;
-	int wait;
-};
-
-void lock_ipi_call_lock(void)
+void native_send_call_func_single_ipi(int cpu)
 {
-	spin_lock_irq(&call_lock);
-}
-
-void unlock_ipi_call_lock(void)
-{
-	spin_unlock_irq(&call_lock);
-}
-
-static struct call_data_struct *call_data;
-
-static void __smp_call_function(void (*func) (void *info), void *info,
-				int nonatomic, int wait)
-{
-	struct call_data_struct data;
-	int cpus = num_online_cpus() - 1;
-
-	if (!cpus)
-		return;
-
-	data.func = func;
-	data.info = info;
-	atomic_set(&data.started, 0);
-	data.wait = wait;
-	if (wait)
-		atomic_set(&data.finished, 0);
-
-	call_data = &data;
-	mb();
-
-	/* Send a message to all other CPUs and wait for them to respond */
-	send_IPI_allbutself(CALL_FUNCTION_VECTOR);
-
-	/* Wait for response */
-	while (atomic_read(&data.started) != cpus)
-		cpu_relax();
-
-	if (wait)
-		while (atomic_read(&data.finished) != cpus)
-			cpu_relax();
+	send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR);
 }
 
-
-/**
- * smp_call_function_mask(): Run a function on a set of other CPUs.
- * @mask: The set of cpus to run on.  Must not include the current cpu.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @wait: If true, wait (atomically) until function has completed on other CPUs.
- *
-  * Returns 0 on success, else a negative status code.
- *
- * If @wait is true, then returns once @func has returned; otherwise
- * it returns just before the target cpu calls @func.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-static int
-native_smp_call_function_mask(cpumask_t mask,
-			      void (*func)(void *), void *info,
-			      int wait)
+void native_send_call_func_ipi(cpumask_t mask)
 {
-	struct call_data_struct data;
 	cpumask_t allbutself;
-	int cpus;
-
-	/* Can deadlock when called with interrupts disabled */
-	WARN_ON(irqs_disabled());
-
-	/* Holding any lock stops cpus from going down. */
-	spin_lock(&call_lock);
 
 	allbutself = cpu_online_map;
 	cpu_clear(smp_processor_id(), allbutself);
 
-	cpus_and(mask, mask, allbutself);
-	cpus = cpus_weight(mask);
-
-	if (!cpus) {
-		spin_unlock(&call_lock);
-		return 0;
-	}
-
-	data.func = func;
-	data.info = info;
-	atomic_set(&data.started, 0);
-	data.wait = wait;
-	if (wait)
-		atomic_set(&data.finished, 0);
-
-	call_data = &data;
-	wmb();
-
-	/* Send a message to other CPUs */
 	if (cpus_equal(mask, allbutself) &&
 	    cpus_equal(cpu_online_map, cpu_callout_map))
 		send_IPI_allbutself(CALL_FUNCTION_VECTOR);
 	else
 		send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
-
-	/* Wait for response */
-	while (atomic_read(&data.started) != cpus)
-		cpu_relax();
-
-	if (wait)
-		while (atomic_read(&data.finished) != cpus)
-			cpu_relax();
-	spin_unlock(&call_lock);
-
-	return 0;
 }
 
 static void stop_this_cpu(void *dummy)
@@ -268,18 +159,13 @@ static void stop_this_cpu(void *dummy)
 
 static void native_smp_send_stop(void)
 {
-	int nolock;
 	unsigned long flags;
 
 	if (reboot_force)
 		return;
 
-	/* Don't deadlock on the call lock in panic */
-	nolock = !spin_trylock(&call_lock);
+	smp_call_function(stop_this_cpu, NULL, 0, 0);
 	local_irq_save(flags);
-	__smp_call_function(stop_this_cpu, NULL, 0, 0);
-	if (!nolock)
-		spin_unlock(&call_lock);
 	disable_local_APIC();
 	local_irq_restore(flags);
 }
@@ -301,33 +187,28 @@ void smp_reschedule_interrupt(struct pt_regs *regs)
 
 void smp_call_function_interrupt(struct pt_regs *regs)
 {
-	void (*func) (void *info) = call_data->func;
-	void *info = call_data->info;
-	int wait = call_data->wait;
-
 	ack_APIC_irq();
-	/*
-	 * Notify initiating CPU that I've grabbed the data and am
-	 * about to execute the function
-	 */
-	mb();
-	atomic_inc(&call_data->started);
-	/*
-	 * At this point the info structure may be out of scope unless wait==1
-	 */
 	irq_enter();
-	(*func)(info);
+	generic_smp_call_function_interrupt();
 #ifdef CONFIG_X86_32
 	__get_cpu_var(irq_stat).irq_call_count++;
 #else
 	add_pda(irq_call_count, 1);
 #endif
 	irq_exit();
+}
 
-	if (wait) {
-		mb();
-		atomic_inc(&call_data->finished);
-	}
+void smp_call_function_single_interrupt(void)
+{
+	ack_APIC_irq();
+	irq_enter();
+	generic_smp_call_function_single_interrupt();
+#ifdef CONFIG_X86_32
+	__get_cpu_var(irq_stat).irq_call_count++;
+#else
+	add_pda(irq_call_count, 1);
+#endif
+	irq_exit();
 }
 
 struct smp_ops smp_ops = {
@@ -338,7 +219,8 @@ struct smp_ops smp_ops = {
 
 	.smp_send_stop = native_smp_send_stop,
 	.smp_send_reschedule = native_smp_send_reschedule,
-	.smp_call_function_mask = native_smp_call_function_mask,
+
+	.send_call_func_ipi = native_send_call_func_ipi,
+	.send_call_func_single_ipi = native_send_call_func_single_ipi,
 };
 EXPORT_SYMBOL_GPL(smp_ops);
-
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 56078d61c79..89647898f54 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -345,7 +345,7 @@ static void __cpuinit start_secondary(void *unused)
 	 * lock helps us to not include this cpu in a currently in progress
 	 * smp_call_function().
 	 */
-	lock_ipi_call_lock();
+	ipi_call_lock_irq();
 #ifdef CONFIG_X86_64
 	spin_lock(&vector_lock);
 
@@ -357,7 +357,7 @@ static void __cpuinit start_secondary(void *unused)
 	spin_unlock(&vector_lock);
 #endif
 	cpu_set(smp_processor_id(), cpu_online_map);
-	unlock_ipi_call_lock();
+	ipi_call_unlock_irq();
 	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
 
 	setup_secondary_clock();
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c
index 3449064d141..99941b37eca 100644
--- a/arch/x86/kernel/smpcommon.c
+++ b/arch/x86/kernel/smpcommon.c
@@ -25,59 +25,3 @@ __cpuinit void init_gdt(int cpu)
 	per_cpu(cpu_number, cpu) = cpu;
 }
 #endif
-
-/**
- * smp_call_function(): Run a function on all other CPUs.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: Unused.
- * @wait: If true, wait (atomically) until function has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code.
- *
- * If @wait is true, then returns once @func has returned; otherwise
- * it returns just before the target cpu calls @func.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-int smp_call_function(void (*func) (void *info), void *info, int nonatomic,
-		      int wait)
-{
-	return smp_call_function_mask(cpu_online_map, func, info, wait);
-}
-EXPORT_SYMBOL(smp_call_function);
-
-/**
- * smp_call_function_single - Run a function on a specific CPU
- * @cpu: The target CPU.  Cannot be the calling CPU.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: Unused.
- * @wait: If true, wait until function has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code.
- *
- * If @wait is true, then returns once @func has returned; otherwise
- * it returns just before the target cpu calls @func.
- */
-int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
-			     int nonatomic, int wait)
-{
-	/* prevent preemption and reschedule on another processor */
-	int ret;
-	int me = get_cpu();
-	if (cpu == me) {
-		local_irq_disable();
-		func(info);
-		local_irq_enable();
-		put_cpu();
-		return 0;
-	}
-
-	ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
-
-	put_cpu();
-	return ret;
-}
-EXPORT_SYMBOL(smp_call_function_single);
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 8acbf0cdf1a..cb34407a993 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -955,94 +955,24 @@ static void smp_stop_cpu_function(void *dummy)
 		halt();
 }
 
-static DEFINE_SPINLOCK(call_lock);
-
-struct call_data_struct {
-	void (*func) (void *info);
-	void *info;
-	volatile unsigned long started;
-	volatile unsigned long finished;
-	int wait;
-};
-
-static struct call_data_struct *call_data;
-
 /* execute a thread on a new CPU.  The function to be called must be
  * previously set up.  This is used to schedule a function for
  * execution on all CPUs - set up the function then broadcast a
  * function_interrupt CPI to come here on each CPU */
 static void smp_call_function_interrupt(void)
 {
-	void (*func) (void *info) = call_data->func;
-	void *info = call_data->info;
-	/* must take copy of wait because call_data may be replaced
-	 * unless the function is waiting for us to finish */
-	int wait = call_data->wait;
-	__u8 cpu = smp_processor_id();
-
-	/*
-	 * Notify initiating CPU that I've grabbed the data and am
-	 * about to execute the function
-	 */
-	mb();
-	if (!test_and_clear_bit(cpu, &call_data->started)) {
-		/* If the bit wasn't set, this could be a replay */
-		printk(KERN_WARNING "VOYAGER SMP: CPU %d received call funtion"
-		       " with no call pending\n", cpu);
-		return;
-	}
-	/*
-	 * At this point the info structure may be out of scope unless wait==1
-	 */
 	irq_enter();
-	(*func) (info);
+	generic_smp_call_function_interrupt();
 	__get_cpu_var(irq_stat).irq_call_count++;
 	irq_exit();
-	if (wait) {
-		mb();
-		clear_bit(cpu, &call_data->finished);
-	}
 }
 
-static int
-voyager_smp_call_function_mask(cpumask_t cpumask,
-			       void (*func) (void *info), void *info, int wait)
+static void smp_call_function_single_interrupt(void)
 {
-	struct call_data_struct data;
-	u32 mask = cpus_addr(cpumask)[0];
-
-	mask &= ~(1 << smp_processor_id());
-
-	if (!mask)
-		return 0;
-
-	/* Can deadlock when called with interrupts disabled */
-	WARN_ON(irqs_disabled());
-
-	data.func = func;
-	data.info = info;
-	data.started = mask;
-	data.wait = wait;
-	if (wait)
-		data.finished = mask;
-
-	spin_lock(&call_lock);
-	call_data = &data;
-	wmb();
-	/* Send a message to all other CPUs and wait for them to respond */
-	send_CPI(mask, VIC_CALL_FUNCTION_CPI);
-
-	/* Wait for response */
-	while (data.started)
-		barrier();
-
-	if (wait)
-		while (data.finished)
-			barrier();
-
-	spin_unlock(&call_lock);
-
-	return 0;
+	irq_enter();
+	generic_smp_call_function_single_interrupt();
+	__get_cpu_var(irq_stat).irq_call_count++;
+	irq_exit();
 }
 
 /* Sorry about the name.  In an APIC based system, the APICs
@@ -1099,6 +1029,12 @@ void smp_qic_call_function_interrupt(struct pt_regs *regs)
 	smp_call_function_interrupt();
 }
 
+void smp_qic_call_function_single_interrupt(struct pt_regs *regs)
+{
+	ack_QIC_CPI(QIC_CALL_FUNCTION_SINGLE_CPI);
+	smp_call_function_single_interrupt();
+}
+
 void smp_vic_cpi_interrupt(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
@@ -1119,6 +1055,8 @@ void smp_vic_cpi_interrupt(struct pt_regs *regs)
 		smp_enable_irq_interrupt();
 	if (test_and_clear_bit(VIC_CALL_FUNCTION_CPI, &vic_cpi_mailbox[cpu]))
 		smp_call_function_interrupt();
+	if (test_and_clear_bit(VIC_CALL_FUNCTION_SINGLE_CPI, &vic_cpi_mailbox[cpu]))
+		smp_call_function_single_interrupt();
 	set_irq_regs(old_regs);
 }
 
@@ -1862,5 +1800,7 @@ struct smp_ops smp_ops = {
 
 	.smp_send_stop = voyager_smp_send_stop,
 	.smp_send_reschedule = voyager_smp_send_reschedule,
-	.smp_call_function_mask = voyager_smp_call_function_mask,
+
+	.send_call_func_ipi = native_send_call_func_ipi,
+	.send_call_func_single_ipi = native_send_call_func_single_ipi,
 };
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index f09c1c69c37..8e317782fe3 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1108,7 +1108,9 @@ static const struct smp_ops xen_smp_ops __initdata = {
 
 	.smp_send_stop = xen_smp_send_stop,
 	.smp_send_reschedule = xen_smp_send_reschedule,
-	.smp_call_function_mask = xen_smp_call_function_mask,
+
+	.send_call_func_ipi = xen_smp_send_call_function_ipi,
+	.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
 };
 #endif	/* CONFIG_SMP */
 
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index df40bf74ea7..5c01590380b 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -558,7 +558,7 @@ static void drop_mm_ref(struct mm_struct *mm)
 	}
 
 	if (!cpus_empty(mask))
-		xen_smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
+		smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
 }
 #else
 static void drop_mm_ref(struct mm_struct *mm)
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 94e69000f98..b3786e749b8 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -36,27 +36,14 @@
 #include "mmu.h"
 
 static cpumask_t xen_cpu_initialized_map;
-static DEFINE_PER_CPU(int, resched_irq) = -1;
-static DEFINE_PER_CPU(int, callfunc_irq) = -1;
-static DEFINE_PER_CPU(int, debug_irq) = -1;
-
-/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
- */
-static DEFINE_SPINLOCK(call_lock);
 
-struct call_data_struct {
-	void (*func) (void *info);
-	void *info;
-	atomic_t started;
-	atomic_t finished;
-	int wait;
-};
+static DEFINE_PER_CPU(int, resched_irq);
+static DEFINE_PER_CPU(int, callfunc_irq);
+static DEFINE_PER_CPU(int, callfuncsingle_irq);
+static DEFINE_PER_CPU(int, debug_irq) = -1;
 
 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
-
-static struct call_data_struct *call_data;
+static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
 
 /*
  * Reschedule call back. Nothing to do,
@@ -122,6 +109,17 @@ static int xen_smp_intr_init(unsigned int cpu)
 		goto fail;
 	per_cpu(debug_irq, cpu) = rc;
 
+	callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
+	rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
+				    cpu,
+				    xen_call_function_single_interrupt,
+				    IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
+				    callfunc_name,
+				    NULL);
+	if (rc < 0)
+		goto fail;
+	per_cpu(callfuncsingle_irq, cpu) = rc;
+
 	return 0;
 
  fail:
@@ -131,6 +129,9 @@ static int xen_smp_intr_init(unsigned int cpu)
 		unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
 	if (per_cpu(debug_irq, cpu) >= 0)
 		unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
+	if (per_cpu(callfuncsingle_irq, cpu) >= 0)
+		unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL);
+
 	return rc;
 }
 
@@ -338,7 +339,6 @@ void xen_smp_send_reschedule(int cpu)
 	xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
 }
 
-
 static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
 {
 	unsigned cpu;
@@ -349,83 +349,42 @@ static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
 		xen_send_IPI_one(cpu, vector);
 }
 
+void xen_smp_send_call_function_ipi(cpumask_t mask)
+{
+	int cpu;
+
+	xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
+
+	/* Make sure other vcpus get a chance to run if they need to. */
+	for_each_cpu_mask(cpu, mask) {
+		if (xen_vcpu_stolen(cpu)) {
+			HYPERVISOR_sched_op(SCHEDOP_yield, 0);
+			break;
+		}
+	}
+}
+
+void xen_smp_send_call_function_single_ipi(int cpu)
+{
+	xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR);
+}
+
 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
 {
-	void (*func) (void *info) = call_data->func;
-	void *info = call_data->info;
-	int wait = call_data->wait;
-
-	/*
-	 * Notify initiating CPU that I've grabbed the data and am
-	 * about to execute the function
-	 */
-	mb();
-	atomic_inc(&call_data->started);
-	/*
-	 * At this point the info structure may be out of scope unless wait==1
-	 */
 	irq_enter();
-	(*func)(info);
+	generic_smp_call_function_interrupt();
 	__get_cpu_var(irq_stat).irq_call_count++;
 	irq_exit();
 
-	if (wait) {
-		mb();		/* commit everything before setting finished */
-		atomic_inc(&call_data->finished);
-	}
-
 	return IRQ_HANDLED;
 }
 
-int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
-			       void *info, int wait)
+static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
 {
-	struct call_data_struct data;
-	int cpus, cpu;
-	bool yield;
-
-	/* Holding any lock stops cpus from going down. */
-	spin_lock(&call_lock);
-
-	cpu_clear(smp_processor_id(), mask);
-
-	cpus = cpus_weight(mask);
-	if (!cpus) {
-		spin_unlock(&call_lock);
-		return 0;
-	}
-
-	/* Can deadlock when called with interrupts disabled */
-	WARN_ON(irqs_disabled());
-
-	data.func = func;
-	data.info = info;
-	atomic_set(&data.started, 0);
-	data.wait = wait;
-	if (wait)
-		atomic_set(&data.finished, 0);
-
-	call_data = &data;
-	mb();			/* write everything before IPI */
-
-	/* Send a message to other CPUs and wait for them to respond */
-	xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
-
-	/* Make sure other vcpus get a chance to run if they need to. */
-	yield = false;
-	for_each_cpu_mask(cpu, mask)
-		if (xen_vcpu_stolen(cpu))
-			yield = true;
-
-	if (yield)
-		HYPERVISOR_sched_op(SCHEDOP_yield, 0);
-
-	/* Wait for response */
-	while (atomic_read(&data.started) != cpus ||
-	       (wait && atomic_read(&data.finished) != cpus))
-		cpu_relax();
-
-	spin_unlock(&call_lock);
+	irq_enter();
+	generic_smp_call_function_single_interrupt();
+	__get_cpu_var(irq_stat).irq_call_count++;
+	irq_exit();
 
-	return 0;
+	return IRQ_HANDLED;
 }
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index f1063ae0803..a636ab5e134 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -46,13 +46,8 @@ void xen_smp_cpus_done(unsigned int max_cpus);
 
 void xen_smp_send_stop(void);
 void xen_smp_send_reschedule(int cpu);
-int xen_smp_call_function (void (*func) (void *info), void *info, int nonatomic,
-			   int wait);
-int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info,
-				 int nonatomic, int wait);
-
-int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
-			       void *info, int wait);
+void xen_smp_send_call_function_ipi(cpumask_t mask);
+void xen_smp_send_call_function_single_ipi(int cpu);
 
 
 /* Declare an asm function, along with symbols needed to make it
diff --git a/include/asm-x86/hw_irq_32.h b/include/asm-x86/hw_irq_32.h
index ea88054e03f..a87b1320c78 100644
--- a/include/asm-x86/hw_irq_32.h
+++ b/include/asm-x86/hw_irq_32.h
@@ -32,6 +32,7 @@ extern void (*const interrupt[NR_IRQS])(void);
 void reschedule_interrupt(void);
 void invalidate_interrupt(void);
 void call_function_interrupt(void);
+void call_function_single_interrupt(void);
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
diff --git a/include/asm-x86/hw_irq_64.h b/include/asm-x86/hw_irq_64.h
index 0062ef390f6..fe657812d4d 100644
--- a/include/asm-x86/hw_irq_64.h
+++ b/include/asm-x86/hw_irq_64.h
@@ -68,6 +68,7 @@
 #define ERROR_APIC_VECTOR	0xfe
 #define RESCHEDULE_VECTOR	0xfd
 #define CALL_FUNCTION_VECTOR	0xfc
+#define	CALL_FUNCTION_SINGLE_VECTOR	0xfb
 /* fb free - please don't readd KDB here because it's useless
    (hint - think what a NMI bit does to a vector) */
 #define THERMAL_APIC_VECTOR	0xfa
@@ -102,6 +103,7 @@ void spurious_interrupt(void);
 void error_interrupt(void);
 void reschedule_interrupt(void);
 void call_function_interrupt(void);
+void call_function_single_interrupt(void);
 void irq_move_cleanup_interrupt(void);
 void invalidate_interrupt0(void);
 void invalidate_interrupt1(void);
diff --git a/include/asm-x86/mach-default/entry_arch.h b/include/asm-x86/mach-default/entry_arch.h
index bc861469bdb..9283b60a1dd 100644
--- a/include/asm-x86/mach-default/entry_arch.h
+++ b/include/asm-x86/mach-default/entry_arch.h
@@ -13,6 +13,7 @@
 BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
 BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
 BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
+BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
 #endif
 
 /*
diff --git a/include/asm-x86/mach-default/irq_vectors.h b/include/asm-x86/mach-default/irq_vectors.h
index 881c63ca61a..ed7d4955c65 100644
--- a/include/asm-x86/mach-default/irq_vectors.h
+++ b/include/asm-x86/mach-default/irq_vectors.h
@@ -48,6 +48,7 @@
 #define INVALIDATE_TLB_VECTOR	0xfd
 #define RESCHEDULE_VECTOR	0xfc
 #define CALL_FUNCTION_VECTOR	0xfb
+#define CALL_FUNCTION_SINGLE_VECTOR	0xfa
 
 #define THERMAL_APIC_VECTOR	0xf0
 /*
diff --git a/include/asm-x86/mach-voyager/entry_arch.h b/include/asm-x86/mach-voyager/entry_arch.h
index 4a1e1e8c10b..ae52624b593 100644
--- a/include/asm-x86/mach-voyager/entry_arch.h
+++ b/include/asm-x86/mach-voyager/entry_arch.h
@@ -23,4 +23,4 @@ BUILD_INTERRUPT(qic_invalidate_interrupt, QIC_INVALIDATE_CPI);
 BUILD_INTERRUPT(qic_reschedule_interrupt, QIC_RESCHEDULE_CPI);
 BUILD_INTERRUPT(qic_enable_irq_interrupt, QIC_ENABLE_IRQ_CPI);
 BUILD_INTERRUPT(qic_call_function_interrupt, QIC_CALL_FUNCTION_CPI);
-
+BUILD_INTERRUPT(qic_call_function_single_interrupt, QIC_CALL_FUNCTION_SINGLE_CPI);
diff --git a/include/asm-x86/mach-voyager/irq_vectors.h b/include/asm-x86/mach-voyager/irq_vectors.h
index 165421f5821..fda57ad37b5 100644
--- a/include/asm-x86/mach-voyager/irq_vectors.h
+++ b/include/asm-x86/mach-voyager/irq_vectors.h
@@ -33,6 +33,7 @@
 #define VIC_RESCHEDULE_CPI		4
 #define VIC_ENABLE_IRQ_CPI		5
 #define VIC_CALL_FUNCTION_CPI		6
+#define VIC_CALL_FUNCTION_SINGLE_CPI	7
 
 /* Now the QIC CPIs:  Since we don't need the two initial levels,
  * these are 2 less than the VIC CPIs */
@@ -42,9 +43,10 @@
 #define QIC_RESCHEDULE_CPI		(VIC_RESCHEDULE_CPI - QIC_CPI_OFFSET)
 #define QIC_ENABLE_IRQ_CPI		(VIC_ENABLE_IRQ_CPI - QIC_CPI_OFFSET)
 #define QIC_CALL_FUNCTION_CPI		(VIC_CALL_FUNCTION_CPI - QIC_CPI_OFFSET)
+#define QIC_CALL_FUNCTION_SINGLE_CPI	(VIC_CALL_FUNCTION_SINGLE_CPI - QIC_CPI_OFFSET)
 
 #define VIC_START_FAKE_CPI		VIC_TIMER_CPI
-#define VIC_END_FAKE_CPI		VIC_CALL_FUNCTION_CPI
+#define VIC_END_FAKE_CPI		VIC_CALL_FUNCTION_SINGLE_CPI
 
 /* this is the SYS_INT CPI. */
 #define VIC_SYS_INT			8
diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
index 1ebaa5cd311..e3c24807b59 100644
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@ -59,9 +59,9 @@ struct smp_ops {
 
 	void (*smp_send_stop)(void);
 	void (*smp_send_reschedule)(int cpu);
-	int (*smp_call_function_mask)(cpumask_t mask,
-				      void (*func)(void *info), void *info,
-				      int wait);
+
+	void (*send_call_func_ipi)(cpumask_t mask);
+	void (*send_call_func_single_ipi)(int cpu);
 };
 
 /* Globals due to paravirt */
@@ -103,17 +103,22 @@ static inline void smp_send_reschedule(int cpu)
 	smp_ops.smp_send_reschedule(cpu);
 }
 
-static inline int smp_call_function_mask(cpumask_t mask,
-					 void (*func) (void *info), void *info,
-					 int wait)
+static inline void arch_send_call_function_single_ipi(int cpu)
+{
+	smp_ops.send_call_func_single_ipi(cpu);
+}
+
+static inline void arch_send_call_function_ipi(cpumask_t mask)
 {
-	return smp_ops.smp_call_function_mask(mask, func, info, wait);
+	smp_ops.send_call_func_ipi(mask);
 }
 
 void native_smp_prepare_boot_cpu(void);
 void native_smp_prepare_cpus(unsigned int max_cpus);
 void native_smp_cpus_done(unsigned int max_cpus);
 int native_cpu_up(unsigned int cpunum);
+void native_send_call_func_ipi(cpumask_t mask);
+void native_send_call_func_single_ipi(int cpu);
 
 extern int __cpu_disable(void);
 extern void __cpu_die(unsigned int cpu);
@@ -202,7 +207,5 @@ extern void cpu_uninit(void);
 #endif
 
 extern void smp_alloc_memory(void);
-extern void lock_ipi_call_lock(void);
-extern void unlock_ipi_call_lock(void);
 #endif /* __ASSEMBLY__ */
 #endif
diff --git a/include/asm-x86/xen/events.h b/include/asm-x86/xen/events.h
index 596312a7bfc..f8d57ea1f05 100644
--- a/include/asm-x86/xen/events.h
+++ b/include/asm-x86/xen/events.h
@@ -4,6 +4,7 @@
 enum ipi_vector {
 	XEN_RESCHEDULE_VECTOR,
 	XEN_CALL_FUNCTION_VECTOR,
+	XEN_CALL_FUNCTION_SINGLE_VECTOR,
 
 	XEN_NR_IPIS,
 };
-- 
cgit v1.2.3-70-g09d2


From 41aefdcc98fdba47459eab67630293d67e855fc3 Mon Sep 17 00:00:00 2001
From: Max Asbock <masbock@us.ibm.com>
Date: Wed, 25 Jun 2008 14:45:28 -0700
Subject: x86: shift bits the right way in native_read_tscp

native_read_tscp shifts the bits in the high order value in the
wrong direction, the attached patch fixes that.

Signed-off-by: Max Asbock <masbock@linux.vnet.ibm.com>
Acked-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/msr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h
index 3707650a169..2b5f2c91db2 100644
--- a/include/asm-x86/msr.h
+++ b/include/asm-x86/msr.h
@@ -18,7 +18,7 @@ static inline unsigned long long native_read_tscp(unsigned int *aux)
 	unsigned long low, high;
 	asm volatile(".byte 0x0f,0x01,0xf9"
 		     : "=a" (low), "=d" (high), "=c" (*aux));
-	return low | ((u64)high >> 32);
+	return low | ((u64)high << 32);
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From ac2564c44509aba1e21d878ffd4f34ac96811762 Mon Sep 17 00:00:00 2001
From: Matti Linnanvuori <mattilinnanvuori@yahoo.com>
Date: Sun, 29 Jun 2008 09:22:43 +0300
Subject: x86: add compilation checks to pci_unmap_*() macros

Add compilation checks to pci_unmap_ macros.

Signed-off-by: Matti Linnanvuori <mattilinnanvuori@yahoo.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/pci_32.h | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/pci_32.h b/include/asm-x86/pci_32.h
index 8c4c3a0368e..a50d4685128 100644
--- a/include/asm-x86/pci_32.h
+++ b/include/asm-x86/pci_32.h
@@ -18,12 +18,14 @@ struct pci_dev;
 #define PCI_DMA_BUS_IS_PHYS	(1)
 
 /* pci_unmap_{page,single} is a nop so... */
-#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
-#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
-#define pci_unmap_addr(PTR, ADDR_NAME)		(0)
-#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL)	do { } while (0)
-#define pci_unmap_len(PTR, LEN_NAME)		(0)
-#define pci_unmap_len_set(PTR, LEN_NAME, VAL)	do { } while (0)
+#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)	dma_addr_t ADDR_NAME[0];
+#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)	unsigned LEN_NAME[0];
+#define pci_unmap_addr(PTR, ADDR_NAME)	sizeof((PTR)->ADDR_NAME)
+#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \
+	do { break; } while (pci_unmap_addr(PTR, ADDR_NAME))
+#define pci_unmap_len(PTR, LEN_NAME)		sizeof((PTR)->LEN_NAME)
+#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
+	do { break; } while (pci_unmap_len(PTR, LEN_NAME))
 
 
 #endif /* __KERNEL__ */
-- 
cgit v1.2.3-70-g09d2


From 84e65b0a84a2c856bef36f13d122047678408b0a Mon Sep 17 00:00:00 2001
From: Richard Kennedy <richard@rsk.demon.co.uk>
Date: Fri, 4 Jul 2008 13:56:16 +0100
Subject: x86: cacheline_align tss_struct

The manual padding to align on cacheline size only worked in 32 bit
In 64 bit the structure was not aligned and contained wasted space.

use the compiler ____cachline_aligned to save space & properly align
this structure.

x86_64_default size goes from 9136 -> 8960
x86_64_AMD     size goes from 9136 -> 8896

built & running on 2.6.26-rc8.

Signed-off-by: Richard Kennedy <richard@rsk.demon.co.uk>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/processor.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 559105220a4..4ab2ede6f4b 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -262,16 +262,12 @@ struct tss_struct {
 	unsigned long		io_bitmap_max;
 	struct thread_struct	*io_bitmap_owner;
 
-	/*
-	 * Pad the TSS to be cacheline-aligned (size is 0x100):
-	 */
-	unsigned long		__cacheline_filler[35];
 	/*
 	 * .. and then another 0x100 bytes for the emergency kernel stack:
 	 */
 	unsigned long		stack[64];
 
-} __attribute__((packed));
+} ____cacheline_aligned;
 
 DECLARE_PER_CPU(struct tss_struct, init_tss);
 
-- 
cgit v1.2.3-70-g09d2


From ca3739327b89bb4053a62ac41b67b106c1967ab0 Mon Sep 17 00:00:00 2001
From: Anthony Liguori <aliguori@us.ibm.com>
Date: Thu, 3 Jul 2008 19:02:36 +0300
Subject: x86: KVM guest: Add memory clobber to hypercalls

Hypercalls can modify arbitrary regions of memory.  Make sure to indicate this
in the clobber list.  This fixes a hang when using KVM_GUEST kernel built with
GCC 4.3.0.

This was originally spotted and analyzed by Marcelo.

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 include/asm-x86/kvm_para.h | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
index bfd9900742b..76f392146da 100644
--- a/include/asm-x86/kvm_para.h
+++ b/include/asm-x86/kvm_para.h
@@ -71,7 +71,8 @@ static inline long kvm_hypercall0(unsigned int nr)
 	long ret;
 	asm volatile(KVM_HYPERCALL
 		     : "=a"(ret)
-		     : "a"(nr));
+		     : "a"(nr)
+		     : "memory");
 	return ret;
 }
 
@@ -80,7 +81,8 @@ static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
 	long ret;
 	asm volatile(KVM_HYPERCALL
 		     : "=a"(ret)
-		     : "a"(nr), "b"(p1));
+		     : "a"(nr), "b"(p1)
+		     : "memory");
 	return ret;
 }
 
@@ -90,7 +92,8 @@ static inline long kvm_hypercall2(unsigned int nr, unsigned long p1,
 	long ret;
 	asm volatile(KVM_HYPERCALL
 		     : "=a"(ret)
-		     : "a"(nr), "b"(p1), "c"(p2));
+		     : "a"(nr), "b"(p1), "c"(p2)
+		     : "memory");
 	return ret;
 }
 
@@ -100,7 +103,8 @@ static inline long kvm_hypercall3(unsigned int nr, unsigned long p1,
 	long ret;
 	asm volatile(KVM_HYPERCALL
 		     : "=a"(ret)
-		     : "a"(nr), "b"(p1), "c"(p2), "d"(p3));
+		     : "a"(nr), "b"(p1), "c"(p2), "d"(p3)
+		     : "memory");
 	return ret;
 }
 
@@ -111,7 +115,8 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
 	long ret;
 	asm volatile(KVM_HYPERCALL
 		     : "=a"(ret)
-		     : "a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4));
+		     : "a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4)
+		     : "memory");
 	return ret;
 }
 
-- 
cgit v1.2.3-70-g09d2


From aa276e1cafb3ce9d01d1e837bcd67e92616013ac Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 9 Jun 2008 19:15:00 +0200
Subject: x86, clockevents: add C1E aware idle function

C1E on AMD machines is like C3 but without control from the OS. Up to
now we disabled the local apic timer for those machines as it stops
when the CPU goes into C1E. This excludes those machines from high
resolution timers / dynamic ticks, which hurts especially X2 based
laptops.

The current boot time C1E detection has another, more serious flaw
as well: some BIOSes do not enable C1E until the ACPI processor module
is loaded. This causes systems to stop working after that point.

To work nicely with C1E enabled machines we use a separate idle
function, which checks on idle entry whether C1E was enabled in the
Interrupt Pending Message MSR. This allows us to do timer broadcasting
for C1E and covers the late enablement of C1E as well.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c    |  5 ++--
 arch/x86/kernel/apic_64.c    | 26 +----------------
 arch/x86/kernel/cpu/amd.c    | 30 --------------------
 arch/x86/kernel/cpu/amd_64.c | 25 -----------------
 arch/x86/kernel/process.c    | 66 ++++++++++++++++++++++++++++++++++++++++++++
 include/asm-x86/apic.h       |  3 --
 kernel/time/tick-broadcast.c |  6 ++--
 7 files changed, 73 insertions(+), 88 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 4b99b1bdeb6..c44206e731d 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -64,9 +64,8 @@ static int enable_local_apic __initdata;
 
 /* Local APIC timer verification ok */
 static int local_apic_timer_verify_ok;
-/* Disable local APIC timer from the kernel commandline or via dmi quirk
-   or using CPU MSR check */
-int local_apic_timer_disabled;
+/* Disable local APIC timer from the kernel commandline or via dmi quirk */
+static int local_apic_timer_disabled;
 /* Local APIC timer works in C2 */
 int local_apic_timer_c2_ok;
 EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 0633cfd0dc2..a5cc8447cf4 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -43,7 +43,7 @@
 #include <mach_ipi.h>
 #include <mach_apic.h>
 
-int disable_apic_timer __cpuinitdata;
+static int disable_apic_timer __cpuinitdata;
 static int apic_calibrate_pmtmr __initdata;
 int disable_apic;
 
@@ -422,32 +422,8 @@ void __init setup_boot_APIC_clock(void)
 	setup_APIC_timer();
 }
 
-/*
- * AMD C1E enabled CPUs have a real nasty problem: Some BIOSes set the
- * C1E flag only in the secondary CPU, so when we detect the wreckage
- * we already have enabled the boot CPU local apic timer. Check, if
- * disable_apic_timer is set and the DUMMY flag is cleared. If yes,
- * set the DUMMY flag again and force the broadcast mode in the
- * clockevents layer.
- */
-static void __cpuinit check_boot_apic_timer_broadcast(void)
-{
-	if (!disable_apic_timer ||
-	    (lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY))
-		return;
-
-	printk(KERN_INFO "AMD C1E detected late. Force timer broadcast.\n");
-	lapic_clockevent.features |= CLOCK_EVT_FEAT_DUMMY;
-
-	local_irq_enable();
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
-			   &boot_cpu_physical_apicid);
-	local_irq_disable();
-}
-
 void __cpuinit setup_secondary_APIC_clock(void)
 {
-	check_boot_apic_timer_broadcast();
 	setup_APIC_timer();
 }
 
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index e76b49e7a91..acc891ae590 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -24,31 +24,6 @@
 extern void vide(void);
 __asm__(".align 4\nvide: ret");
 
-#ifdef CONFIG_X86_LOCAL_APIC
-
-/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */
-static __cpuinit int amd_apic_timer_broken(struct cpuinfo_x86 *c)
-{
-	u32 lo, hi;
-
-	if (c->x86 < 0x0F)
-		return 0;
-
-	/* Family 0x0f models < rev F do not have this MSR */
-	if (c->x86 == 0x0f && c->x86_model < 0x40)
-		return 0;
-
-	rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
-	if (lo & K8_INTP_C1E_ACTIVE_MASK) {
-		if (smp_processor_id() != boot_cpu_physical_apicid)
-			printk(KERN_INFO "AMD C1E detected late. "
-			       "Force timer broadcast.\n");
-		return 1;
-	}
-	return 0;
-}
-#endif
-
 int force_mwait __cpuinitdata;
 
 static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
@@ -285,11 +260,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 			num_cache_leaves = 3;
 	}
 
-#ifdef CONFIG_X86_LOCAL_APIC
-	if (amd_apic_timer_broken(c))
-		local_apic_timer_disabled = 1;
-#endif
-
 	/* K6s reports MCEs but don't actually have all the MSRs */
 	if (c->x86 < 6)
 		clear_cpu_cap(c, X86_FEATURE_MCE);
diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c
index f5fc161d8f2..f8d20588bde 100644
--- a/arch/x86/kernel/cpu/amd_64.c
+++ b/arch/x86/kernel/cpu/amd_64.c
@@ -110,28 +110,6 @@ static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c)
 #endif
 }
 
-/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */
-static __cpuinit int amd_apic_timer_broken(struct cpuinfo_x86 *c)
-{
-	u32 lo, hi;
-
-	if (c->x86 < 0x0F)
-		return 0;
-
-	/* Family 0x0f models < rev F do not have this MSR */
-	if (c->x86 == 0x0f && c->x86_model < 0x40)
-		return 0;
-
-	rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
-	if (lo & K8_INTP_C1E_ACTIVE_MASK) {
-		if (smp_processor_id() != boot_cpu_physical_apicid)
-			printk(KERN_INFO "AMD C1E detected late. "
-			       "Force timer broadcast.\n");
-		return 1;
-	}
-	return 0;
-}
-
 void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
 {
 	early_init_amd_mc(c);
@@ -212,9 +190,6 @@ void __cpuinit init_amd(struct cpuinfo_x86 *c)
 	if (c->x86 == 0x10)
 		amd_enable_pci_ext_cfg(c);
 
-	if (amd_apic_timer_broken(c))
-		disable_apic_timer = 1;
-
 	if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) {
 		unsigned long long tseg;
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 9fea14607df..68ad3539b14 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -6,6 +6,7 @@
 #include <linux/sched.h>
 #include <linux/module.h>
 #include <linux/pm.h>
+#include <linux/clockchips.h>
 
 struct kmem_cache *task_xstate_cachep;
 
@@ -219,6 +220,68 @@ static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
 	return (edx & MWAIT_EDX_C1);
 }
 
+/*
+ * Check for AMD CPUs, which have potentially C1E support
+ */
+static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)
+{
+	if (c->x86_vendor != X86_VENDOR_AMD)
+		return 0;
+
+	if (c->x86 < 0x0F)
+		return 0;
+
+	/* Family 0x0f models < rev F do not have C1E */
+	if (c->x86 == 0x0f && c->x86_model < 0x40)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * C1E aware idle routine. We check for C1E active in the interrupt
+ * pending message MSR. If we detect C1E, then we handle it the same
+ * way as C3 power states (local apic timer and TSC stop)
+ */
+static void c1e_idle(void)
+{
+	static cpumask_t c1e_mask = CPU_MASK_NONE;
+	static int c1e_detected;
+
+	if (need_resched())
+		return;
+
+	if (!c1e_detected) {
+		u32 lo, hi;
+
+		rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
+		if (lo & K8_INTP_C1E_ACTIVE_MASK) {
+			c1e_detected = 1;
+			mark_tsc_unstable("TSC halt in C1E");
+			printk(KERN_INFO "System has C1E enabled\n");
+		}
+	}
+
+	if (c1e_detected) {
+		int cpu = smp_processor_id();
+
+		if (!cpu_isset(cpu, c1e_mask)) {
+			cpu_set(cpu, c1e_mask);
+			/* Force broadcast so ACPI can not interfere */
+			clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
+					   &cpu);
+			printk(KERN_INFO "Switch to broadcast mode on CPU%d\n",
+			       cpu);
+		}
+		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
+		default_idle();
+		local_irq_disable();
+		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
+		local_irq_enable();
+	} else
+		default_idle();
+}
+
 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_X86_SMP
@@ -236,6 +299,9 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 		 */
 		printk(KERN_INFO "using mwait in idle threads.\n");
 		pm_idle = mwait_idle;
+	} else if (check_c1e_idle(c)) {
+		printk(KERN_INFO "using C1E aware idle routine\n");
+		pm_idle = c1e_idle;
 	} else
 		pm_idle = default_idle;
 }
diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index be9639a9a18..3c387cda95f 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -38,12 +38,9 @@ extern void generic_apic_probe(void);
 extern int apic_verbosity;
 extern int timer_over_8254;
 extern int local_apic_timer_c2_ok;
-extern int local_apic_timer_disabled;
 
-extern int apic_runs_main_timer;
 extern int ioapic_force;
 extern int disable_apic;
-extern int disable_apic_timer;
 
 /*
  * Basic functions accessing APICs.
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 57a1f02e5ec..67f80c26170 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -30,6 +30,7 @@
 struct tick_device tick_broadcast_device;
 static cpumask_t tick_broadcast_mask;
 static DEFINE_SPINLOCK(tick_broadcast_lock);
+static int tick_broadcast_force;
 
 #ifdef CONFIG_TICK_ONESHOT
 static void tick_broadcast_clear_oneshot(int cpu);
@@ -232,10 +233,11 @@ static void tick_do_broadcast_on_off(void *why)
 						     CLOCK_EVT_MODE_SHUTDOWN);
 		}
 		if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
-			dev->features |= CLOCK_EVT_FEAT_DUMMY;
+			tick_broadcast_force = 1;
 		break;
 	case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
-		if (cpu_isset(cpu, tick_broadcast_mask)) {
+		if (!tick_broadcast_force &&
+		    cpu_isset(cpu, tick_broadcast_mask)) {
 			cpu_clear(cpu, tick_broadcast_mask);
 			if (td->mode == TICKDEV_MODE_PERIODIC)
 				tick_setup_periodic(dev, 0);
-- 
cgit v1.2.3-70-g09d2


From 3a27dd1ce5de08e21e0266ddf00e6f1f843bfe8b Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Thu, 12 Jun 2008 20:19:23 +0200
Subject: x86: Move PCI IO ECS code to x86/pci

"Form follows function". Code is now where it belongs to.

Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/amd.c    |  3 ---
 arch/x86/kernel/cpu/amd_64.c |  4 ----
 arch/x86/kernel/cpu/cpu.h    |  2 --
 arch/x86/kernel/setup.c      | 13 -------------
 arch/x86/pci/Makefile_32     |  1 +
 arch/x86/pci/amd_bus.c       | 32 ++++++++++++++++++++++++++++++++
 arch/x86/pci/direct.c        | 16 +++++++++-------
 arch/x86/pci/pci.h           |  1 +
 include/asm-x86/cpufeature.h |  2 --
 9 files changed, 43 insertions(+), 31 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index acc891ae590..81a07ca65d4 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -266,9 +266,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 
 	if (cpu_has_xmm2)
 		set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
-
-	if (c->x86 == 0x10)
-		amd_enable_pci_ext_cfg(c);
 }
 
 static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c
index f8d20588bde..250bfe6064a 100644
--- a/arch/x86/kernel/cpu/amd_64.c
+++ b/arch/x86/kernel/cpu/amd_64.c
@@ -6,7 +6,6 @@
 #include <asm/cacheflush.h>
 
 #include <mach_apic.h>
-#include "cpu.h"
 
 extern int __cpuinit get_model_name(struct cpuinfo_x86 *c);
 extern void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c);
@@ -187,9 +186,6 @@ void __cpuinit init_amd(struct cpuinfo_x86 *c)
 	if (c->x86 == 0x10)
 		fam10h_check_enable_mmcfg();
 
-	if (c->x86 == 0x10)
-		amd_enable_pci_ext_cfg(c);
-
 	if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) {
 		unsigned long long tseg;
 
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index f5d5bb1b554..40ad1893fe8 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -39,5 +39,3 @@ extern int get_model_name(struct cpuinfo_x86 *c);
 extern void display_cacheinfo(struct cpuinfo_x86 *c);
 
 #endif /* CONFIG_X86_32 */
-
-extern void __cpuinit amd_enable_pci_ext_cfg(struct cpuinfo_x86 *c);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 20e14dbef10..6f80b852a19 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -137,16 +137,3 @@ void __init setup_per_cpu_areas(void)
 }
 
 #endif
-#define ENABLE_CF8_EXT_CFG      (1ULL << 46)
-
-void __cpuinit amd_enable_pci_ext_cfg(struct cpuinfo_x86 *c)
-{
-	u64 reg;
-	rdmsrl(MSR_AMD64_NB_CFG, reg);
-	if (!(reg & ENABLE_CF8_EXT_CFG)) {
-		reg |= ENABLE_CF8_EXT_CFG;
-		wrmsrl(MSR_AMD64_NB_CFG, reg);
-	}
-	set_cpu_cap(c, X86_FEATURE_PCI_EXT_CFG);
-}
-
diff --git a/arch/x86/pci/Makefile_32 b/arch/x86/pci/Makefile_32
index 89ec35d00ef..f647e7e56da 100644
--- a/arch/x86/pci/Makefile_32
+++ b/arch/x86/pci/Makefile_32
@@ -22,3 +22,4 @@ pci-$(CONFIG_X86_NUMAQ)		:= numa.o irq.o
 pci-$(CONFIG_NUMA)		+= mp_bus_to_node.o
 
 obj-y				+= $(pci-y) common.o early.o
+obj-y				+= amd_bus.o
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index 5c2799c20e4..15f505d3a78 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -1,5 +1,9 @@
 #include <linux/init.h>
 #include <linux/pci.h>
+#include "pci.h"
+
+#ifdef CONFIG_X86_64
+
 #include <asm/pci-direct.h>
 #include <asm/mpspec.h>
 #include <linux/cpumask.h>
@@ -526,3 +530,31 @@ static int __init early_fill_mp_bus_info(void)
 }
 
 postcore_initcall(early_fill_mp_bus_info);
+
+#endif
+
+/* common 32/64 bit code */
+
+#define ENABLE_CF8_EXT_CFG      (1ULL << 46)
+
+static void enable_pci_io_ecs_per_cpu(void *unused)
+{
+	u64 reg;
+	rdmsrl(MSR_AMD64_NB_CFG, reg);
+	if (!(reg & ENABLE_CF8_EXT_CFG)) {
+		reg |= ENABLE_CF8_EXT_CFG;
+		wrmsrl(MSR_AMD64_NB_CFG, reg);
+	}
+}
+
+static int __init enable_pci_io_ecs(void)
+{
+	/* assume all cpus from fam10h have IO ECS */
+        if (boot_cpu_data.x86 < 0x10)
+		return 0;
+	on_each_cpu(enable_pci_io_ecs_per_cpu, NULL, 1, 1);
+	pci_probe |= PCI_HAS_IO_ECS;
+	return 0;
+}
+
+postcore_initcall(enable_pci_io_ecs);
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c
index 27d61b63def..9915293500f 100644
--- a/arch/x86/pci/direct.c
+++ b/arch/x86/pci/direct.c
@@ -265,14 +265,16 @@ void __init pci_direct_init(int type)
 		 type);
 	if (type == 1) {
 		raw_pci_ops = &pci_direct_conf1;
-		if (!raw_pci_ext_ops && cpu_has_pci_ext_cfg) {
-			printk(KERN_INFO "PCI: Using configuration type 1 "
-			       "for extended access\n");
-			raw_pci_ext_ops = &pci_direct_conf1;
-		}
-	} else {
-		raw_pci_ops = &pci_direct_conf2;
+		if (raw_pci_ext_ops)
+			return;
+		if (!(pci_probe & PCI_HAS_IO_ECS))
+			return;
+		printk(KERN_INFO "PCI: Using configuration type 1 "
+		       "for extended access\n");
+		raw_pci_ext_ops = &pci_direct_conf1;
+		return;
 	}
+	raw_pci_ops = &pci_direct_conf2;
 }
 
 int __init pci_direct_probe(void)
diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h
index f3972b12c60..fd53db50eee 100644
--- a/arch/x86/pci/pci.h
+++ b/arch/x86/pci/pci.h
@@ -27,6 +27,7 @@
 #define PCI_CAN_SKIP_ISA_ALIGN	0x8000
 #define PCI_USE__CRS		0x10000
 #define PCI_CHECK_ENABLE_AMD_MMCONF	0x20000
+#define PCI_HAS_IO_ECS		0x40000
 
 extern unsigned int pci_probe;
 extern unsigned long pirq_table_addr;
diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index 40fcbba00f1..0d609c837a4 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -79,7 +79,6 @@
 #define X86_FEATURE_REP_GOOD	(3*32+16) /* rep microcode works well on this CPU */
 #define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* Mfence synchronizes RDTSC */
 #define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */
-#define X86_FEATURE_PCI_EXT_CFG	(3*32+19) /* PCI extended cfg access */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD Extensions-3 */
@@ -188,7 +187,6 @@ extern const char * const x86_power_flags[32];
 #define cpu_has_gbpages		boot_cpu_has(X86_FEATURE_GBPAGES)
 #define cpu_has_arch_perfmon	boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
 #define cpu_has_pat		boot_cpu_has(X86_FEATURE_PAT)
-#define cpu_has_pci_ext_cfg	boot_cpu_has(X86_FEATURE_PCI_EXT_CFG)
 
 #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
 # define cpu_has_invlpg		1
-- 
cgit v1.2.3-70-g09d2


From ce0c0e50f94e8c55b00a722e8c6e8d6c802be211 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Fri, 2 May 2008 11:46:49 +0200
Subject: x86, generic: CPA add statistics about state of direct mapping v4

Add information about the mapping state of the direct mapping to
/proc/meminfo. I chose /proc/meminfo because that is where all the other
memory statistics are too and it is a generally useful metric even
outside debugging situations. A lot of split kernel pages means the
kernel will run slower.

This way we can see how many large pages are really used for it and how
many are split.

Useful for general insight into the kernel.

v2: Add hotplug locking to 64bit to plug a very obscure theoretical race.
    32bit doesn't need it because it doesn't support hotadd for lowmem.
    Fix some typos
v3: Rename dpages_cnt
    Add CONFIG ifdef for count update as requested by tglx
    Expand description
v4: Fix stupid bugs added in v3
    Move update_page_count to pageattr.c

Signed-off-by: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/init_32.c     |  5 +++++
 arch/x86/mm/init_64.c     |  7 +++++++
 arch/x86/mm/pageattr.c    | 35 +++++++++++++++++++++++++++++++++++
 fs/proc/proc_misc.c       |  7 +++++++
 include/asm-x86/pgtable.h |  3 +++
 5 files changed, 57 insertions(+)

(limited to 'include/asm-x86')

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index ec30d10154b..0269ac230bf 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -162,6 +162,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
 	pgd_t *pgd;
 	pmd_t *pmd;
 	pte_t *pte;
+	unsigned pages_2m = 0, pages_4k = 0;
 
 	pgd_idx = pgd_index(PAGE_OFFSET);
 	pgd = pgd_base + pgd_idx;
@@ -197,6 +198,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
 				    is_kernel_text(addr2))
 					prot = PAGE_KERNEL_LARGE_EXEC;
 
+				pages_2m++;
 				set_pmd(pmd, pfn_pmd(pfn, prot));
 
 				pfn += PTRS_PER_PTE;
@@ -213,11 +215,14 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
 				if (is_kernel_text(addr))
 					prot = PAGE_KERNEL_EXEC;
 
+				pages_4k++;
 				set_pte(pte, pfn_pte(pfn, prot));
 			}
 			max_pfn_mapped = pfn;
 		}
 	}
+	update_page_count(PG_LEVEL_2M, pages_2m);
+	update_page_count(PG_LEVEL_4K, pages_4k);
 }
 
 static inline int page_kills_ppro(unsigned long pagenr)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 819dad973b1..5e438385905 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -312,6 +312,8 @@ __meminit void early_iounmap(void *addr, unsigned long size)
 static unsigned long __meminit
 phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
 {
+	unsigned long pages = 0;
+
 	int i = pmd_index(address);
 
 	for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
@@ -328,9 +330,11 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
 		if (pmd_val(*pmd))
 			continue;
 
+		pages++;
 		set_pte((pte_t *)pmd,
 			pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
 	}
+	update_page_count(PG_LEVEL_2M, pages);
 	return address;
 }
 
@@ -350,6 +354,7 @@ phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
 static unsigned long __meminit
 phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
 {
+	unsigned long pages = 0;
 	unsigned long last_map_addr = end;
 	int i = pud_index(addr);
 
@@ -374,6 +379,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
 		}
 
 		if (direct_gbpages) {
+			pages++;
 			set_pte((pte_t *)pud,
 				pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
 			last_map_addr = (addr & PUD_MASK) + PUD_SIZE;
@@ -390,6 +396,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
 		unmap_low_page(pmd);
 	}
 	__flush_tlb_all();
+	update_page_count(PG_LEVEL_1G, pages);
 
 	return last_map_addr >> PAGE_SHIFT;
 }
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 60bcb5b6a37..668205bca15 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -34,6 +34,19 @@ struct cpa_data {
 	unsigned	force_split : 1;
 };
 
+static unsigned long direct_pages_count[PG_LEVEL_NUM];
+
+void __meminit update_page_count(int level, unsigned long pages)
+{
+#ifdef CONFIG_PROC_FS
+	unsigned long flags;
+	/* Protect against CPA */
+	spin_lock_irqsave(&pgd_lock, flags);
+	direct_pages_count[level] += pages;
+	spin_unlock_irqrestore(&pgd_lock, flags);
+#endif
+}
+
 #ifdef CONFIG_X86_64
 
 static inline unsigned long highmap_start_pfn(void)
@@ -500,6 +513,12 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
 		set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
 
+	if (address >= (unsigned long)__va(0) &&
+		address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT)) {
+		direct_pages_count[level]--;
+		direct_pages_count[level - 1] += PTRS_PER_PTE;
+	}
+
 	/*
 	 * Install the new, split up pagetable. Important details here:
 	 *
@@ -1029,6 +1048,22 @@ bool kernel_page_present(struct page *page)
 
 #endif /* CONFIG_DEBUG_PAGEALLOC */
 
+#ifdef CONFIG_PROC_FS
+int arch_report_meminfo(char *page)
+{
+	int n;
+	n = sprintf(page, "DirectMap4k:  %8lu\n"
+			  "DirectMap2M:  %8lu\n",
+			direct_pages_count[PG_LEVEL_4K],
+			direct_pages_count[PG_LEVEL_2M]);
+#ifdef CONFIG_X86_64
+	n += sprintf(page + n, "DirectMap1G:  %8lu\n",
+			direct_pages_count[PG_LEVEL_1G]);
+#endif
+	return n;
+}
+#endif
+
 /*
  * The testcases use internal knowledge of the implementation that shouldn't
  * be exposed to the rest of the kernel. Include these directly here.
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 7e277f2ad46..15f7bd41029 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -123,6 +123,11 @@ static int uptime_read_proc(char *page, char **start, off_t off,
 	return proc_calc_metrics(page, start, off, count, eof, len);
 }
 
+int __attribute__((weak)) arch_report_meminfo(char *page)
+{
+	return 0;
+}
+
 static int meminfo_read_proc(char *page, char **start, off_t off,
 				 int count, int *eof, void *data)
 {
@@ -221,6 +226,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
 
 		len += hugetlb_report_meminfo(page + len);
 
+	len += arch_report_meminfo(page + len);
+
 	return proc_calc_metrics(page, start, off, count, eof, len);
 #undef K
 }
diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 97c271b2910..111564fa5e7 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -369,8 +369,11 @@ enum {
 	PG_LEVEL_4K,
 	PG_LEVEL_2M,
 	PG_LEVEL_1G,
+	PG_LEVEL_NUM
 };
 
+void update_page_count(int level, unsigned long pages);
+
 /*
  * Helper function that returns the kernel pagetable entry controlling
  * the virtual address 'address'. NULL means no pagetable entry present.
-- 
cgit v1.2.3-70-g09d2


From 65280e613fada41704f35709b6c8952ca4b8750c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 5 May 2008 16:35:21 +0200
Subject: x86: janitor CPA statistics patch

1) Remove __meminit from update_pages_count. It is used inside
split_pages()

2) Make the code depend on PROC_FS. Doing statistics for nothing is
useless and not adding useless code is nice to the Linux tiny folks.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/pageattr.c    | 48 +++++++++++++++++++++++++----------------------
 include/asm-x86/pgtable.h |  6 +++++-
 2 files changed, 31 insertions(+), 23 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 668205bca15..0a3f5e047f8 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -34,18 +34,40 @@ struct cpa_data {
 	unsigned	force_split : 1;
 };
 
+#ifdef CONFIG_PROC_FS
 static unsigned long direct_pages_count[PG_LEVEL_NUM];
 
-void __meminit update_page_count(int level, unsigned long pages)
+void update_page_count(int level, unsigned long pages)
 {
-#ifdef CONFIG_PROC_FS
 	unsigned long flags;
+
 	/* Protect against CPA */
 	spin_lock_irqsave(&pgd_lock, flags);
 	direct_pages_count[level] += pages;
 	spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
+static void split_page_count(int level)
+{
+	direct_pages_count[level]--;
+	direct_pages_count[level - 1] += PTRS_PER_PTE;
+}
+
+int arch_report_meminfo(char *page)
+{
+	int n = sprintf(page, "DirectMap4k:  %8lu\n"
+			"DirectMap2M:  %8lu\n",
+			direct_pages_count[PG_LEVEL_4K],
+			direct_pages_count[PG_LEVEL_2M]);
+#ifdef CONFIG_X86_64
+	n += sprintf(page + n, "DirectMap1G:  %8lu\n",
+		     direct_pages_count[PG_LEVEL_1G]);
 #endif
+	return n;
 }
+#else
+static inline void split_page_count(int level) { }
+#endif
 
 #ifdef CONFIG_X86_64
 
@@ -514,10 +536,8 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 		set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
 
 	if (address >= (unsigned long)__va(0) &&
-		address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT)) {
-		direct_pages_count[level]--;
-		direct_pages_count[level - 1] += PTRS_PER_PTE;
-	}
+		address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT))
+		split_page_count(level);
 
 	/*
 	 * Install the new, split up pagetable. Important details here:
@@ -1048,22 +1068,6 @@ bool kernel_page_present(struct page *page)
 
 #endif /* CONFIG_DEBUG_PAGEALLOC */
 
-#ifdef CONFIG_PROC_FS
-int arch_report_meminfo(char *page)
-{
-	int n;
-	n = sprintf(page, "DirectMap4k:  %8lu\n"
-			  "DirectMap2M:  %8lu\n",
-			direct_pages_count[PG_LEVEL_4K],
-			direct_pages_count[PG_LEVEL_2M]);
-#ifdef CONFIG_X86_64
-	n += sprintf(page + n, "DirectMap1G:  %8lu\n",
-			direct_pages_count[PG_LEVEL_1G]);
-#endif
-	return n;
-}
-#endif
-
 /*
  * The testcases use internal knowledge of the implementation that shouldn't
  * be exposed to the rest of the kernel. Include these directly here.
diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 111564fa5e7..bd26559dc8e 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -372,7 +372,11 @@ enum {
 	PG_LEVEL_NUM
 };
 
-void update_page_count(int level, unsigned long pages);
+#ifdef CONFIG_PROC_FS
+extern void update_page_count(int level, unsigned long pages);
+#else
+static inline void update_page_count(int level, unsigned long pages) { }
+#endif
 
 /*
  * Helper function that returns the kernel pagetable entry controlling
-- 
cgit v1.2.3-70-g09d2


From ecd29476ae0143b1c3641edfa76c0fc3e9ad3021 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Wed, 21 May 2008 22:09:19 +0100
Subject: x86: I/O APIC: remove parameters to fiddle with the 8259A

Remove the "disable_8254_timer" and "enable_8254_timer" kernel
parameters.  Now that AEOI acknowledgements are no longer needed for
correct timer operation, the 8259A can be kept disabled unconditionally
unless interrupts, either timer or watchdog ones, are actually passed
through it.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/early-quirks.c | 13 -------------
 arch/x86/kernel/io_apic_32.c   | 21 ++-------------------
 arch/x86/kernel/io_apic_64.c   | 21 ++-------------------
 include/asm-x86/apic.h         |  1 -
 4 files changed, 4 insertions(+), 52 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 9f51e1ea9e8..84fd9f2a28f 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -98,17 +98,6 @@ static void __init nvidia_bugs(int num, int slot, int func)
 
 }
 
-static void __init ati_bugs(int num, int slot, int func)
-{
-#ifdef CONFIG_X86_IO_APIC
-	if (timer_over_8254 == 1) {
-		timer_over_8254 = 0;
-		printk(KERN_INFO
-		"ATI board detected. Disabling timer routing over 8254.\n");
-	}
-#endif
-}
-
 #define QFLAG_APPLY_ONCE 	0x1
 #define QFLAG_APPLIED		0x2
 #define QFLAG_DONE		(QFLAG_APPLY_ONCE|QFLAG_APPLIED)
@@ -126,8 +115,6 @@ static struct chipset early_qrk[] __initdata = {
 	  PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs },
 	{ PCI_VENDOR_ID_VIA, PCI_ANY_ID,
 	  PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs },
-	{ PCI_VENDOR_ID_ATI, PCI_ANY_ID,
-	  PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, ati_bugs },
 	{ PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB,
 	  PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config },
 	{}
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index c64b3f5cc12..61cf366d040 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -58,7 +58,6 @@ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 static DEFINE_SPINLOCK(ioapic_lock);
 static DEFINE_SPINLOCK(vector_lock);
 
-int timer_over_8254 __initdata = 1;
 
 /*
  *	Is the SiS APIC rmw bug present ?
@@ -2157,8 +2156,6 @@ static inline void __init check_timer(void)
 	apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
 	init_8259A(1);
 	timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
-	if (timer_over_8254 > 0)
-		enable_8259A_irq(0);
 
 	pin1  = find_isa_irq_pin(0, mp_INT);
 	apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2175,7 +2172,6 @@ static inline void __init check_timer(void)
 		unmask_IO_APIC_irq(0);
 		if (timer_irq_works()) {
 			if (nmi_watchdog == NMI_IO_APIC) {
-				disable_8259A_irq(0);
 				setup_nmi();
 				enable_8259A_irq(0);
 			}
@@ -2195,6 +2191,7 @@ static inline void __init check_timer(void)
 		 * legacy devices should be connected to IO APIC #0
 		 */
 		setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
+		enable_8259A_irq(0);
 		if (timer_irq_works()) {
 			printk("works.\n");
 			if (pin1 != -1)
@@ -2209,6 +2206,7 @@ static inline void __init check_timer(void)
 		/*
 		 * Cleanup, just in case ...
 		 */
+		disable_8259A_irq(0);
 		clear_IO_APIC_pin(apic2, pin2);
 	}
 	printk(" failed.\n");
@@ -2221,7 +2219,6 @@ static inline void __init check_timer(void)
 
 	printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
 
-	disable_8259A_irq(0);
 	set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq,
 				      "fasteoi");
 	apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector);	/* Fixed mode */
@@ -2292,20 +2289,6 @@ void __init setup_IO_APIC(void)
 		print_IO_APIC();
 }
 
-static int __init setup_disable_8254_timer(char *s)
-{
-	timer_over_8254 = -1;
-	return 1;
-}
-static int __init setup_enable_8254_timer(char *s)
-{
-	timer_over_8254 = 2;
-	return 1;
-}
-
-__setup("disable_8254_timer", setup_disable_8254_timer);
-__setup("enable_8254_timer", setup_enable_8254_timer);
-
 /*
  *	Called after all the initialization is done. If we didnt find any
  *	APIC bugs then we can allow the modify fast path
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 7c34e38e421..9f16ca4b5a2 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -90,7 +90,6 @@ static int no_timer_check;
 
 static int disable_timer_pin_1 __initdata;
 
-int timer_over_8254 __initdata = 1;
 
 /* Where if anywhere is the i8259 connect in external int mode */
 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
@@ -430,20 +429,6 @@ static int __init disable_timer_pin_setup(char *arg)
 }
 __setup("disable_timer_pin_1", disable_timer_pin_setup);
 
-static int __init setup_disable_8254_timer(char *s)
-{
-	timer_over_8254 = -1;
-	return 1;
-}
-static int __init setup_enable_8254_timer(char *s)
-{
-	timer_over_8254 = 2;
-	return 1;
-}
-
-__setup("disable_8254_timer", setup_disable_8254_timer);
-__setup("enable_8254_timer", setup_enable_8254_timer);
-
 
 /*
  * Find the IRQ entry number of a certain pin.
@@ -1674,8 +1659,6 @@ static inline void __init check_timer(void)
 	 */
 	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
 	init_8259A(1);
-	if (timer_over_8254 > 0)
-		enable_8259A_irq(0);
 
 	pin1  = find_isa_irq_pin(0, mp_INT);
 	apic1 = find_isa_irq_apic(0, mp_INT);
@@ -1693,7 +1676,6 @@ static inline void __init check_timer(void)
 		if (!no_timer_check && timer_irq_works()) {
 			nmi_watchdog_default();
 			if (nmi_watchdog == NMI_IO_APIC) {
-				disable_8259A_irq(0);
 				setup_nmi();
 				enable_8259A_irq(0);
 			}
@@ -1715,6 +1697,7 @@ static inline void __init check_timer(void)
 		 * legacy devices should be connected to IO APIC #0
 		 */
 		setup_ExtINT_IRQ0_pin(apic2, pin2, cfg->vector);
+		enable_8259A_irq(0);
 		if (timer_irq_works()) {
 			apic_printk(APIC_VERBOSE," works.\n");
 			nmi_watchdog_default();
@@ -1726,6 +1709,7 @@ static inline void __init check_timer(void)
 		/*
 		 * Cleanup, just in case ...
 		 */
+		disable_8259A_irq(0);
 		clear_IO_APIC_pin(apic2, pin2);
 	}
 	apic_printk(APIC_VERBOSE," failed.\n");
@@ -1737,7 +1721,6 @@ static inline void __init check_timer(void)
 
 	apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
 
-	disable_8259A_irq(0);
 	irq_desc[0].chip = &lapic_irq_type;
 	apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);	/* Fixed mode */
 	enable_8259A_irq(0);
diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index be9639a9a18..9c2dfd926e5 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -36,7 +36,6 @@ extern void generic_apic_probe(void);
 #ifdef CONFIG_X86_LOCAL_APIC
 
 extern int apic_verbosity;
-extern int timer_over_8254;
 extern int local_apic_timer_c2_ok;
 extern int local_apic_timer_disabled;
 
-- 
cgit v1.2.3-70-g09d2


From 35542c5ebced864776d90d83d1e255016fd4c084 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Wed, 21 May 2008 22:10:22 +0100
Subject: x86: I/O APIC: clean up the 8259A on a NMI watchdog failure

There is no point in keeping the 8259A enabled if the I/O APIC NMI
watchdog has failed and the 8259A is not used to pass through regular
timer interrupts.  This fixes problems with some systems where some logic
gets confused.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_32.c |  2 ++
 arch/x86/kernel/io_apic_64.c |  2 ++
 arch/x86/kernel/nmi_32.c     |  4 ++++
 arch/x86/kernel/nmi_64.c     | 11 +++++++++--
 include/asm-x86/io_apic.h    |  4 ++++
 5 files changed, 21 insertions(+), 2 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 41218ac75d1..7c7d88ebb96 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -58,6 +58,7 @@ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 static DEFINE_SPINLOCK(ioapic_lock);
 static DEFINE_SPINLOCK(vector_lock);
 
+int timer_through_8259 __initdata;
 
 /*
  *	Is the SiS APIC rmw bug present ?
@@ -2194,6 +2195,7 @@ static inline void __init check_timer(void)
 		enable_8259A_irq(0);
 		if (timer_irq_works()) {
 			printk("works.\n");
+			timer_through_8259 = 1;
 			if (pin1 != -1)
 				replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
 			else
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index aa45a85c4d1..2a60bfb7446 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -90,6 +90,7 @@ static int no_timer_check;
 
 static int disable_timer_pin_1 __initdata;
 
+int timer_through_8259 __initdata;
 
 /* Where if anywhere is the i8259 connect in external int mode */
 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
@@ -1700,6 +1701,7 @@ static inline void __init check_timer(void)
 		enable_8259A_irq(0);
 		if (timer_irq_works()) {
 			apic_printk(APIC_VERBOSE," works.\n");
+			timer_through_8259 = 1;
 			nmi_watchdog_default();
 			if (nmi_watchdog == NMI_IO_APIC) {
 				disable_8259A_irq(0);
diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c
index d322901c38e..6580dae4627 100644
--- a/arch/x86/kernel/nmi_32.c
+++ b/arch/x86/kernel/nmi_32.c
@@ -24,6 +24,8 @@
 #include <linux/kdebug.h>
 #include <linux/slab.h>
 
+#include <asm/i8259.h>
+#include <asm/io_apic.h>
 #include <asm/smp.h>
 #include <asm/nmi.h>
 #include <asm/timer.h>
@@ -131,6 +133,8 @@ int __init check_nmi_watchdog(void)
 	kfree(prev_nmi_count);
 	return 0;
 error:
+	if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259)
+		disable_8259A_irq(0);
 	timer_ack = 0;
 
 	return -1;
diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c
index 5a29ded994f..0060e44e898 100644
--- a/arch/x86/kernel/nmi_64.c
+++ b/arch/x86/kernel/nmi_64.c
@@ -21,6 +21,8 @@
 #include <linux/cpumask.h>
 #include <linux/kdebug.h>
 
+#include <asm/i8259.h>
+#include <asm/io_apic.h>
 #include <asm/smp.h>
 #include <asm/nmi.h>
 #include <asm/proto.h>
@@ -90,7 +92,7 @@ int __init check_nmi_watchdog(void)
 
 	prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
 	if (!prev_nmi_count)
-		return -1;
+		goto error;
 
 	printk(KERN_INFO "Testing NMI watchdog ... ");
 
@@ -121,7 +123,7 @@ int __init check_nmi_watchdog(void)
 	if (!atomic_read(&nmi_active)) {
 		kfree(prev_nmi_count);
 		atomic_set(&nmi_active, -1);
-		return -1;
+		goto error;
 	}
 	printk("OK.\n");
 
@@ -132,6 +134,11 @@ int __init check_nmi_watchdog(void)
 
 	kfree(prev_nmi_count);
 	return 0;
+error:
+	if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259)
+		disable_8259A_irq(0);
+
+	return -1;
 }
 
 static int __init setup_nmi_watchdog(char *str)
diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h
index d593e14f034..a6732566ad0 100644
--- a/include/asm-x86/io_apic.h
+++ b/include/asm-x86/io_apic.h
@@ -137,6 +137,9 @@ extern int sis_apic_bug;
 /* 1 if "noapic" boot option passed */
 extern int skip_ioapic_setup;
 
+/* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */
+extern int timer_through_8259;
+
 static inline void disable_ioapic_setup(void)
 {
 	skip_ioapic_setup = 1;
@@ -162,6 +165,7 @@ extern void ioapic_init_mappings(void);
 
 #else  /* !CONFIG_X86_IO_APIC */
 #define io_apic_assign_pci_irqs 0
+static const int timer_through_8259 = 0;
 #endif
 
 #endif
-- 
cgit v1.2.3-70-g09d2


From 071565095897dff9668cd4a2343160ae31538c27 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 12 May 2008 15:43:37 +0200
Subject: x86: move pci_routirq declaration to pci.h

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/pci/acpi.c   | 1 -
 include/asm-x86/pci.h | 2 ++
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index d95de2f199c..464279da49c 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -218,7 +218,6 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do
 	return bus;
 }
 
-extern int pci_routeirq;
 static int __init pci_acpi_init(void)
 {
 	struct pci_dev *dev = NULL;
diff --git a/include/asm-x86/pci.h b/include/asm-x86/pci.h
index 30bbde0cb34..2db14cf17db 100644
--- a/include/asm-x86/pci.h
+++ b/include/asm-x86/pci.h
@@ -18,6 +18,8 @@ struct pci_sysdata {
 #endif
 };
 
+extern int pci_routeirq;
+
 /* scan a bus after allocating a pci_sysdata for it */
 extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops,
 					    int node);
-- 
cgit v1.2.3-70-g09d2


From 148b50830993acc67129f09c544d9167291e5458 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Fri, 6 Jun 2008 03:27:41 +0100
Subject: x86: NMI watchdog: Downgrade helper

A downgrade helper for the NMI watchdog to be used in all places where
the I/O APIC watchdog may have been requested, but the I/O APIC is found
not to be there or meant to be left disabled.  This is so that the
reconfiguration is cosistent and defined in a single place only.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/nmi.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h
index 1e363021e72..745d8c847de 100644
--- a/include/asm-x86/nmi.h
+++ b/include/asm-x86/nmi.h
@@ -78,6 +78,11 @@ extern int unknown_nmi_panic;
 void __trigger_all_cpu_backtrace(void);
 #define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace()
 
+static inline void localise_nmi_watchdog(void)
+{
+	if (nmi_watchdog == NMI_IO_APIC)
+		nmi_watchdog = NMI_LOCAL_APIC;
+}
 #endif
 
 void lapic_watchdog_stop(void);
-- 
cgit v1.2.3-70-g09d2


From d788bada2f6c49673f85338ac4c0c642e5e52cff Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Fri, 6 Jun 2008 03:28:13 +0100
Subject: x86: APIC/SMP: Downgrade the NMI watchdog for "noapic"

 If configured to use the I/O APIC, the NMI watchdog is deemed to fail if
the chip has been deactivated as a result of "noapic".  Downgrade to the
local APIC watchdog similarly to what is done for the UP case.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/mach-default/smpboot_hooks.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/mach-default/smpboot_hooks.h b/include/asm-x86/mach-default/smpboot_hooks.h
index 56d0e1fa025..b63c5218200 100644
--- a/include/asm-x86/mach-default/smpboot_hooks.h
+++ b/include/asm-x86/mach-default/smpboot_hooks.h
@@ -41,8 +41,10 @@ static inline void __init smpboot_setup_io_apic(void)
 	 */
 	if (!skip_ioapic_setup && nr_ioapics)
 		setup_IO_APIC();
-	else
+	else {
 		nr_ioapics = 0;
+		localise_nmi_watchdog();
+	}
 }
 
 static inline void smpboot_clear_io_apic(void)
-- 
cgit v1.2.3-70-g09d2


From d3f020d2f9bb9a61ca64d4eb058c9f68f827a2b4 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Sat, 7 Jun 2008 19:53:56 +0400
Subject: x86, io-apic: define names for redirection table entry fields

Each I/O APIC redirection table entry has a number of fields.
Define names for them to eliminate reference by hard coded
numbers.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/io_apic.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h
index a6732566ad0..dc0f55f2b03 100644
--- a/include/asm-x86/io_apic.h
+++ b/include/asm-x86/io_apic.h
@@ -11,6 +11,15 @@
  * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar
  */
 
+/* I/O Unit Redirection Table */
+#define IO_APIC_REDIR_VECTOR_MASK	0x000FF
+#define IO_APIC_REDIR_DEST_LOGICAL	0x00800
+#define IO_APIC_REDIR_DEST_PHYSICAL	0x00000
+#define IO_APIC_REDIR_SEND_PENDING	(1 << 12)
+#define IO_APIC_REDIR_REMOTE_IRR	(1 << 14)
+#define IO_APIC_REDIR_LEVEL_TRIGGER	(1 << 15)
+#define IO_APIC_REDIR_MASKED		(1 << 16)
+
 /*
  * The structure of the IO-APIC:
  */
-- 
cgit v1.2.3-70-g09d2


From 8b2ef1d7285740953a2c4ef7faf15fdfc5e2f358 Mon Sep 17 00:00:00 2001
From: Bernhard Walle <bwalle@suse.de>
Date: Sun, 8 Jun 2008 15:46:30 +0200
Subject: x86: add flags parameter to reserve_bootmem_generic()

This patch adds a 'flags' parameter to reserve_bootmem_generic() like it
already has been added in reserve_bootmem() with commit
72a7fe3967dbf86cb34e24fbf1d957fe24d2f246.

It also changes all users to use BOOTMEM_DEFAULT, which doesn't effectively
change the behaviour. Since the change is x86-specific, I don't think it's
necessary to add a new API for migration. There are only 4 users of that
function.

The change is necessary for the next patch, using reserve_bootmem_generic()
for crashkernel reservation.

Signed-off-by: Bernhard Walle <bwalle@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/mpparse.c |  5 +++--
 arch/x86/mm/init_64.c     | 17 ++++++++++++-----
 include/asm-x86/proto.h   |  2 +-
 3 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 1cc7a4b8643..6ae60909b60 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -880,10 +880,11 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
 			if (!reserve)
 				return 1;
 
-			reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE);
+			reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE,
+				BOOTMEM_DEFAULT);
 			if (mpf->mpf_physptr)
 				reserve_bootmem_generic(mpf->mpf_physptr,
-							PAGE_SIZE);
+					PAGE_SIZE, BOOTMEM_DEFAULT);
 #endif
 		return 1;
 		}
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 819dad973b1..bf7bf1de6c2 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -799,12 +799,13 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 }
 #endif
 
-void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
+int __init reserve_bootmem_generic(unsigned long phys, unsigned len, int flags)
 {
 #ifdef CONFIG_NUMA
 	int nid, next_nid;
 #endif
 	unsigned long pfn = phys >> PAGE_SHIFT;
+	int ret;
 
 	if (pfn >= end_pfn) {
 		/*
@@ -812,11 +813,11 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
 		 * firmware tables:
 		 */
 		if (pfn < max_pfn_mapped)
-			return;
+			return -EFAULT;
 
 		printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
 				phys, len);
-		return;
+		return -EFAULT;
 	}
 
 	/* Should check here against the e820 map to avoid double free */
@@ -824,9 +825,13 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
 	nid = phys_to_nid(phys);
 	next_nid = phys_to_nid(phys + len - 1);
 	if (nid == next_nid)
-		reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
+		ret = reserve_bootmem_node(NODE_DATA(nid), phys, len, flags);
 	else
-		reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
+		ret = reserve_bootmem(phys, len, flags);
+
+	if (ret != 0)
+		return ret;
+
 #else
 	reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
 #endif
@@ -835,6 +840,8 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
 		dma_reserve += len / PAGE_SIZE;
 		set_dma_reserve(dma_reserve);
 	}
+
+	return 0;
 }
 
 int kern_addr_valid(unsigned long addr)
diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h
index 6c8b41b03f6..a9f51472521 100644
--- a/include/asm-x86/proto.h
+++ b/include/asm-x86/proto.h
@@ -14,7 +14,7 @@ extern void ia32_syscall(void);
 extern void ia32_cstar_target(void);
 extern void ia32_sysenter_target(void);
 
-extern void reserve_bootmem_generic(unsigned long phys, unsigned len);
+extern int reserve_bootmem_generic(unsigned long phys, unsigned len, int flags);
 
 extern void syscall32_cpu_init(void);
 
-- 
cgit v1.2.3-70-g09d2


From ab4a465e96adf2f3a8aaa95384bacfa9ab661e35 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 10 Jun 2008 12:55:54 -0700
Subject: x86: e820 merge parsing of the mem=/memmap= boot parameters

since we now have 32-bit support for e820_register_active_regions(),
we can merge the parsing of the mem=/memmap= boot parameters.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/e820.c    |  86 +++++++++++++++++++++++++++++++++
 arch/x86/kernel/e820_32.c | 120 ----------------------------------------------
 arch/x86/kernel/e820_64.c |  69 --------------------------
 include/asm-x86/e820.h    |   2 +
 include/asm-x86/e820_32.h |   2 -
 include/asm-x86/e820_64.h |   2 -
 6 files changed, 88 insertions(+), 193 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 68aba413d40..4f2cd5d179e 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -890,3 +890,89 @@ u64 __init e820_hole_size(u64 start, u64 end)
 	}
 	return end - start - ((u64)ram << PAGE_SHIFT);
 }
+
+static void early_panic(char *msg)
+{
+	early_printk(msg);
+	panic(msg);
+}
+
+/* "mem=nopentium" disables the 4MB page tables. */
+static int __init parse_memopt(char *p)
+{
+	u64 mem_size;
+
+	if (!p)
+		return -EINVAL;
+
+#ifdef CONFIG_X86_32
+	if (!strcmp(p, "nopentium")) {
+		setup_clear_cpu_cap(X86_FEATURE_PSE);
+		return 0;
+	}
+#endif
+
+	mem_size = memparse(p, &p);
+	end_user_pfn = mem_size>>PAGE_SHIFT;
+	return 0;
+}
+early_param("mem", parse_memopt);
+
+static int userdef __initdata;
+
+static int __init parse_memmap_opt(char *p)
+{
+	char *oldp;
+	u64 start_at, mem_size;
+
+	if (!strcmp(p, "exactmap")) {
+#ifdef CONFIG_CRASH_DUMP
+		/*
+		 * If we are doing a crash dump, we still need to know
+		 * the real mem size before original memory map is
+		 * reset.
+		 */
+		e820_register_active_regions(0, 0, -1UL);
+		saved_max_pfn = e820_end_of_ram();
+		remove_all_active_ranges();
+#endif
+		e820.nr_map = 0;
+		userdef = 1;
+		return 0;
+	}
+
+	oldp = p;
+	mem_size = memparse(p, &p);
+	if (p == oldp)
+		return -EINVAL;
+
+	userdef = 1;
+	if (*p == '@') {
+		start_at = memparse(p+1, &p);
+		add_memory_region(start_at, mem_size, E820_RAM);
+	} else if (*p == '#') {
+		start_at = memparse(p+1, &p);
+		add_memory_region(start_at, mem_size, E820_ACPI);
+	} else if (*p == '$') {
+		start_at = memparse(p+1, &p);
+		add_memory_region(start_at, mem_size, E820_RESERVED);
+	} else {
+		end_user_pfn = (mem_size >> PAGE_SHIFT);
+	}
+	return *p == '\0' ? 0 : -EINVAL;
+}
+early_param("memmap", parse_memmap_opt);
+
+void __init finish_e820_parsing(void)
+{
+	if (userdef) {
+		int nr = e820.nr_map;
+
+		if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0)
+			early_panic("Invalid user supplied memory map");
+		e820.nr_map = nr;
+
+		printk(KERN_INFO "user-defined physical RAM map:\n");
+		e820_print_map("user");
+	}
+}
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
index e8a3b968c9f..8de3df9548d 100644
--- a/arch/x86/kernel/e820_32.c
+++ b/arch/x86/kernel/e820_32.c
@@ -207,36 +207,6 @@ void __init init_iomem_resources(struct resource *code_resource,
 	}
 }
 
-void __init limit_regions(unsigned long long size)
-{
-	unsigned long long current_addr;
-	int i;
-
-	e820_print_map("limit_regions start");
-	for (i = 0; i < e820.nr_map; i++) {
-		current_addr = e820.map[i].addr + e820.map[i].size;
-		if (current_addr < size)
-			continue;
-
-		if (e820.map[i].type != E820_RAM)
-			continue;
-
-		if (e820.map[i].addr >= size) {
-			/*
-			 * This region starts past the end of the
-			 * requested size, skip it completely.
-			 */
-			e820.nr_map = i;
-		} else {
-			e820.nr_map = i + 1;
-			e820.map[i].size -= current_addr - size;
-		}
-		e820_print_map("limit_regions endfor");
-		return;
-	}
-	e820_print_map("limit_regions endfunc");
-}
-
 /* Overridden in paravirt.c if CONFIG_PARAVIRT */
 char * __init __attribute__((weak)) memory_setup(void)
 {
@@ -249,93 +219,3 @@ void __init setup_memory_map(void)
 	e820_print_map(memory_setup());
 }
 
-static int __initdata user_defined_memmap;
-
-/*
- * "mem=nopentium" disables the 4MB page tables.
- * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
- * to <mem>, overriding the bios size.
- * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
- * <start> to <start>+<mem>, overriding the bios size.
- *
- * HPA tells me bootloaders need to parse mem=, so no new
- * option should be mem=  [also see Documentation/i386/boot.txt]
- */
-static int __init parse_mem(char *arg)
-{
-	if (!arg)
-		return -EINVAL;
-
-	if (strcmp(arg, "nopentium") == 0) {
-		setup_clear_cpu_cap(X86_FEATURE_PSE);
-	} else {
-		/* If the user specifies memory size, we
-		 * limit the BIOS-provided memory map to
-		 * that size. exactmap can be used to specify
-		 * the exact map. mem=number can be used to
-		 * trim the existing memory map.
-		 */
-		unsigned long long mem_size;
-
-		mem_size = memparse(arg, &arg);
-		limit_regions(mem_size);
-		user_defined_memmap = 1;
-	}
-	return 0;
-}
-early_param("mem", parse_mem);
-
-static int __init parse_memmap(char *arg)
-{
-	if (!arg)
-		return -EINVAL;
-
-	if (strcmp(arg, "exactmap") == 0) {
-#ifdef CONFIG_CRASH_DUMP
-		/* If we are doing a crash dump, we
-		 * still need to know the real mem
-		 * size before original memory map is
-		 * reset.
-		 */
-		e820_register_active_regions(0, 0, -1UL);
-		saved_max_pfn = e820_end_of_ram();
-		remove_all_active_ranges();
-#endif
-		e820.nr_map = 0;
-		user_defined_memmap = 1;
-	} else {
-		/* If the user specifies memory size, we
-		 * limit the BIOS-provided memory map to
-		 * that size. exactmap can be used to specify
-		 * the exact map. mem=number can be used to
-		 * trim the existing memory map.
-		 */
-		unsigned long long start_at, mem_size;
-
-		mem_size = memparse(arg, &arg);
-		if (*arg == '@') {
-			start_at = memparse(arg+1, &arg);
-			add_memory_region(start_at, mem_size, E820_RAM);
-		} else if (*arg == '#') {
-			start_at = memparse(arg+1, &arg);
-			add_memory_region(start_at, mem_size, E820_ACPI);
-		} else if (*arg == '$') {
-			start_at = memparse(arg+1, &arg);
-			add_memory_region(start_at, mem_size, E820_RESERVED);
-		} else {
-			limit_regions(mem_size);
-			user_defined_memmap = 1;
-		}
-	}
-	return 0;
-}
-early_param("memmap", parse_memmap);
-
-void __init finish_e820_parsing(void)
-{
-	if (user_defined_memmap) {
-		printk(KERN_INFO "user-defined physical RAM map:\n");
-		e820_print_map("user");
-	}
-}
-
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index 0afee2ca0bf..47952b1690b 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -96,75 +96,6 @@ char *__init machine_specific_memory_setup(void)
 	return who;
 }
 
-static int __init parse_memopt(char *p)
-{
-	if (!p)
-		return -EINVAL;
-	end_user_pfn = memparse(p, &p);
-	end_user_pfn >>= PAGE_SHIFT;
-	return 0;
-}
-early_param("mem", parse_memopt);
-
-static int userdef __initdata;
-
-static int __init parse_memmap_opt(char *p)
-{
-	char *oldp;
-	unsigned long long start_at, mem_size;
-
-	if (!strcmp(p, "exactmap")) {
-#ifdef CONFIG_CRASH_DUMP
-		/*
-		 * If we are doing a crash dump, we still need to know
-		 * the real mem size before original memory map is
-		 * reset.
-		 */
-		e820_register_active_regions(0, 0, -1UL);
-		saved_max_pfn = e820_end_of_ram();
-		remove_all_active_ranges();
-#endif
-		e820.nr_map = 0;
-		userdef = 1;
-		return 0;
-	}
-
-	oldp = p;
-	mem_size = memparse(p, &p);
-	if (p == oldp)
-		return -EINVAL;
-
-	userdef = 1;
-	if (*p == '@') {
-		start_at = memparse(p+1, &p);
-		add_memory_region(start_at, mem_size, E820_RAM);
-	} else if (*p == '#') {
-		start_at = memparse(p+1, &p);
-		add_memory_region(start_at, mem_size, E820_ACPI);
-	} else if (*p == '$') {
-		start_at = memparse(p+1, &p);
-		add_memory_region(start_at, mem_size, E820_RESERVED);
-	} else {
-		end_user_pfn = (mem_size >> PAGE_SHIFT);
-	}
-	return *p == '\0' ? 0 : -EINVAL;
-}
-early_param("memmap", parse_memmap_opt);
-
-void __init finish_e820_parsing(void)
-{
-	if (userdef) {
-		int nr = e820.nr_map;
-
-		if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0)
-			early_panic("Invalid user supplied memory map");
-		e820.nr_map = nr;
-
-		printk(KERN_INFO "user-defined physical RAM map:\n");
-		e820_print_map("user");
-	}
-}
-
 int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)
 {
 	int i;
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index 8aa32323a18..a5959e3a562 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -98,6 +98,8 @@ extern int e820_find_active_region(const struct e820entry *ei,
 extern void e820_register_active_regions(int nid, unsigned long start_pfn,
 					 unsigned long end_pfn);
 extern u64 e820_hole_size(u64 start, u64 end);
+extern void finish_e820_parsing(void);
+
 #endif /* __ASSEMBLY__ */
 
 #define ISA_START_ADDRESS	0xa0000
diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h
index 212b74c10ef..9135ce6e617 100644
--- a/include/asm-x86/e820_32.h
+++ b/include/asm-x86/e820_32.h
@@ -19,9 +19,7 @@
 #ifndef __ASSEMBLY__
 
 extern void setup_memory_map(void);
-extern void finish_e820_parsing(void);
 
-extern void limit_regions(unsigned long long size);
 extern void init_iomem_resources(struct resource *code_resource,
 				 struct resource *data_resource,
 				 struct resource *bss_resource);
diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h
index 368585daaa4..478547c6322 100644
--- a/include/asm-x86/e820_64.h
+++ b/include/asm-x86/e820_64.h
@@ -22,8 +22,6 @@ extern int is_memory_any_valid(unsigned long start, unsigned long end);
 extern int e820_all_non_reserved(unsigned long start, unsigned long end);
 extern int is_memory_all_valid(unsigned long start, unsigned long end);
 
-extern void finish_e820_parsing(void);
-
 #endif/*!__ASSEMBLY__*/
 
 #endif/*__E820_HEADER*/
-- 
cgit v1.2.3-70-g09d2


From d2dbf343329dc777d77488743465f7be4245971d Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Fri, 13 Jun 2008 02:00:56 -0700
Subject: x86: clean up reserve_bootmem_generic() and port it to 32-bit

1. add reserve_bootmem_generic for 32bit
2. change len to unsigned long
3. make early_res_to_bootmem to use it

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/e820.c    |  2 +-
 arch/x86/kernel/mpparse.c | 18 ++++++------------
 arch/x86/mm/init_32.c     |  6 ++++++
 arch/x86/mm/init_64.c     |  3 ++-
 include/asm-x86/proto.h   |  2 --
 include/linux/bootmem.h   |  2 ++
 6 files changed, 17 insertions(+), 16 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 4f2cd5d179e..774063f11be 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -635,7 +635,7 @@ void __init early_res_to_bootmem(u64 start, u64 end)
 			continue;
 		printk(KERN_INFO "  early res: %d [%llx-%llx] %s\n", i,
 			final_start, final_end - 1, r->name);
-		reserve_bootmem(final_start, final_end - final_start,
+		reserve_bootmem_generic(final_start, final_end - final_start,
 				BOOTMEM_DEFAULT);
 	}
 }
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 7ac1b689b70..b62ac6ba141 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -859,10 +859,11 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
 
 			if (!reserve)
 				return 1;
-#ifdef CONFIG_X86_32
-			reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE,
+			reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE,
 					BOOTMEM_DEFAULT);
 			if (mpf->mpf_physptr) {
+				unsigned long size = PAGE_SIZE;
+#ifdef CONFIG_X86_32
 				/*
 				 * We cannot access to MPC table to compute
 				 * table size yet, as only few megabytes from
@@ -872,22 +873,15 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
 				 * PAGE_SIZE from mpg->mpf_physptr yields BUG()
 				 * in reserve_bootmem.
 				 */
-				unsigned long size = PAGE_SIZE;
 				unsigned long end = max_low_pfn * PAGE_SIZE;
 				if (mpf->mpf_physptr + size > end)
 					size = end - mpf->mpf_physptr;
-				reserve_bootmem(mpf->mpf_physptr, size,
+#endif
+				reserve_bootmem_generic(mpf->mpf_physptr, size,
 						BOOTMEM_DEFAULT);
 			}
 
-#else
-			reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE,
-				BOOTMEM_DEFAULT);
-			if (mpf->mpf_physptr)
-				reserve_bootmem_generic(mpf->mpf_physptr,
-					PAGE_SIZE, BOOTMEM_DEFAULT);
-#endif
-		return 1;
+			return 1;
 		}
 		bp += 4;
 		length -= 16;
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 0e7bb5e8167..abadb1da70d 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -785,3 +785,9 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 	free_init_pages("initrd memory", start, end);
 }
 #endif
+
+int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
+				   int flags)
+{
+	return reserve_bootmem(phys, len, flags);
+}
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index bf7bf1de6c2..b8c2c1ef7ad 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -799,7 +799,8 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 }
 #endif
 
-int __init reserve_bootmem_generic(unsigned long phys, unsigned len, int flags)
+int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
+				   int flags)
 {
 #ifdef CONFIG_NUMA
 	int nid, next_nid;
diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h
index a9f51472521..3dd458c385c 100644
--- a/include/asm-x86/proto.h
+++ b/include/asm-x86/proto.h
@@ -14,8 +14,6 @@ extern void ia32_syscall(void);
 extern void ia32_cstar_target(void);
 extern void ia32_sysenter_target(void);
 
-extern int reserve_bootmem_generic(unsigned long phys, unsigned len, int flags);
-
 extern void syscall32_cpu_init(void);
 
 extern void check_efer(void);
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 686895bacd9..a1d9b79078e 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -84,6 +84,8 @@ extern int reserve_bootmem(unsigned long addr, unsigned long size, int flags);
 	__alloc_bootmem_low(x, PAGE_SIZE, 0)
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 
+extern int reserve_bootmem_generic(unsigned long addr, unsigned long size,
+				   int flags);
 extern unsigned long free_all_bootmem(void);
 extern unsigned long free_all_bootmem_node(pg_data_t *pgdat);
 extern void *__alloc_bootmem_node(pg_data_t *pgdat,
-- 
cgit v1.2.3-70-g09d2


From d867e5310bd3c560093d39669ef52ff7f1b5711a Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sat, 14 Jun 2008 01:26:41 -0700
Subject: x86: keep MP_intsrc_info untouched if we do not update mptable

Daniel Exner reported IO-APIC enumeration breakage in linux-next.

Alexey Starikovskiy found out that it might be related to
commit 2944e16b25 "x86: update mptable".

use enable_update_mptable to decide if need check before add mp_irqs array.

Reported-by: Daniel Exner <webmaster@dragonslave.de>
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/acpi/boot.c |  3 +++
 arch/x86/kernel/mpparse.c   | 13 +++++++------
 include/asm-x86/mpspec.h    |  1 +
 3 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index caf4ed7ca06..4d370b1c5ae 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1172,6 +1172,9 @@ int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
 	struct mpc_config_intsrc intsrc;
 	int ioapic;
 
+	if (!enable_update_mptable)
+		return 0;
+
 	/* print the entry should happen on mptable identically */
 	intsrc.mpc_type = MP_INTSRC;
 	intsrc.mpc_irqtype = mp_INT;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index b62ac6ba141..014ac5d90f8 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -34,6 +34,8 @@
 #include <mach_mpparse.h>
 #endif
 
+int enable_update_mptable;
+
 /*
  * Checksum an MP configuration block.
  */
@@ -295,10 +297,11 @@ void MP_intsrc_info(struct mpc_config_intsrc *m)
 
 	print_MP_intsrc_info(m);
 
-	for (i = 0; i < mp_irq_entries; i++) {
-		if (!mp_irq_mpc_intsrc_cmp(&mp_irqs[i], m))
-			return;
-	}
+	if (enable_update_mptable)
+		for (i = 0; i < mp_irq_entries; i++) {
+			if (!mp_irq_mpc_intsrc_cmp(&mp_irqs[i], m))
+				return;
+		}
 
 	assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
 	if (++mp_irq_entries == MAX_IRQ_SOURCES)
@@ -1110,8 +1113,6 @@ out:
 	return 0;
 }
 
-int __initdata enable_update_mptable;
-
 static int __init update_mptable_setup(char *str)
 {
 	enable_update_mptable = 1;
diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h
index b8ba37496e2..f48dbca740e 100644
--- a/include/asm-x86/mpspec.h
+++ b/include/asm-x86/mpspec.h
@@ -61,6 +61,7 @@ extern void mp_config_acpi_legacy_irqs(void);
 extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low);
 extern void MP_intsrc_info(struct mpc_config_intsrc *m);
 #ifdef CONFIG_X86_IO_APIC
+extern int enable_update_mptable;
 extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
 				u32 gsi, int triggering, int polarity);
 #else
-- 
cgit v1.2.3-70-g09d2


From d0be6bdea103b8d04c8a3495538b7c0011ae4129 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sun, 15 Jun 2008 18:58:51 -0700
Subject: x86: rename two e820 related functions

rename update_memory_range to e820_update_range
rename add_memory_region to e820_add_region

to make it more clear that they are about e820 map operations.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/aperture_64.c   |  2 +-
 arch/x86/kernel/cpu/mtrr/main.c |  2 +-
 arch/x86/kernel/e820.c          | 16 ++++++++--------
 arch/x86/kernel/efi.c           |  2 +-
 arch/x86/lguest/boot.c          |  2 +-
 arch/x86/mach-default/setup.c   |  4 ++--
 arch/x86/mach-visws/setup.c     |  6 +++---
 arch/x86/mach-voyager/setup.c   | 12 ++++++------
 arch/x86/xen/setup.c            |  4 ++--
 include/asm-x86/e820.h          |  4 ++--
 10 files changed, 27 insertions(+), 27 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 479926d9e00..66b140932b2 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -292,7 +292,7 @@ void __init early_gart_iommu_check(void)
 				    E820_RAM)) {
 			/* reserved it, so we can resuse it in second kernel */
 			printk(KERN_INFO "update e820 for GART\n");
-			add_memory_region(aper_base, aper_size, E820_RESERVED);
+			e820_add_region(aper_base, aper_size, E820_RESERVED);
 			update_e820();
 		}
 		return;
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 0642201784e..105afe12beb 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -1440,7 +1440,7 @@ static u64 __init real_trim_memory(unsigned long start_pfn,
 	trim_size <<= PAGE_SHIFT;
 	trim_size -= trim_start;
 
-	return update_memory_range(trim_start, trim_size, E820_RAM,
+	return e820_update_range(trim_start, trim_size, E820_RAM,
 				E820_RESERVED);
 }
 /**
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 774063f11be..5051ce744b4 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -94,7 +94,7 @@ int __init e820_all_mapped(u64 start, u64 end, unsigned type)
 /*
  * Add a memory region to the kernel e820 map.
  */
-void __init add_memory_region(u64 start, u64 size, int type)
+void __init e820_add_region(u64 start, u64 size, int type)
 {
 	int x = e820.nr_map;
 
@@ -384,12 +384,12 @@ int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
 		if (start > end)
 			return -1;
 
-		add_memory_region(start, size, type);
+		e820_add_region(start, size, type);
 	} while (biosmap++, --nr_map);
 	return 0;
 }
 
-u64 __init update_memory_range(u64 start, u64 size, unsigned old_type,
+u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
 				unsigned new_type)
 {
 	int i;
@@ -414,7 +414,7 @@ u64 __init update_memory_range(u64 start, u64 size, unsigned old_type,
 		final_end = min(start + size, ei->addr + ei->size);
 		if (final_start >= final_end)
 			continue;
-		add_memory_region(final_start, final_end - final_start,
+		e820_add_region(final_start, final_end - final_start,
 					 new_type);
 		real_updated_size += final_end - final_start;
 	}
@@ -774,7 +774,7 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
 		return 0;
 
 	addr = round_down(start + size - sizet, align);
-	update_memory_range(addr, sizet, E820_RAM, E820_RESERVED);
+	e820_update_range(addr, sizet, E820_RAM, E820_RESERVED);
 	printk(KERN_INFO "update e820 for early_reserve_e820\n");
 	update_e820();
 
@@ -949,13 +949,13 @@ static int __init parse_memmap_opt(char *p)
 	userdef = 1;
 	if (*p == '@') {
 		start_at = memparse(p+1, &p);
-		add_memory_region(start_at, mem_size, E820_RAM);
+		e820_add_region(start_at, mem_size, E820_RAM);
 	} else if (*p == '#') {
 		start_at = memparse(p+1, &p);
-		add_memory_region(start_at, mem_size, E820_ACPI);
+		e820_add_region(start_at, mem_size, E820_ACPI);
 	} else if (*p == '$') {
 		start_at = memparse(p+1, &p);
-		add_memory_region(start_at, mem_size, E820_RESERVED);
+		e820_add_region(start_at, mem_size, E820_RESERVED);
 	} else {
 		end_user_pfn = (mem_size >> PAGE_SHIFT);
 	}
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index d5c7fcdd186..473c89fe507 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -233,7 +233,7 @@ static void __init add_efi_memmap(void)
 			e820_type = E820_RAM;
 		else
 			e820_type = E820_RESERVED;
-		add_memory_region(start, size, e820_type);
+		e820_add_region(start, size, e820_type);
 	}
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 }
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 5e4772907c6..e72cf0793fb 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -835,7 +835,7 @@ static __init char *lguest_memory_setup(void)
 
 	/* The Linux bootloader header contains an "e820" memory map: the
 	 * Launcher populated the first entry with our memory limit. */
-	add_memory_region(boot_params.e820_map[0].addr,
+	e820_add_region(boot_params.e820_map[0].addr,
 			  boot_params.e820_map[0].size,
 			  boot_params.e820_map[0].type);
 
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c
index 56b4c39cb7f..7ae692a7676 100644
--- a/arch/x86/mach-default/setup.c
+++ b/arch/x86/mach-default/setup.c
@@ -184,8 +184,8 @@ char * __init machine_specific_memory_setup(void)
 		}
 
 		e820.nr_map = 0;
-		add_memory_region(0, LOWMEMSIZE(), E820_RAM);
-		add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
+		e820_add_region(0, LOWMEMSIZE(), E820_RAM);
+		e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
   	}
 	return who;
 }
diff --git a/arch/x86/mach-visws/setup.c b/arch/x86/mach-visws/setup.c
index de4c9dbd086..d67868ec9b7 100644
--- a/arch/x86/mach-visws/setup.c
+++ b/arch/x86/mach-visws/setup.c
@@ -175,9 +175,9 @@ char * __init machine_specific_memory_setup(void)
 	sgivwfb_mem_size &= ~((1 << 20) - 1);
 	sgivwfb_mem_phys = mem_size - gfx_mem_size;
 
-	add_memory_region(0, LOWMEMSIZE(), E820_RAM);
-	add_memory_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM);
-	add_memory_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED);
+	e820_add_region(0, LOWMEMSIZE(), E820_RAM);
+	e820_add_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM);
+	e820_add_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED);
 
 	return "PROM";
 }
diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c
index f4aca9fa954..f27b583154e 100644
--- a/arch/x86/mach-voyager/setup.c
+++ b/arch/x86/mach-voyager/setup.c
@@ -74,7 +74,7 @@ char *__init machine_specific_memory_setup(void)
 
 		e820.nr_map = 0;
 		for (i = 0; voyager_memory_detect(i, &addr, &length); i++) {
-			add_memory_region(addr, length, E820_RAM);
+			e820_add_region(addr, length, E820_RAM);
 		}
 		return who;
 	} else if (voyager_level == 4) {
@@ -92,14 +92,14 @@ char *__init machine_specific_memory_setup(void)
 			tom = (boot_params.screen_info.ext_mem_k) << 10;
 		}
 		who = "Voyager-TOM";
-		add_memory_region(0, 0x9f000, E820_RAM);
+		e820_add_region(0, 0x9f000, E820_RAM);
 		/* map from 1M to top of memory */
-		add_memory_region(1 * 1024 * 1024, tom - 1 * 1024 * 1024,
+		e820_add_region(1 * 1024 * 1024, tom - 1 * 1024 * 1024,
 				  E820_RAM);
 		/* FIXME: Should check the ASICs to see if I need to
 		 * take out the 8M window.  Just do it at the moment
 		 * */
-		add_memory_region(8 * 1024 * 1024, 8 * 1024 * 1024,
+		e820_add_region(8 * 1024 * 1024, 8 * 1024 * 1024,
 				  E820_RESERVED);
 		return who;
 	}
@@ -131,8 +131,8 @@ char *__init machine_specific_memory_setup(void)
 		}
 
 		e820.nr_map = 0;
-		add_memory_region(0, LOWMEMSIZE(), E820_RAM);
-		add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
+		e820_add_region(0, LOWMEMSIZE(), E820_RAM);
+		e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
 	}
 	return who;
 }
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 82517e4a752..9001c9df04d 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -39,8 +39,8 @@ char * __init xen_memory_setup(void)
 	unsigned long max_pfn = xen_start_info->nr_pages;
 
 	e820.nr_map = 0;
-	add_memory_region(0, LOWMEMSIZE(), E820_RAM);
-	add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM);
+	e820_add_region(0, LOWMEMSIZE(), E820_RAM);
+	e820_add_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM);
 
 	return "Xen";
 }
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index a5959e3a562..6b0ce745a60 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -60,12 +60,12 @@ extern struct e820map e820;
 
 extern int e820_any_mapped(u64 start, u64 end, unsigned type);
 extern int e820_all_mapped(u64 start, u64 end, unsigned type);
-extern void add_memory_region(u64 start, u64 size, int type);
+extern void e820_add_region(u64 start, u64 size, int type);
 extern void e820_print_map(char *who);
 extern int
 sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, int *pnr_map);
 extern int copy_e820_map(struct e820entry *biosmap, int nr_map);
-extern u64 update_memory_range(u64 start, u64 size, unsigned old_type,
+extern u64 e820_update_range(u64 start, u64 size, unsigned old_type,
 			       unsigned new_type);
 extern void update_e820(void);
 extern void e820_setup_gap(void);
-- 
cgit v1.2.3-70-g09d2


From b5bc6c0e55000dab86b73f838f5ad02908b23755 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sat, 14 Jun 2008 18:32:52 -0700
Subject: x86, mm: use add_highpages_with_active_regions() for high pages init
 v2

use early_node_map to init high pages, so we can remove page_is_ram() and
page_is_reserved_early() in the big loop with add_one_highpage

also remove page_is_reserved_early(), it is not needed anymore.

v2: fix the build of other platforms

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/e820.c     | 11 --------
 arch/x86/mm/discontig_32.c | 19 ++++++--------
 arch/x86/mm/init_32.c      | 62 ++++++++++++++++++++++++++++++++++++++--------
 include/asm-x86/e820.h     |  1 -
 include/asm-x86/highmem.h  |  3 +++
 include/linux/mm.h         |  2 ++
 mm/page_alloc.c            |  8 ++++++
 7 files changed, 71 insertions(+), 35 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 5051ce744b4..ed46b7a6bc1 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -612,17 +612,6 @@ void __init free_early(u64 start, u64 end)
 	early_res[j - 1].end = 0;
 }
 
-int __init page_is_reserved_early(unsigned long pagenr)
-{
-	u64 start = (u64)pagenr << PAGE_SHIFT;
-	int i;
-	struct early_res *r;
-
-	i = find_overlapped_early(start, start + PAGE_SIZE);
-	r = &early_res[i];
-	return (i < MAX_EARLY_RES && r->end);
-}
-
 void __init early_res_to_bootmem(u64 start, u64 end)
 {
 	int i;
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index c3f119e99e0..7c4d0255f8d 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -100,7 +100,6 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
 #endif
 
 extern unsigned long find_max_low_pfn(void);
-extern void add_one_highpage_init(struct page *, int, int);
 extern unsigned long highend_pfn, highstart_pfn;
 
 #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)
@@ -432,10 +431,10 @@ void __init set_highmem_pages_init(int bad_ppro)
 {
 #ifdef CONFIG_HIGHMEM
 	struct zone *zone;
-	struct page *page;
+	int nid;
 
 	for_each_zone(zone) {
-		unsigned long node_pfn, zone_start_pfn, zone_end_pfn;
+		unsigned long zone_start_pfn, zone_end_pfn;
 
 		if (!is_highmem(zone))
 			continue;
@@ -443,16 +442,12 @@ void __init set_highmem_pages_init(int bad_ppro)
 		zone_start_pfn = zone->zone_start_pfn;
 		zone_end_pfn = zone_start_pfn + zone->spanned_pages;
 
+		nid = zone_to_nid(zone);
 		printk("Initializing %s for node %d (%08lx:%08lx)\n",
-				zone->name, zone_to_nid(zone),
-				zone_start_pfn, zone_end_pfn);
-
-		for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) {
-			if (!pfn_valid(node_pfn))
-				continue;
-			page = pfn_to_page(node_pfn);
-			add_one_highpage_init(page, node_pfn, bad_ppro);
-		}
+				zone->name, nid, zone_start_pfn, zone_end_pfn);
+
+		add_highpages_with_active_regions(nid, zone_start_pfn,
+				 zone_end_pfn, bad_ppro);
 	}
 	totalram_pages += totalhigh_pages;
 #endif
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index abadb1da70d..ba07a489230 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -287,10 +287,10 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
 	pkmap_page_table = pte;
 }
 
-void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
+static void __init
+add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
 {
-	if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn)) &&
-	    !page_is_reserved_early(pfn)) {
+	if (!(bad_ppro && page_kills_ppro(pfn))) {
 		ClearPageReserved(page);
 		init_page_count(page);
 		__free_page(page);
@@ -299,18 +299,58 @@ void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
 		SetPageReserved(page);
 }
 
+struct add_highpages_data {
+	unsigned long start_pfn;
+	unsigned long end_pfn;
+	int bad_ppro;
+};
+
+static void __init add_highpages_work_fn(unsigned long start_pfn,
+					 unsigned long end_pfn, void *datax)
+{
+	int node_pfn;
+	struct page *page;
+	unsigned long final_start_pfn, final_end_pfn;
+	struct add_highpages_data *data;
+	int bad_ppro;
+
+	data = (struct add_highpages_data *)datax;
+	bad_ppro = data->bad_ppro;
+
+	final_start_pfn = max(start_pfn, data->start_pfn);
+	final_end_pfn = min(end_pfn, data->end_pfn);
+	if (final_start_pfn >= final_end_pfn)
+		return;
+
+	for (node_pfn = final_start_pfn; node_pfn < final_end_pfn;
+	     node_pfn++) {
+		if (!pfn_valid(node_pfn))
+			continue;
+		page = pfn_to_page(node_pfn);
+		add_one_highpage_init(page, node_pfn, bad_ppro);
+	}
+
+}
+
+void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn,
+					      unsigned long end_pfn,
+					      int bad_ppro)
+{
+	struct add_highpages_data data;
+
+	data.start_pfn = start_pfn;
+	data.end_pfn = end_pfn;
+	data.bad_ppro = bad_ppro;
+
+	work_with_active_regions(nid, add_highpages_work_fn, &data);
+}
+
 #ifndef CONFIG_NUMA
 static void __init set_highmem_pages_init(int bad_ppro)
 {
-	int pfn;
+	add_highpages_with_active_regions(0, highstart_pfn, highend_pfn,
+						bad_ppro);
 
-	for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) {
-		/*
-		 * Holes under sparsemem might not have no mem_map[]:
-		 */
-		if (pfn_valid(pfn))
-			add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
-	}
 	totalram_pages += totalhigh_pages;
 }
 #endif /* !CONFIG_NUMA */
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index 6b0ce745a60..55d31059690 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -86,7 +86,6 @@ extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align);
 extern void reserve_early(u64 start, u64 end, char *name);
 extern void free_early(u64 start, u64 end);
 extern void early_res_to_bootmem(u64 start, u64 end);
-extern int page_is_reserved_early(unsigned long pagenr);
 extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
 
 extern unsigned long e820_end_of_ram(void);
diff --git a/include/asm-x86/highmem.h b/include/asm-x86/highmem.h
index e153f3b4477..85c4fea41ff 100644
--- a/include/asm-x86/highmem.h
+++ b/include/asm-x86/highmem.h
@@ -74,6 +74,9 @@ struct page *kmap_atomic_to_page(void *ptr);
 
 #define flush_cache_kmaps()	do { } while (0)
 
+extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn,
+					unsigned long end_pfn, int bad_ppro);
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_HIGHMEM_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 034a3156d2f..e4de460907c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1011,6 +1011,8 @@ extern unsigned long find_min_pfn_with_active_regions(void);
 extern unsigned long find_max_pfn_with_active_regions(void);
 extern void free_bootmem_with_active_regions(int nid,
 						unsigned long max_low_pfn);
+typedef void (*work_fn_t)(unsigned long, unsigned long, void *);
+extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data);
 extern void sparse_memory_present_with_active_regions(int nid);
 #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
 extern int early_pfn_to_nid(unsigned long pfn);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d80e1868e57..41c6e3aa059 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2929,6 +2929,14 @@ void __init free_bootmem_with_active_regions(int nid,
 	}
 }
 
+void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
+{
+	int i;
+
+	for_each_active_range_index_in_nid(i, nid)
+		work_fn(early_node_map[i].start_pfn, early_node_map[i].end_pfn,
+			data);
+}
 /**
  * sparse_memory_present_with_active_regions - Call memory_present for each active range
  * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used.
-- 
cgit v1.2.3-70-g09d2


From 8c5beb50d3ec915d15c4d38aa37282309a65f14e Mon Sep 17 00:00:00 2001
From: "Huang, Ying" <ying.huang@intel.com>
Date: Wed, 11 Jun 2008 11:33:39 +0800
Subject: x86 boot: pass E820 memory map entries more than 128 via linked list
 of setup data

Because of the size limits of struct boot_params (zero page), the
maximum number of E820 memory map entries can be passed to kernel is
128. As pointed by Paul Jackson, there is some machine produced by SGI
with so many nodes that the number of E820 memory map entries is more
than 128. To enabling Linux kernel on these system, a new setup data
type named SETUP_E820_EXT is defined to pass additional memory map
entries to Linux kernel.

This patch is based on x86/auto-latest branch of git-x86 tree and has
been tested on x86_64 and i386 platform.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/e820.c      | 59 +++++++++++++++++++++++++++++++++++----------
 arch/x86/kernel/setup.c     |  3 +++
 include/asm-x86/bootparam.h |  1 +
 include/asm-x86/e820.h      |  2 ++
 4 files changed, 52 insertions(+), 13 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index ed46b7a6bc1..544dd12c70f 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -359,6 +359,26 @@ static struct e820entry new_bios[E820_X_MAX] __initdata;
 	return 0;
 }
 
+static int __init __copy_e820_map(struct e820entry *biosmap, int nr_map)
+{
+	while (nr_map) {
+		u64 start = biosmap->addr;
+		u64 size = biosmap->size;
+		u64 end = start + size;
+		u32 type = biosmap->type;
+
+		/* Overflow in 64 bits? Ignore the memory map. */
+		if (start > end)
+			return -1;
+
+		e820_add_region(start, size, type);
+
+		biosmap++;
+		nr_map--;
+	}
+	return 0;
+}
+
 /*
  * Copy the BIOS e820 map into a safe place.
  *
@@ -374,19 +394,7 @@ int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
 	if (nr_map < 2)
 		return -1;
 
-	do {
-		u64 start = biosmap->addr;
-		u64 size = biosmap->size;
-		u64 end = start + size;
-		u32 type = biosmap->type;
-
-		/* Overflow in 64 bits? Ignore the memory map. */
-		if (start > end)
-			return -1;
-
-		e820_add_region(start, size, type);
-	} while (biosmap++, --nr_map);
-	return 0;
+	return __copy_e820_map(biosmap, nr_map);
 }
 
 u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
@@ -496,6 +504,31 @@ __init void e820_setup_gap(void)
 	       pci_mem_start, gapstart, gapsize);
 }
 
+/**
+ * Because of the size limitation of struct boot_params, only first
+ * 128 E820 memory entries are passed to kernel via
+ * boot_params.e820_map, others are passed via SETUP_E820_EXT node of
+ * linked list of struct setup_data, which is parsed here.
+ */
+void __init parse_e820_ext(struct setup_data *sdata, unsigned long pa_data)
+{
+	u32 map_len;
+	int entries;
+	struct e820entry *extmap;
+
+	entries = sdata->len / sizeof(struct e820entry);
+	map_len = sdata->len + sizeof(struct setup_data);
+	if (map_len > PAGE_SIZE)
+		sdata = early_ioremap(pa_data, map_len);
+	extmap = (struct e820entry *)(sdata->data);
+	__copy_e820_map(extmap, entries);
+	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+	if (map_len > PAGE_SIZE)
+		early_iounmap(sdata, map_len);
+	printk(KERN_INFO "extended physical RAM map:\n");
+	e820_print_map("extended");
+}
+
 #if defined(CONFIG_X86_64) || \
 	(defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
 /**
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 45a5e247d45..5b0de38cde4 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -150,6 +150,9 @@ void __init parse_setup_data(void)
 	while (pa_data) {
 		data = early_ioremap(pa_data, PAGE_SIZE);
 		switch (data->type) {
+		case SETUP_E820_EXT:
+			parse_e820_ext(data, pa_data);
+			break;
 		default:
 			break;
 		}
diff --git a/include/asm-x86/bootparam.h b/include/asm-x86/bootparam.h
index 0a073904168..876f2113666 100644
--- a/include/asm-x86/bootparam.h
+++ b/include/asm-x86/bootparam.h
@@ -11,6 +11,7 @@
 
 /* setup data types */
 #define SETUP_NONE			0
+#define SETUP_E820_EXT			1
 
 /* extensible setup data list node */
 struct setup_data {
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index 55d31059690..77fc24d8916 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -69,6 +69,8 @@ extern u64 e820_update_range(u64 start, u64 size, unsigned old_type,
 			       unsigned new_type);
 extern void update_e820(void);
 extern void e820_setup_gap(void);
+struct setup_data;
+extern void parse_e820_ext(struct setup_data *data, unsigned long pa_data);
 
 #if defined(CONFIG_X86_64) || \
 	(defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
-- 
cgit v1.2.3-70-g09d2


From 41c094fd3ca54f1a71233049cf136ff94c91f4ae Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Mon, 16 Jun 2008 13:03:31 -0700
Subject: x86: move e820_resource_resources to e820.c

and make 32-bit resource registration more like 64 bit.

also move probe_roms back to setup_32.c

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/e820.c     |  32 +++++++
 arch/x86/kernel/e820_32.c  | 192 -----------------------------------------
 arch/x86/kernel/e820_64.c  |  24 ------
 arch/x86/kernel/setup_32.c | 208 ++++++++++++++++++++++++++++++++++++++-------
 include/asm-x86/e820.h     |   1 +
 include/asm-x86/e820_32.h  |   4 -
 include/asm-x86/e820_64.h  |   1 -
 7 files changed, 208 insertions(+), 254 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 544dd12c70f..432c4917857 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -998,3 +998,35 @@ void __init finish_e820_parsing(void)
 		e820_print_map("user");
 	}
 }
+
+/*
+ * Mark e820 reserved areas as busy for the resource manager.
+ */
+void __init e820_reserve_resources(void)
+{
+	int i;
+	struct resource *res;
+
+	res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
+	for (i = 0; i < e820.nr_map; i++) {
+		switch (e820.map[i].type) {
+		case E820_RAM:	res->name = "System RAM"; break;
+		case E820_ACPI:	res->name = "ACPI Tables"; break;
+		case E820_NVS:	res->name = "ACPI Non-volatile Storage"; break;
+		default:	res->name = "reserved";
+		}
+		res->start = e820.map[i].addr;
+		res->end = res->start + e820.map[i].size - 1;
+#ifndef CONFIG_RESOURCES_64BIT
+		if (res->end > 0x100000000ULL) {
+			res++;
+			continue;
+		}
+#endif
+		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+		insert_resource(&iomem_resource, res);
+		res++;
+	}
+}
+
+
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
index 8de3df9548d..1205ccf086d 100644
--- a/arch/x86/kernel/e820_32.c
+++ b/arch/x86/kernel/e820_32.c
@@ -15,198 +15,6 @@
 #include <asm/e820.h>
 #include <asm/setup.h>
 
-static struct resource system_rom_resource = {
-	.name	= "System ROM",
-	.start	= 0xf0000,
-	.end	= 0xfffff,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-};
-
-static struct resource extension_rom_resource = {
-	.name	= "Extension ROM",
-	.start	= 0xe0000,
-	.end	= 0xeffff,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-};
-
-static struct resource adapter_rom_resources[] = { {
-	.name 	= "Adapter ROM",
-	.start	= 0xc8000,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-}, {
-	.name 	= "Adapter ROM",
-	.start	= 0,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-}, {
-	.name 	= "Adapter ROM",
-	.start	= 0,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-}, {
-	.name 	= "Adapter ROM",
-	.start	= 0,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-}, {
-	.name 	= "Adapter ROM",
-	.start	= 0,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-}, {
-	.name 	= "Adapter ROM",
-	.start	= 0,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-} };
-
-static struct resource video_rom_resource = {
-	.name 	= "Video ROM",
-	.start	= 0xc0000,
-	.end	= 0xc7fff,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-};
-
-#define ROMSIGNATURE 0xaa55
-
-static int __init romsignature(const unsigned char *rom)
-{
-	const unsigned short * const ptr = (const unsigned short *)rom;
-	unsigned short sig;
-
-	return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE;
-}
-
-static int __init romchecksum(const unsigned char *rom, unsigned long length)
-{
-	unsigned char sum, c;
-
-	for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--)
-		sum += c;
-	return !length && !sum;
-}
-
-static void __init probe_roms(void)
-{
-	const unsigned char *rom;
-	unsigned long start, length, upper;
-	unsigned char c;
-	int i;
-
-	/* video rom */
-	upper = adapter_rom_resources[0].start;
-	for (start = video_rom_resource.start; start < upper; start += 2048) {
-		rom = isa_bus_to_virt(start);
-		if (!romsignature(rom))
-			continue;
-
-		video_rom_resource.start = start;
-
-		if (probe_kernel_address(rom + 2, c) != 0)
-			continue;
-
-		/* 0 < length <= 0x7f * 512, historically */
-		length = c * 512;
-
-		/* if checksum okay, trust length byte */
-		if (length && romchecksum(rom, length))
-			video_rom_resource.end = start + length - 1;
-
-		request_resource(&iomem_resource, &video_rom_resource);
-		break;
-	}
-
-	start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
-	if (start < upper)
-		start = upper;
-
-	/* system rom */
-	request_resource(&iomem_resource, &system_rom_resource);
-	upper = system_rom_resource.start;
-
-	/* check for extension rom (ignore length byte!) */
-	rom = isa_bus_to_virt(extension_rom_resource.start);
-	if (romsignature(rom)) {
-		length = extension_rom_resource.end - extension_rom_resource.start + 1;
-		if (romchecksum(rom, length)) {
-			request_resource(&iomem_resource, &extension_rom_resource);
-			upper = extension_rom_resource.start;
-		}
-	}
-
-	/* check for adapter roms on 2k boundaries */
-	for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
-		rom = isa_bus_to_virt(start);
-		if (!romsignature(rom))
-			continue;
-
-		if (probe_kernel_address(rom + 2, c) != 0)
-			continue;
-
-		/* 0 < length <= 0x7f * 512, historically */
-		length = c * 512;
-
-		/* but accept any length that fits if checksum okay */
-		if (!length || start + length > upper || !romchecksum(rom, length))
-			continue;
-
-		adapter_rom_resources[i].start = start;
-		adapter_rom_resources[i].end = start + length - 1;
-		request_resource(&iomem_resource, &adapter_rom_resources[i]);
-
-		start = adapter_rom_resources[i++].end & ~2047UL;
-	}
-}
-
-/*
- * Request address space for all standard RAM and ROM resources
- * and also for regions reported as reserved by the e820.
- */
-void __init init_iomem_resources(struct resource *code_resource,
-		struct resource *data_resource,
-		struct resource *bss_resource)
-{
-	int i;
-
-	probe_roms();
-	for (i = 0; i < e820.nr_map; i++) {
-		struct resource *res;
-#ifndef CONFIG_RESOURCES_64BIT
-		if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
-			continue;
-#endif
-		res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
-		switch (e820.map[i].type) {
-		case E820_RAM:	res->name = "System RAM"; break;
-		case E820_ACPI:	res->name = "ACPI Tables"; break;
-		case E820_NVS:	res->name = "ACPI Non-volatile Storage"; break;
-		default:	res->name = "reserved";
-		}
-		res->start = e820.map[i].addr;
-		res->end = res->start + e820.map[i].size - 1;
-		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-		if (request_resource(&iomem_resource, res)) {
-			kfree(res);
-			continue;
-		}
-		if (e820.map[i].type == E820_RAM) {
-			/*
-			 *  We don't know which RAM region contains kernel data,
-			 *  so we try it repeatedly and let the resource manager
-			 *  test it.
-			 */
-			request_resource(res, code_resource);
-			request_resource(res, data_resource);
-			request_resource(res, bss_resource);
-#ifdef CONFIG_KEXEC
-			if (crashk_res.start != crashk_res.end)
-				request_resource(res, &crashk_res);
-#endif
-		}
-	}
-}
-
 /* Overridden in paravirt.c if CONFIG_PARAVIRT */
 char * __init __attribute__((weak)) memory_setup(void)
 {
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index 47952b1690b..cb03bff9fa2 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -41,30 +41,6 @@ unsigned long end_pfn;
  */
 unsigned long max_pfn_mapped;
 
-/*
- * Mark e820 reserved areas as busy for the resource manager.
- */
-void __init e820_reserve_resources(void)
-{
-	int i;
-	struct resource *res;
-
-	res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
-	for (i = 0; i < e820.nr_map; i++) {
-		switch (e820.map[i].type) {
-		case E820_RAM:	res->name = "System RAM"; break;
-		case E820_ACPI:	res->name = "ACPI Tables"; break;
-		case E820_NVS:	res->name = "ACPI Non-volatile Storage"; break;
-		default:	res->name = "reserved";
-		}
-		res->start = e820.map[i].addr;
-		res->end = res->start + e820.map[i].size - 1;
-		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-		insert_resource(&iomem_resource, res);
-		res++;
-	}
-}
-
 static void early_panic(char *msg)
 {
 	early_printk(msg);
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 72b11d4557b..f3ddba5ed9a 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -445,25 +445,28 @@ static void __init reserve_crashkernel(void)
 	ret = parse_crashkernel(boot_command_line, total_mem,
 			&crash_size, &crash_base);
 	if (ret == 0 && crash_size > 0) {
-		if (crash_base > 0) {
-			printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
-					"for crashkernel (System RAM: %ldMB)\n",
-					(unsigned long)(crash_size >> 20),
-					(unsigned long)(crash_base >> 20),
-					(unsigned long)(total_mem >> 20));
-
-			if (reserve_bootmem(crash_base, crash_size,
-					BOOTMEM_EXCLUSIVE) < 0) {
-				printk(KERN_INFO "crashkernel reservation "
-					"failed - memory is in use\n");
-				return;
-			}
-
-			crashk_res.start = crash_base;
-			crashk_res.end   = crash_base + crash_size - 1;
-		} else
+		if (crash_base <= 0) {
 			printk(KERN_INFO "crashkernel reservation failed - "
 					"you have to specify a base address\n");
+			return;
+		}
+
+		if (reserve_bootmem_generic(crash_base, crash_size,
+					BOOTMEM_EXCLUSIVE) < 0) {
+			printk(KERN_INFO "crashkernel reservation failed - "
+					"memory is in use\n");
+			return;
+		}
+
+		printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+				"for crashkernel (System RAM: %ldMB)\n",
+				(unsigned long)(crash_size >> 20),
+				(unsigned long)(crash_base >> 20),
+				(unsigned long)(total_mem >> 20));
+
+		crashk_res.start = crash_base;
+		crashk_res.end   = crash_base + crash_size - 1;
+		insert_resource(&iomem_resource, &crashk_res);
 	}
 }
 #else
@@ -675,6 +678,8 @@ int x86_cpu_to_node_map_init[NR_CPUS] = {
 DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
 #endif
 
+static void probe_roms(void);
+
 /*
  * Determine if we were loaded by an EFI loader.  If so, then we have also been
  * passed the efi memmap, systab, etc., so we should use these data structures
@@ -684,6 +689,7 @@ DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
  */
 void __init setup_arch(char **cmdline_p)
 {
+	int i;
 	unsigned long max_low_pfn;
 
 	memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
@@ -745,6 +751,13 @@ void __init setup_arch(char **cmdline_p)
 
 	finish_e820_parsing();
 
+	probe_roms();
+
+	/* after parse_early_param, so could debug it */
+	insert_resource(&iomem_resource, &code_resource);
+	insert_resource(&iomem_resource, &data_resource);
+	insert_resource(&iomem_resource, &bss_resource);
+
 	strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
 	*cmdline_p = command_line;
 
@@ -861,9 +874,16 @@ void __init setup_arch(char **cmdline_p)
 			"CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
 #endif
 
-	e820_setup_gap();
+	e820_reserve_resources();
 	e820_mark_nosave_regions(max_low_pfn);
 
+	request_resource(&iomem_resource, &video_ram_resource);
+	/* request I/O space for devices used on all i[345]86 PCs */
+	for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
+		request_resource(&ioport_resource, &standard_io_resources[i]);
+
+	e820_setup_gap();
+
 #ifdef CONFIG_VT
 #if defined(CONFIG_VGA_CONSOLE)
 	if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
@@ -874,25 +894,147 @@ void __init setup_arch(char **cmdline_p)
 #endif
 }
 
-/*
- * Request address space for all standard resources
- *
- * This is called just before pcibios_init(), which is also a
- * subsys_initcall, but is linked in later (in arch/i386/pci/common.c).
- */
-static int __init request_standard_resources(void)
+static struct resource system_rom_resource = {
+	.name	= "System ROM",
+	.start	= 0xf0000,
+	.end	= 0xfffff,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+};
+
+static struct resource extension_rom_resource = {
+	.name	= "Extension ROM",
+	.start	= 0xe0000,
+	.end	= 0xeffff,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+};
+
+static struct resource adapter_rom_resources[] = { {
+	.name 	= "Adapter ROM",
+	.start	= 0xc8000,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+	.name 	= "Adapter ROM",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+	.name 	= "Adapter ROM",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+	.name 	= "Adapter ROM",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+	.name 	= "Adapter ROM",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+	.name 	= "Adapter ROM",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+} };
+
+static struct resource video_rom_resource = {
+	.name 	= "Video ROM",
+	.start	= 0xc0000,
+	.end	= 0xc7fff,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+};
+
+#define ROMSIGNATURE 0xaa55
+
+static int __init romsignature(const unsigned char *rom)
+{
+	const unsigned short * const ptr = (const unsigned short *)rom;
+	unsigned short sig;
+
+	return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE;
+}
+
+static int __init romchecksum(const unsigned char *rom, unsigned long length)
+{
+	unsigned char sum, c;
+
+	for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--)
+		sum += c;
+	return !length && !sum;
+}
+
+static void __init probe_roms(void)
 {
+	const unsigned char *rom;
+	unsigned long start, length, upper;
+	unsigned char c;
 	int i;
 
-	printk(KERN_INFO "Setting up standard PCI resources\n");
-	init_iomem_resources(&code_resource, &data_resource, &bss_resource);
+	/* video rom */
+	upper = adapter_rom_resources[0].start;
+	for (start = video_rom_resource.start; start < upper; start += 2048) {
+		rom = isa_bus_to_virt(start);
+		if (!romsignature(rom))
+			continue;
 
-	request_resource(&iomem_resource, &video_ram_resource);
+		video_rom_resource.start = start;
 
-	/* request I/O space for devices used on all i[345]86 PCs */
-	for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
-		request_resource(&ioport_resource, &standard_io_resources[i]);
-	return 0;
+		if (probe_kernel_address(rom + 2, c) != 0)
+			continue;
+
+		/* 0 < length <= 0x7f * 512, historically */
+		length = c * 512;
+
+		/* if checksum okay, trust length byte */
+		if (length && romchecksum(rom, length))
+			video_rom_resource.end = start + length - 1;
+
+		request_resource(&iomem_resource, &video_rom_resource);
+		break;
+	}
+
+	start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
+	if (start < upper)
+		start = upper;
+
+	/* system rom */
+	request_resource(&iomem_resource, &system_rom_resource);
+	upper = system_rom_resource.start;
+
+	/* check for extension rom (ignore length byte!) */
+	rom = isa_bus_to_virt(extension_rom_resource.start);
+	if (romsignature(rom)) {
+		length = extension_rom_resource.end - extension_rom_resource.start + 1;
+		if (romchecksum(rom, length)) {
+			request_resource(&iomem_resource, &extension_rom_resource);
+			upper = extension_rom_resource.start;
+		}
+	}
+
+	/* check for adapter roms on 2k boundaries */
+	for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
+		rom = isa_bus_to_virt(start);
+		if (!romsignature(rom))
+			continue;
+
+		if (probe_kernel_address(rom + 2, c) != 0)
+			continue;
+
+		/* 0 < length <= 0x7f * 512, historically */
+		length = c * 512;
+
+		/* but accept any length that fits if checksum okay */
+		if (!length || start + length > upper || !romchecksum(rom, length))
+			continue;
+
+		adapter_rom_resources[i].start = start;
+		adapter_rom_resources[i].end = start + length - 1;
+		request_resource(&iomem_resource, &adapter_rom_resources[i]);
+
+		start = adapter_rom_resources[i++].end & ~2047UL;
+	}
 }
 
-subsys_initcall(request_standard_resources);
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index 77fc24d8916..e860fe758e7 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -100,6 +100,7 @@ extern void e820_register_active_regions(int nid, unsigned long start_pfn,
 					 unsigned long end_pfn);
 extern u64 e820_hole_size(u64 start, u64 end);
 extern void finish_e820_parsing(void);
+extern void e820_reserve_resources(void);
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h
index 9135ce6e617..557b890549f 100644
--- a/include/asm-x86/e820_32.h
+++ b/include/asm-x86/e820_32.h
@@ -20,9 +20,5 @@
 
 extern void setup_memory_map(void);
 
-extern void init_iomem_resources(struct resource *code_resource,
-				 struct resource *data_resource,
-				 struct resource *bss_resource);
-
 #endif/*!__ASSEMBLY__*/
 #endif/*__E820_HEADER*/
diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h
index 478547c6322..8d992109969 100644
--- a/include/asm-x86/e820_64.h
+++ b/include/asm-x86/e820_64.h
@@ -16,7 +16,6 @@
 #ifndef __ASSEMBLY__
 extern void setup_memory_region(void);
 extern void contig_e820_setup(void);
-extern void e820_reserve_resources(void);
 extern int e820_any_non_reserved(unsigned long start, unsigned long end);
 extern int is_memory_any_valid(unsigned long start, unsigned long end);
 extern int e820_all_non_reserved(unsigned long start, unsigned long end);
-- 
cgit v1.2.3-70-g09d2


From cc9f7a0ccf000d4db5fbdc7b0ae48eefea102f69 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Mon, 16 Jun 2008 16:11:08 -0700
Subject: x86: kill bad_ppro

so don't punish all other cpus without that problem when init highmem

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup_32.c |  9 +++++++++
 arch/x86/mm/discontig_32.c |  4 ++--
 arch/x86/mm/init_32.c      | 43 ++++++++++++-------------------------------
 include/asm-x86/highmem.h  |  2 +-
 include/asm-x86/numa_32.h  |  2 +-
 5 files changed, 25 insertions(+), 35 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index f3ddba5ed9a..9692aeb8eca 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -68,6 +68,7 @@
 #include <asm/cacheflush.h>
 #include <asm/processor.h>
 #include <asm/efi.h>
+#include <asm/bugs.h>
 
 /* This value is set up by the early boot code to point to the value
    immediately after the boot time page tables.  It contains a *physical*
@@ -764,6 +765,14 @@ void __init setup_arch(char **cmdline_p)
 	if (efi_enabled)
 		efi_init();
 
+	if (ppro_with_ram_bug()) {
+		e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM,
+				  E820_RESERVED);
+		sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+		printk(KERN_INFO "fixed physical RAM map:\n");
+		e820_print_map("bad_ppro");
+	}
+
 	e820_register_active_regions(0, 0, -1UL);
 	/*
 	 * partially used pages are not usable - thus
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 7c4d0255f8d..6216e43b6e9 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -427,7 +427,7 @@ void __init zone_sizes_init(void)
 	return;
 }
 
-void __init set_highmem_pages_init(int bad_ppro) 
+void __init set_highmem_pages_init(void)
 {
 #ifdef CONFIG_HIGHMEM
 	struct zone *zone;
@@ -447,7 +447,7 @@ void __init set_highmem_pages_init(int bad_ppro)
 				zone->name, nid, zone_start_pfn, zone_end_pfn);
 
 		add_highpages_with_active_regions(nid, zone_start_pfn,
-				 zone_end_pfn, bad_ppro);
+				 zone_end_pfn);
 	}
 	totalram_pages += totalhigh_pages;
 #endif
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index ba07a489230..fb5694d788b 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -220,13 +220,6 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
 	}
 }
 
-static inline int page_kills_ppro(unsigned long pagenr)
-{
-	if (pagenr >= 0x70000 && pagenr <= 0x7003F)
-		return 1;
-	return 0;
-}
-
 /*
  * devmem_is_allowed() checks to see if /dev/mem access to a certain address
  * is valid. The argument is a physical page number.
@@ -287,22 +280,17 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
 	pkmap_page_table = pte;
 }
 
-static void __init
-add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
+static void __init add_one_highpage_init(struct page *page, int pfn)
 {
-	if (!(bad_ppro && page_kills_ppro(pfn))) {
-		ClearPageReserved(page);
-		init_page_count(page);
-		__free_page(page);
-		totalhigh_pages++;
-	} else
-		SetPageReserved(page);
+	ClearPageReserved(page);
+	init_page_count(page);
+	__free_page(page);
+	totalhigh_pages++;
 }
 
 struct add_highpages_data {
 	unsigned long start_pfn;
 	unsigned long end_pfn;
-	int bad_ppro;
 };
 
 static void __init add_highpages_work_fn(unsigned long start_pfn,
@@ -312,10 +300,8 @@ static void __init add_highpages_work_fn(unsigned long start_pfn,
 	struct page *page;
 	unsigned long final_start_pfn, final_end_pfn;
 	struct add_highpages_data *data;
-	int bad_ppro;
 
 	data = (struct add_highpages_data *)datax;
-	bad_ppro = data->bad_ppro;
 
 	final_start_pfn = max(start_pfn, data->start_pfn);
 	final_end_pfn = min(end_pfn, data->end_pfn);
@@ -327,29 +313,26 @@ static void __init add_highpages_work_fn(unsigned long start_pfn,
 		if (!pfn_valid(node_pfn))
 			continue;
 		page = pfn_to_page(node_pfn);
-		add_one_highpage_init(page, node_pfn, bad_ppro);
+		add_one_highpage_init(page, node_pfn);
 	}
 
 }
 
 void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn,
-					      unsigned long end_pfn,
-					      int bad_ppro)
+					      unsigned long end_pfn)
 {
 	struct add_highpages_data data;
 
 	data.start_pfn = start_pfn;
 	data.end_pfn = end_pfn;
-	data.bad_ppro = bad_ppro;
 
 	work_with_active_regions(nid, add_highpages_work_fn, &data);
 }
 
 #ifndef CONFIG_NUMA
-static void __init set_highmem_pages_init(int bad_ppro)
+static void __init set_highmem_pages_init(void)
 {
-	add_highpages_with_active_regions(0, highstart_pfn, highend_pfn,
-						bad_ppro);
+	add_highpages_with_active_regions(0, highstart_pfn, highend_pfn);
 
 	totalram_pages += totalhigh_pages;
 }
@@ -358,7 +341,7 @@ static void __init set_highmem_pages_init(int bad_ppro)
 #else
 # define kmap_init()				do { } while (0)
 # define permanent_kmaps_init(pgd_base)		do { } while (0)
-# define set_highmem_pages_init(bad_ppro)	do { } while (0)
+# define set_highmem_pages_init()	do { } while (0)
 #endif /* CONFIG_HIGHMEM */
 
 pteval_t __PAGE_KERNEL = _PAGE_KERNEL;
@@ -605,13 +588,11 @@ static struct kcore_list kcore_mem, kcore_vmalloc;
 void __init mem_init(void)
 {
 	int codesize, reservedpages, datasize, initsize;
-	int tmp, bad_ppro;
+	int tmp;
 
 #ifdef CONFIG_FLATMEM
 	BUG_ON(!mem_map);
 #endif
-	bad_ppro = ppro_with_ram_bug();
-
 #ifdef CONFIG_HIGHMEM
 	/* check that fixmap and pkmap do not overlap */
 	if (PKMAP_BASE + LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) {
@@ -634,7 +615,7 @@ void __init mem_init(void)
 		if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
 			reservedpages++;
 
-	set_highmem_pages_init(bad_ppro);
+	set_highmem_pages_init();
 
 	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
 	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
diff --git a/include/asm-x86/highmem.h b/include/asm-x86/highmem.h
index 85c4fea41ff..4514b16cc72 100644
--- a/include/asm-x86/highmem.h
+++ b/include/asm-x86/highmem.h
@@ -75,7 +75,7 @@ struct page *kmap_atomic_to_page(void *ptr);
 #define flush_cache_kmaps()	do { } while (0)
 
 extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn,
-					unsigned long end_pfn, int bad_ppro);
+					unsigned long end_pfn);
 
 #endif /* __KERNEL__ */
 
diff --git a/include/asm-x86/numa_32.h b/include/asm-x86/numa_32.h
index 03d0f7a9bf0..a02674f6486 100644
--- a/include/asm-x86/numa_32.h
+++ b/include/asm-x86/numa_32.h
@@ -5,7 +5,7 @@ extern int pxm_to_nid(int pxm);
 
 #ifdef CONFIG_NUMA
 extern void __init remap_numa_kva(void);
-extern void set_highmem_pages_init(int);
+extern void set_highmem_pages_init(void);
 #else
 static inline void remap_numa_kva(void)
 {
-- 
cgit v1.2.3-70-g09d2


From 064d25f12014ae1d97c2882f9ab874995321f2b2 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Mon, 16 Jun 2008 19:58:28 -0700
Subject: x86: merge setup_memory_map with e820

... and kill e820_32/64.c and e820_32/64.h

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/Makefile      |  2 +-
 arch/x86/kernel/e820.c        | 72 +++++++++++++++++++++++++++++++++
 arch/x86/kernel/e820_32.c     | 29 --------------
 arch/x86/kernel/e820_64.c     | 92 -------------------------------------------
 arch/x86/kernel/setup_64.c    |  8 +---
 arch/x86/mach-default/setup.c | 47 ----------------------
 arch/x86/mm/init_64.c         | 12 ++++++
 include/asm-x86/e820.h        |  9 +++--
 include/asm-x86/e820_32.h     | 24 -----------
 include/asm-x86/e820_64.h     | 26 ------------
 include/asm-x86/setup.h       |  5 ---
 11 files changed, 92 insertions(+), 234 deletions(-)
 delete mode 100644 arch/x86/kernel/e820_32.c
 delete mode 100644 arch/x86/kernel/e820_64.c
 delete mode 100644 include/asm-x86/e820_32.h
 delete mode 100644 include/asm-x86/e820_64.h

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index dc3c636d113..bcc2b123dab 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -22,7 +22,7 @@ obj-y			+= setup_$(BITS).o i8259_$(BITS).o setup.o
 obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)	+= syscall_64.o vsyscall_64.o setup64.o
-obj-y			+= bootflag.o e820_$(BITS).o e820.o
+obj-y			+= bootflag.o e820.o
 obj-y			+= pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
 obj-y			+= alternative.o i8253.o pci-nommu.o
 obj-$(CONFIG_X86_64)	+= bugs_64.o
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 432c4917857..49477484a2f 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1029,4 +1029,76 @@ void __init e820_reserve_resources(void)
 	}
 }
 
+char *__init __attribute__((weak)) machine_specific_memory_setup(void)
+{
+	char *who = "BIOS-e820";
+	int new_nr;
+	/*
+	 * Try to copy the BIOS-supplied E820-map.
+	 *
+	 * Otherwise fake a memory map; one section from 0k->640k,
+	 * the next section from 1mb->appropriate_mem_k
+	 */
+	new_nr = boot_params.e820_entries;
+	sanitize_e820_map(boot_params.e820_map,
+			ARRAY_SIZE(boot_params.e820_map),
+			&new_nr);
+	boot_params.e820_entries = new_nr;
+	if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) {
+#ifdef CONFIG_X86_64
+		early_panic("Cannot find a valid memory map");
+#else
+		unsigned long mem_size;
+
+		/* compare results from other methods and take the greater */
+		if (boot_params.alt_mem_k
+		    < boot_params.screen_info.ext_mem_k) {
+			mem_size = boot_params.screen_info.ext_mem_k;
+			who = "BIOS-88";
+		} else {
+			mem_size = boot_params.alt_mem_k;
+			who = "BIOS-e801";
+		}
+
+		e820.nr_map = 0;
+		e820_add_region(0, LOWMEMSIZE(), E820_RAM);
+		e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
+#endif
+	}
+
+	/* In case someone cares... */
+	return who;
+}
+
+/* Overridden in paravirt.c if CONFIG_PARAVIRT */
+char * __init __attribute__((weak)) memory_setup(void)
+{
+	return machine_specific_memory_setup();
+}
+
+void __init setup_memory_map(void)
+{
+	printk(KERN_INFO "BIOS-provided physical RAM map:\n");
+	e820_print_map(memory_setup());
+}
+
+#ifdef CONFIG_X86_64
+int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)
+{
+	int i;
 
+	if (slot < 0 || slot >= e820.nr_map)
+		return -1;
+	for (i = slot; i < e820.nr_map; i++) {
+		if (e820.map[i].type != E820_RAM)
+			continue;
+		break;
+	}
+	if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT))
+		return -1;
+	*addr = e820.map[i].addr;
+	*size = min_t(u64, e820.map[i].size + e820.map[i].addr,
+		max_pfn << PAGE_SHIFT) - *addr;
+	return i + 1;
+}
+#endif
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
deleted file mode 100644
index 1205ccf086d..00000000000
--- a/arch/x86/kernel/e820_32.c
+++ /dev/null
@@ -1,29 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/ioport.h>
-#include <linux/string.h>
-#include <linux/kexec.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/pfn.h>
-#include <linux/uaccess.h>
-
-#include <asm/pgtable.h>
-#include <asm/page.h>
-#include <asm/e820.h>
-#include <asm/setup.h>
-
-/* Overridden in paravirt.c if CONFIG_PARAVIRT */
-char * __init __attribute__((weak)) memory_setup(void)
-{
-	return machine_specific_memory_setup();
-}
-
-void __init setup_memory_map(void)
-{
-	printk(KERN_INFO "BIOS-provided physical RAM map:\n");
-	e820_print_map(memory_setup());
-}
-
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
deleted file mode 100644
index cb03bff9fa2..00000000000
--- a/arch/x86/kernel/e820_64.c
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Handle the memory map.
- * The functions here do the job until bootmem takes over.
- *
- *  Getting sanitize_e820_map() in sync with i386 version by applying change:
- *  -  Provisions for empty E820 memory regions (reported by certain BIOSes).
- *     Alex Achenbach <xela@slit.de>, December 2002.
- *  Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
- *
- */
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/ioport.h>
-#include <linux/string.h>
-#include <linux/kexec.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/pfn.h>
-#include <linux/pci.h>
-
-#include <asm/pgtable.h>
-#include <asm/page.h>
-#include <asm/e820.h>
-#include <asm/proto.h>
-#include <asm/setup.h>
-#include <asm/sections.h>
-#include <asm/kdebug.h>
-#include <asm/trampoline.h>
-
-/*
- * PFN of last memory page.
- */
-unsigned long end_pfn;
-
-/*
- * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
- * The direct mapping extends to max_pfn_mapped, so that we can directly access
- * apertures, ACPI and other tables without having to play with fixmaps.
- */
-unsigned long max_pfn_mapped;
-
-static void early_panic(char *msg)
-{
-	early_printk(msg);
-	panic(msg);
-}
-
-/* We're not void only for x86 32-bit compat */
-char *__init machine_specific_memory_setup(void)
-{
-	char *who = "BIOS-e820";
-	int new_nr;
-	/*
-	 * Try to copy the BIOS-supplied E820-map.
-	 *
-	 * Otherwise fake a memory map; one section from 0k->640k,
-	 * the next section from 1mb->appropriate_mem_k
-	 */
-	new_nr = boot_params.e820_entries;
-	sanitize_e820_map(boot_params.e820_map,
-			ARRAY_SIZE(boot_params.e820_map),
-			&new_nr);
-	boot_params.e820_entries = new_nr;
-	if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0)
-		early_panic("Cannot find a valid memory map");
-	printk(KERN_INFO "BIOS-provided physical RAM map:\n");
-	e820_print_map(who);
-
-	/* In case someone cares... */
-	return who;
-}
-
-int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)
-{
-	int i;
-
-	if (slot < 0 || slot >= e820.nr_map)
-		return -1;
-	for (i = slot; i < e820.nr_map; i++) {
-		if (e820.map[i].type != E820_RAM)
-			continue;
-		break;
-	}
-	if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT))
-		return -1;
-	*addr = e820.map[i].addr;
-	*size = min_t(u64, e820.map[i].size + e820.map[i].addr,
-		max_pfn << PAGE_SHIFT) - *addr;
-	return i + 1;
-}
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index ec65696ca2c..3220c7b56eb 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -266,12 +266,6 @@ static inline void __init reserve_crashkernel(void)
 {}
 #endif
 
-/* Overridden in paravirt.c if CONFIG_PARAVIRT */
-void __attribute__((weak)) __init memory_setup(void)
-{
-       machine_specific_memory_setup();
-}
-
 #ifdef CONFIG_PCI_MMCONFIG
 extern void __cpuinit fam10h_check_enable_mmcfg(void);
 extern void __init check_enable_amd_mmconf_dmi(void);
@@ -316,7 +310,7 @@ void __init setup_arch(char **cmdline_p)
 
 	ARCH_SETUP
 
-	memory_setup();
+	setup_memory_map();
 	copy_edd();
 
 	if (!boot_params.hdr.root_flags)
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c
index 7ae692a7676..2f5e277686b 100644
--- a/arch/x86/mach-default/setup.c
+++ b/arch/x86/mach-default/setup.c
@@ -142,50 +142,3 @@ static int __init print_ipi_mode(void)
 
 late_initcall(print_ipi_mode);
 
-/**
- * machine_specific_memory_setup - Hook for machine specific memory setup.
- *
- * Description:
- *	This is included late in kernel/setup.c so that it can make
- *	use of all of the static functions.
- **/
-
-char * __init machine_specific_memory_setup(void)
-{
-	char *who;
-	int new_nr;
-
-
-	who = "BIOS-e820";
-
-	/*
-	 * Try to copy the BIOS-supplied E820-map.
-	 *
-	 * Otherwise fake a memory map; one section from 0k->640k,
-	 * the next section from 1mb->appropriate_mem_k
-	 */
-	new_nr = boot_params.e820_entries;
-	sanitize_e820_map(boot_params.e820_map,
-			ARRAY_SIZE(boot_params.e820_map),
-			&new_nr);
-	boot_params.e820_entries = new_nr;
-	if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries)
-	    < 0) {
-		unsigned long mem_size;
-
-		/* compare results from other methods and take the greater */
-		if (boot_params.alt_mem_k
-		    < boot_params.screen_info.ext_mem_k) {
-			mem_size = boot_params.screen_info.ext_mem_k;
-			who = "BIOS-88";
-		} else {
-			mem_size = boot_params.alt_mem_k;
-			who = "BIOS-e801";
-		}
-
-		e820.nr_map = 0;
-		e820_add_region(0, LOWMEMSIZE(), E820_RAM);
-		e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
-  	}
-	return who;
-}
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index b8c2c1ef7ad..5266f3141d6 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -47,6 +47,18 @@
 #include <asm/numa.h>
 #include <asm/cacheflush.h>
 
+/*
+ * PFN of last memory page.
+ */
+unsigned long end_pfn;
+
+/*
+ * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
+ * The direct mapping extends to max_pfn_mapped, so that we can directly access
+ * apertures, ACPI and other tables without having to play with fixmaps.
+ */
+unsigned long max_pfn_mapped;
+
 static unsigned long dma_reserve __initdata;
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index e860fe758e7..83144cb6c68 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -101,6 +101,9 @@ extern void e820_register_active_regions(int nid, unsigned long start_pfn,
 extern u64 e820_hole_size(u64 start, u64 end);
 extern void finish_e820_parsing(void);
 extern void e820_reserve_resources(void);
+extern void setup_memory_map(void);
+extern char *machine_specific_memory_setup(void);
+extern char *memory_setup(void);
 
 #endif /* __ASSEMBLY__ */
 
@@ -111,10 +114,10 @@ extern void e820_reserve_resources(void);
 #define BIOS_END		0x00100000
 
 #ifdef __KERNEL__
+#include <linux/ioport.h>
+
 #ifdef CONFIG_X86_32
-# include "e820_32.h"
-#else
-# include "e820_64.h"
+#define HIGH_MEMORY	(1024*1024)
 #endif
 #endif /* __KERNEL__ */
 
diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h
deleted file mode 100644
index 557b890549f..00000000000
--- a/include/asm-x86/e820_32.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * structures and definitions for the int 15, ax=e820 memory map
- * scheme.
- *
- * In a nutshell, arch/i386/boot/setup.S populates a scratch table
- * in the empty_zero_block that contains a list of usable address/size
- * duples.   In arch/i386/kernel/setup.c, this information is
- * transferred into the e820map, and in arch/i386/mm/init.c, that
- * new information is used to mark pages reserved or not.
- *
- */
-#ifndef __E820_HEADER
-#define __E820_HEADER
-
-#include <linux/ioport.h>
-
-#define HIGH_MEMORY	(1024*1024)
-
-#ifndef __ASSEMBLY__
-
-extern void setup_memory_map(void);
-
-#endif/*!__ASSEMBLY__*/
-#endif/*__E820_HEADER*/
diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h
deleted file mode 100644
index 8d992109969..00000000000
--- a/include/asm-x86/e820_64.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * structures and definitions for the int 15, ax=e820 memory map
- * scheme.
- *
- * In a nutshell, setup.S populates a scratch table in the
- * empty_zero_block that contains a list of usable address/size
- * duples.  setup.c, this information is transferred into the e820map,
- * and in init.c/numa.c, that new information is used to mark pages
- * reserved or not.
- */
-#ifndef __E820_HEADER
-#define __E820_HEADER
-
-#include <linux/ioport.h>
-
-#ifndef __ASSEMBLY__
-extern void setup_memory_region(void);
-extern void contig_e820_setup(void);
-extern int e820_any_non_reserved(unsigned long start, unsigned long end);
-extern int is_memory_any_valid(unsigned long start, unsigned long end);
-extern int e820_all_non_reserved(unsigned long start, unsigned long end);
-extern int is_memory_all_valid(unsigned long start, unsigned long end);
-
-#endif/*!__ASSEMBLY__*/
-
-#endif/*__E820_HEADER*/
diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index 9e163fc3e98..b676b0be798 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -8,7 +8,6 @@
 /* Interrupt control for vSMPowered x86_64 systems */
 void vsmp_init(void);
 
-char *machine_specific_memory_setup(void);
 #ifndef CONFIG_PARAVIRT
 #define paravirt_post_allocator_init()	do {} while (0)
 #endif
@@ -50,10 +49,6 @@ extern struct boot_params boot_params;
  */
 #define LOWMEMSIZE()	(0x9f000)
 
-char * __init machine_specific_memory_setup(void);
-char *memory_setup(void);
-
-
 void __init i386_start_kernel(void);
 
 extern unsigned long init_pg_tables_start;
-- 
cgit v1.2.3-70-g09d2


From 1c6e55032e24ff79668581a0f296c278ef7edd4e Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 17 Jun 2008 15:41:45 -0700
Subject: x86: use acpi_numa_init to parse on 32-bit numa

seperate SRAT finding and parsing from get_memcfg_from_srat,
and let getmemcfg_from_srat only handle array from previous step.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup_32.c |  36 +++++---
 arch/x86/kernel/srat_32.c  | 200 +++++++++++----------------------------------
 include/asm-x86/acpi.h     |   4 +-
 include/asm-x86/dmi.h      |   8 --
 4 files changed, 75 insertions(+), 173 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 90a2b857b4a..7e06ecd8317 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -59,6 +59,7 @@
 #include <asm/setup.h>
 #include <asm/arch_hooks.h>
 #include <asm/sections.h>
+#include <asm/dmi.h>
 #include <asm/io_apic.h>
 #include <asm/ist.h>
 #include <asm/io.h>
@@ -184,6 +185,12 @@ int bootloader_type;
 /* user-defined highmem size */
 static unsigned int highmem_pages = -1;
 
+/*
+ * Early DMI memory
+ */
+int dmi_alloc_index;
+char dmi_alloc_data[DMI_MAX_DATA];
+
 /*
  * Setup options
  */
@@ -775,6 +782,24 @@ void __init setup_arch(char **cmdline_p)
 		max_pfn = e820_end_of_ram();
 	}
 
+	dmi_scan_machine();
+
+	io_delay_init();
+
+#ifdef CONFIG_ACPI
+	/*
+	 * Parse the ACPI tables for possible boot-time SMP configuration.
+	 */
+	acpi_boot_table_init();
+#endif
+
+#ifdef CONFIG_ACPI_NUMA
+        /*
+         * Parse SRAT to discover nodes.
+         */
+        acpi_numa_init();
+#endif
+
 	max_low_pfn = setup_memory();
 
 #ifdef CONFIG_ACPI_SLEEP
@@ -841,10 +866,6 @@ void __init setup_arch(char **cmdline_p)
 
 	paravirt_post_allocator_init();
 
-	dmi_scan_machine();
-
-	io_delay_init();
-
 #ifdef CONFIG_X86_SMP
 	/*
 	 * setup to use the early static init tables during kernel startup
@@ -861,13 +882,6 @@ void __init setup_arch(char **cmdline_p)
 	generic_apic_probe();
 #endif
 
-#ifdef CONFIG_ACPI
-	/*
-	 * Parse the ACPI tables for possible boot-time SMP configuration.
-	 */
-	acpi_boot_table_init();
-#endif
-
 	early_quirks();
 
 #ifdef CONFIG_ACPI
diff --git a/arch/x86/kernel/srat_32.c b/arch/x86/kernel/srat_32.c
index e9d91720a40..5978023b799 100644
--- a/arch/x86/kernel/srat_32.c
+++ b/arch/x86/kernel/srat_32.c
@@ -42,7 +42,7 @@
 #define BMAP_TEST(bmap, bit)	((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit)))
 /* bitmap length; _PXM is at most 255 */
 #define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8) 
-static u8 pxm_bitmap[PXM_BITMAP_LEN];	/* bitmap of proximity domains */
+static u8 __initdata pxm_bitmap[PXM_BITMAP_LEN];	/* bitmap of proximity domains */
 
 #define MAX_CHUNKS_PER_NODE	3
 #define MAXCHUNKS		(MAX_CHUNKS_PER_NODE * MAX_NUMNODES)
@@ -53,16 +53,37 @@ struct node_memory_chunk_s {
 	u8	nid;		// which cnode contains this chunk?
 	u8	bank;		// which mem bank on this node
 };
-static struct node_memory_chunk_s node_memory_chunk[MAXCHUNKS];
+static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS];
 
-static int num_memory_chunks;		/* total number of memory chunks */
+static int __initdata num_memory_chunks; /* total number of memory chunks */
 static u8 __initdata apicid_to_pxm[MAX_APICID];
 
+int numa_off __initdata;
+int acpi_numa __initdata;
+
+static __init void bad_srat(void)
+{
+        printk(KERN_ERR "SRAT: SRAT not used.\n");
+        acpi_numa = -1;
+	num_memory_chunks = 0;
+}
+
+static __init inline int srat_disabled(void)
+{
+	return numa_off || acpi_numa < 0;
+}
+
 /* Identify CPU proximity domains */
-static void __init parse_cpu_affinity_structure(char *p)
+void __init
+acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *cpu_affinity)
 {
-	struct acpi_srat_cpu_affinity *cpu_affinity =
-				(struct acpi_srat_cpu_affinity *) p;
+	if (srat_disabled())
+		return;
+	if (cpu_affinity->header.length !=
+	     sizeof(struct acpi_srat_cpu_affinity)) {
+		bad_srat();
+		return;
+	}
 
 	if ((cpu_affinity->flags & ACPI_SRAT_CPU_ENABLED) == 0)
 		return;		/* empty entry */
@@ -80,14 +101,21 @@ static void __init parse_cpu_affinity_structure(char *p)
  * Identify memory proximity domains and hot-remove capabilities.
  * Fill node memory chunk list structure.
  */
-static void __init parse_memory_affinity_structure (char *sratp)
+void __init
+acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *memory_affinity)
 {
 	unsigned long long paddr, size;
 	unsigned long start_pfn, end_pfn;
 	u8 pxm;
 	struct node_memory_chunk_s *p, *q, *pend;
-	struct acpi_srat_mem_affinity *memory_affinity =
-			(struct acpi_srat_mem_affinity *) sratp;
+
+	if (srat_disabled())
+		return;
+	if (memory_affinity->header.length !=
+	     sizeof(struct acpi_srat_mem_affinity)) {
+		bad_srat();
+		return;
+	}
 
 	if ((memory_affinity->flags & ACPI_SRAT_MEM_ENABLED) == 0)
 		return;		/* empty entry */
@@ -135,6 +163,14 @@ static void __init parse_memory_affinity_structure (char *sratp)
 		 "enabled and removable" : "enabled" ) );
 }
 
+/* Callback for SLIT parsing */
+void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
+{
+}
+
+void acpi_numa_arch_fixup(void)
+{
+}
 /*
  * The SRAT table always lists ascending addresses, so can always
  * assume that the first "start" address that you see is the real
@@ -167,39 +203,13 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c
 		node_end_pfn[nid] = memory_chunk->end_pfn;
 }
 
-/* Parse the ACPI Static Resource Affinity Table */
-static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
+int __init get_memcfg_from_srat(void)
 {
-	u8 *start, *end, *p;
 	int i, j, nid;
 
-	start = (u8 *)(&(sratp->reserved) + 1);	/* skip header */
-	p = start;
-	end = (u8 *)sratp + sratp->header.length;
 
-	memset(pxm_bitmap, 0, sizeof(pxm_bitmap));	/* init proximity domain bitmap */
-	memset(node_memory_chunk, 0, sizeof(node_memory_chunk));
-
-	num_memory_chunks = 0;
-	while (p < end) {
-		switch (*p) {
-		case ACPI_SRAT_TYPE_CPU_AFFINITY:
-			parse_cpu_affinity_structure(p);
-			break;
-		case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
-			parse_memory_affinity_structure(p);
-			break;
-		default:
-			printk("ACPI 2.0 SRAT: unknown entry skipped: type=0x%02X, len=%d\n", p[0], p[1]);
-			break;
-		}
-		p += p[1];
-		if (p[1] == 0) {
-			printk("acpi20_parse_srat: Entry length value is zero;"
-				" can't parse any further!\n");
-			break;
-		}
-	}
+	if (srat_disabled())
+		goto out_fail;
 
 	if (num_memory_chunks == 0) {
 		printk("could not finy any ACPI SRAT memory areas.\n");
@@ -248,7 +258,7 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
 		e820_register_active_regions(chunk->nid, chunk->start_pfn,
 					     min(chunk->end_pfn, max_pfn));
 	}
- 
+
 	for_each_online_node(nid) {
 		unsigned long start = node_start_pfn[nid];
 		unsigned long end = min(node_end_pfn[nid], max_pfn);
@@ -258,118 +268,6 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
 	}
 	return 1;
 out_fail:
-	return 0;
-}
-
-struct acpi_static_rsdt {
-	struct acpi_table_rsdt table;
-	u32 padding[32]; /* Allow for 32 more table entries */
-};
-
-int __init get_memcfg_from_srat(void)
-{
-	struct acpi_table_header *header = NULL;
-	struct acpi_table_rsdp *rsdp = NULL;
-	struct acpi_table_rsdt *rsdt = NULL;
-	acpi_native_uint rsdp_address = 0;
-	struct acpi_static_rsdt saved_rsdt;
-	int tables = 0;
-	int i = 0;
-
-	rsdp_address = acpi_os_get_root_pointer();
-	if (!rsdp_address) {
-		printk("%s: System description tables not found\n",
-		       __func__);
-		goto out_err;
-	}
-
-	printk("%s: assigning address to rsdp\n", __func__);
-	rsdp = (struct acpi_table_rsdp *)(u32)rsdp_address;
-	if (!rsdp) {
-		printk("%s: Didn't find ACPI root!\n", __func__);
-		goto out_err;
-	}
-
-	printk(KERN_INFO "%.8s v%d [%.6s]\n", rsdp->signature, rsdp->revision,
-		rsdp->oem_id);
-
-	if (strncmp(rsdp->signature, ACPI_SIG_RSDP,strlen(ACPI_SIG_RSDP))) {
-		printk(KERN_WARNING "%s: RSDP table signature incorrect\n", __func__);
-		goto out_err;
-	}
-
-	rsdt = (struct acpi_table_rsdt *)
-	    early_ioremap(rsdp->rsdt_physical_address, sizeof(saved_rsdt));
-
-	if (!rsdt) {
-		printk(KERN_WARNING
-		       "%s: ACPI: Invalid root system description tables (RSDT)\n",
-		       __func__);
-		goto out_err;
-	}
-
-	header = &rsdt->header;
-
-	if (strncmp(header->signature, ACPI_SIG_RSDT, strlen(ACPI_SIG_RSDT))) {
-		printk(KERN_WARNING "ACPI: RSDT signature incorrect\n");
-		early_iounmap(rsdt, sizeof(saved_rsdt));
-		goto out_err;
-	}
-
-	/* 
-	 * The number of tables is computed by taking the 
-	 * size of all entries (header size minus total 
-	 * size of RSDT) divided by the size of each entry
-	 * (4-byte table pointers).
-	 */
-	tables = (header->length - sizeof(struct acpi_table_header)) / sizeof(u32);
-
-	if (!tables)
-		goto out_err;
-
-	memcpy(&saved_rsdt, rsdt, sizeof(saved_rsdt));
-	early_iounmap(rsdt, sizeof(saved_rsdt));
-	if (saved_rsdt.table.header.length > sizeof(saved_rsdt)) {
-		printk(KERN_WARNING "ACPI: Too big length in RSDT: %d\n",
-		       saved_rsdt.table.header.length);
-		goto out_err;
-	}
-
-	printk("Begin SRAT table scan....%d\n", tables);
-
-	for (i = 0; i < tables; i++){
-		int result;
-		u32 length;
-		/* Map in header, then map in full table length. */
-		header = (struct acpi_table_header *)
-			early_ioremap(saved_rsdt.table.table_offset_entry[i], sizeof(struct acpi_table_header));
-		if (!header)
-			break;
-
-                printk(KERN_INFO "ACPI: %4.4s %08lX, %04X\n",
-                           header->signature,
-		   (unsigned long)saved_rsdt.table.table_offset_entry[i],
-                           header->length);
-
-		if (strncmp((char *) &header->signature, ACPI_SIG_SRAT, 4)) {
-			early_iounmap(header, sizeof(struct acpi_table_header));
-			continue;
-		}
-
-		length = header->length;
-		early_iounmap(header, sizeof(struct acpi_table_header));
-		header = (struct acpi_table_header *)
-			early_ioremap(saved_rsdt.table.table_offset_entry[i], length);
-		if (!header)
-			break;
-
-		/* we've found the srat table. don't need to look at any more tables */
-		result = acpi20_parse_srat((struct acpi_table_srat *)header);
-		early_iounmap(header, length);
-		return result;
-	}
-out_err:
-	remove_all_active_ranges();
 	printk("failed to get NUMA memory information from SRAT table\n");
 	return 0;
 }
diff --git a/include/asm-x86/acpi.h b/include/asm-x86/acpi.h
index 73ce5b32443..635d764dc13 100644
--- a/include/asm-x86/acpi.h
+++ b/include/asm-x86/acpi.h
@@ -161,9 +161,7 @@ struct bootnode;
 #ifdef CONFIG_ACPI_NUMA
 extern int acpi_numa;
 extern int acpi_scan_nodes(unsigned long start, unsigned long end);
-#ifdef CONFIG_X86_64
-# define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
-#endif
+#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
 extern void acpi_fake_nodes(const struct bootnode *fake_nodes,
 				   int num_nodes);
 #else
diff --git a/include/asm-x86/dmi.h b/include/asm-x86/dmi.h
index 4edf7514a75..58a86571fe0 100644
--- a/include/asm-x86/dmi.h
+++ b/include/asm-x86/dmi.h
@@ -3,12 +3,6 @@
 
 #include <asm/io.h>
 
-#ifdef CONFIG_X86_32
-
-#define dmi_alloc alloc_bootmem
-
-#else /* CONFIG_X86_32 */
-
 #define DMI_MAX_DATA 2048
 
 extern int dmi_alloc_index;
@@ -25,8 +19,6 @@ static inline void *dmi_alloc(unsigned len)
 	return dmi_alloc_data + idx;
 }
 
-#endif
-
 /* Use early IO mappings for DMI because it's initialized early */
 #define dmi_ioremap early_ioremap
 #define dmi_iounmap early_iounmap
-- 
cgit v1.2.3-70-g09d2


From 66a6f8d539416c9664f3a04ecb12f55a0097ab8c Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Wed, 18 Jun 2008 11:54:37 -0700
Subject: x86: remove unused file after numaq etc depends on genericarch

we don't need those mach_mpspec.h files now.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/mach-bigsmp/mach_mpspec.h | 8 --------
 include/asm-x86/mach-es7000/mach_mpspec.h | 8 --------
 include/asm-x86/mach-numaq/mach_mpspec.h  | 8 --------
 include/asm-x86/mach-summit/mach_mpspec.h | 9 ---------
 4 files changed, 33 deletions(-)
 delete mode 100644 include/asm-x86/mach-bigsmp/mach_mpspec.h
 delete mode 100644 include/asm-x86/mach-es7000/mach_mpspec.h
 delete mode 100644 include/asm-x86/mach-numaq/mach_mpspec.h
 delete mode 100644 include/asm-x86/mach-summit/mach_mpspec.h

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/mach-bigsmp/mach_mpspec.h b/include/asm-x86/mach-bigsmp/mach_mpspec.h
deleted file mode 100644
index 6b5dadcf1d0..00000000000
--- a/include/asm-x86/mach-bigsmp/mach_mpspec.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef __ASM_MACH_MPSPEC_H
-#define __ASM_MACH_MPSPEC_H
-
-#define MAX_IRQ_SOURCES 256
-
-#define MAX_MP_BUSSES 32
-
-#endif /* __ASM_MACH_MPSPEC_H */
diff --git a/include/asm-x86/mach-es7000/mach_mpspec.h b/include/asm-x86/mach-es7000/mach_mpspec.h
deleted file mode 100644
index b1f5039d450..00000000000
--- a/include/asm-x86/mach-es7000/mach_mpspec.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef __ASM_MACH_MPSPEC_H
-#define __ASM_MACH_MPSPEC_H
-
-#define MAX_IRQ_SOURCES 256
-
-#define MAX_MP_BUSSES 256
-
-#endif /* __ASM_MACH_MPSPEC_H */
diff --git a/include/asm-x86/mach-numaq/mach_mpspec.h b/include/asm-x86/mach-numaq/mach_mpspec.h
deleted file mode 100644
index dffb09856f8..00000000000
--- a/include/asm-x86/mach-numaq/mach_mpspec.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef __ASM_MACH_MPSPEC_H
-#define __ASM_MACH_MPSPEC_H
-
-#define MAX_IRQ_SOURCES 512
-
-#define MAX_MP_BUSSES 32
-
-#endif /* __ASM_MACH_MPSPEC_H */
diff --git a/include/asm-x86/mach-summit/mach_mpspec.h b/include/asm-x86/mach-summit/mach_mpspec.h
deleted file mode 100644
index bd765523511..00000000000
--- a/include/asm-x86/mach-summit/mach_mpspec.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef __ASM_MACH_MPSPEC_H
-#define __ASM_MACH_MPSPEC_H
-
-#define MAX_IRQ_SOURCES 256
-
-/* Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets. */
-#define MAX_MP_BUSSES 260
-
-#endif /* __ASM_MACH_MPSPEC_H */
-- 
cgit v1.2.3-70-g09d2


From 95a71a45c250177854f7c530810c88a8a19a443b Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Wed, 18 Jun 2008 17:27:08 -0700
Subject: x86: cleanup machine_specific_memory_setup, v2

1. let 64bit support 88 and e801 too
2. introduce default_machine_specific_memory_setup, and reuse it
   for voyager

v2: fix 64 bit compiling

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/e820.c        | 13 +++++++------
 arch/x86/mach-voyager/setup.c | 32 +-------------------------------
 include/asm-x86/e820.h        |  3 +--
 include/asm-x86/setup.h       |  3 ++-
 4 files changed, 11 insertions(+), 40 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 49477484a2f..7b613d2efb0 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1029,7 +1029,7 @@ void __init e820_reserve_resources(void)
 	}
 }
 
-char *__init __attribute__((weak)) machine_specific_memory_setup(void)
+char *__init default_machine_specific_memory_setup(void)
 {
 	char *who = "BIOS-e820";
 	int new_nr;
@@ -1045,10 +1045,7 @@ char *__init __attribute__((weak)) machine_specific_memory_setup(void)
 			&new_nr);
 	boot_params.e820_entries = new_nr;
 	if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) {
-#ifdef CONFIG_X86_64
-		early_panic("Cannot find a valid memory map");
-#else
-		unsigned long mem_size;
+		u64 mem_size;
 
 		/* compare results from other methods and take the greater */
 		if (boot_params.alt_mem_k
@@ -1063,13 +1060,17 @@ char *__init __attribute__((weak)) machine_specific_memory_setup(void)
 		e820.nr_map = 0;
 		e820_add_region(0, LOWMEMSIZE(), E820_RAM);
 		e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
-#endif
 	}
 
 	/* In case someone cares... */
 	return who;
 }
 
+char *__init __attribute__((weak)) machine_specific_memory_setup(void)
+{
+	return default_machine_specific_memory_setup();
+}
+
 /* Overridden in paravirt.c if CONFIG_PARAVIRT */
 char * __init __attribute__((weak)) memory_setup(void)
 {
diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c
index f27b583154e..6bbdd633864 100644
--- a/arch/x86/mach-voyager/setup.c
+++ b/arch/x86/mach-voyager/setup.c
@@ -104,35 +104,5 @@ char *__init machine_specific_memory_setup(void)
 		return who;
 	}
 
-	who = "BIOS-e820";
-
-	/*
-	 * Try to copy the BIOS-supplied E820-map.
-	 *
-	 * Otherwise fake a memory map; one section from 0k->640k,
-	 * the next section from 1mb->appropriate_mem_k
-	 */
-	new_nr = boot_params.e820_entries;
-	sanitize_e820_map(boot_params.e820_map,
-			ARRAY_SIZE(boot_params.e820_map),
-			&new_nr);
-	boot_params.e820_entries = new_nr;
-	if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries)
-	    < 0) {
-		unsigned long mem_size;
-
-		/* compare results from other methods and take the greater */
-		if (boot_params.alt_mem_k < boot_params.screen_info.ext_mem_k) {
-			mem_size = boot_params.screen_info.ext_mem_k;
-			who = "BIOS-88";
-		} else {
-			mem_size = boot_params.alt_mem_k;
-			who = "BIOS-e801";
-		}
-
-		e820.nr_map = 0;
-		e820_add_region(0, LOWMEMSIZE(), E820_RAM);
-		e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
-	}
-	return who;
+	return default_machine_specific_memory_setup();
 }
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index 83144cb6c68..668a0d743d2 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -102,6 +102,7 @@ extern u64 e820_hole_size(u64 start, u64 end);
 extern void finish_e820_parsing(void);
 extern void e820_reserve_resources(void);
 extern void setup_memory_map(void);
+extern char *default_machine_specific_memory_setup(void);
 extern char *machine_specific_memory_setup(void);
 extern char *memory_setup(void);
 
@@ -116,9 +117,7 @@ extern char *memory_setup(void);
 #ifdef __KERNEL__
 #include <linux/ioport.h>
 
-#ifdef CONFIG_X86_32
 #define HIGH_MEMORY	(1024*1024)
-#endif
 #endif /* __KERNEL__ */
 
 #endif  /* __ASM_E820_H */
diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index b676b0be798..e14b6e73d26 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -42,13 +42,14 @@ void vsmp_init(void);
  */
 extern struct boot_params boot_params;
 
-#ifdef __i386__
 /*
  * Do NOT EVER look at the BIOS memory size location.
  * It does not work on many machines.
  */
 #define LOWMEMSIZE()	(0x9f000)
 
+#ifdef __i386__
+
 void __init i386_start_kernel(void);
 
 extern unsigned long init_pg_tables_start;
-- 
cgit v1.2.3-70-g09d2


From fcfa146e412023dd55f8855f240b2c2082dc1baa Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Wed, 18 Jun 2008 17:29:31 -0700
Subject: x86: update mptable fix with no ioapic v2

if the system doesn't have ioapic, we don't need to store entries for mptable
update

also let mp_config_acpi_gsi not call func in mpparse
so later could decouple mpparse with acpi more easily

Reported-by: Daniel Exner <dex@dragonslave.de>
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Daniel Exner <dex@dragonslave.de>
Cc: Len Brown <lenb@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/acpi/boot.c | 87 +++++++++++++++++++++++++++++----------------
 arch/x86/kernel/mpparse.c   | 19 +++++-----
 include/asm-x86/mpspec.h    |  2 --
 3 files changed, 65 insertions(+), 43 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 29b365a66cf..91bb9a99338 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -960,10 +960,37 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
 	nr_ioapics++;
 }
 
+static void assign_to_mp_irq(struct mp_config_intsrc *m,
+				    struct mp_config_intsrc *mp_irq)
+{
+	memcpy(mp_irq, m, sizeof(struct mp_config_intsrc));
+}
+
+static int mp_irq_cmp(struct mp_config_intsrc *mp_irq,
+				struct mp_config_intsrc *m)
+{
+	return memcmp(mp_irq, m, sizeof(struct mp_config_intsrc));
+}
+
+static void save_mp_irq(struct mp_config_intsrc *m)
+{
+	int i;
+
+	for (i = 0; i < mp_irq_entries; i++) {
+		if (!mp_irq_cmp(&mp_irqs[i], m))
+			return;
+	}
+
+	assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
+	if (++mp_irq_entries == MAX_IRQ_SOURCES)
+		panic("Max # of irq sources exceeded!!\n");
+}
+
 void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
 {
 	int ioapic;
 	int pin;
+	struct mp_config_intsrc mp_irq;
 
 	/*
 	 * Convert 'gsi' to 'ioapic.pin'.
@@ -981,18 +1008,15 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
 	if ((bus_irq == 0) && (trigger == 3))
 		trigger = 1;
 
-	mp_irqs[mp_irq_entries].mp_type = MP_INTSRC;
-	mp_irqs[mp_irq_entries].mp_irqtype = mp_INT;
-	mp_irqs[mp_irq_entries].mp_irqflag = (trigger << 2) | polarity;
-	mp_irqs[mp_irq_entries].mp_srcbus = MP_ISA_BUS;
-	mp_irqs[mp_irq_entries].mp_srcbusirq = bus_irq;	/* IRQ */
-	mp_irqs[mp_irq_entries].mp_dstapic =
-			mp_ioapics[ioapic].mp_apicid;	/* APIC ID */
-	mp_irqs[mp_irq_entries].mp_dstirq = pin;	/* INTIN# */
-
-	if (++mp_irq_entries == MAX_IRQ_SOURCES)
-		panic("Max # of irq sources exceeded!!\n");
+	mp_irq.mp_type = MP_INTSRC;
+	mp_irq.mp_irqtype = mp_INT;
+	mp_irq.mp_irqflag = (trigger << 2) | polarity;
+	mp_irq.mp_srcbus = MP_ISA_BUS;
+	mp_irq.mp_srcbusirq = bus_irq;	/* IRQ */
+	mp_irq.mp_dstapic = mp_ioapics[ioapic].mp_apicid; /* APIC ID */
+	mp_irq.mp_dstirq = pin;	/* INTIN# */
 
+	save_mp_irq(&mp_irq);
 }
 
 void __init mp_config_acpi_legacy_irqs(void)
@@ -1000,6 +1024,7 @@ void __init mp_config_acpi_legacy_irqs(void)
 	int i;
 	int ioapic;
 	unsigned int dstapic;
+	struct mp_config_intsrc mp_irq;
 
 #if defined (CONFIG_MCA) || defined (CONFIG_EISA)
 	/*
@@ -1052,16 +1077,15 @@ void __init mp_config_acpi_legacy_irqs(void)
 			continue;	/* IRQ already used */
 		}
 
-		mp_irqs[mp_irq_entries].mp_type = MP_INTSRC;
-		mp_irqs[mp_irq_entries].mp_irqflag = 0;	/* Conforming */
-		mp_irqs[mp_irq_entries].mp_srcbus = MP_ISA_BUS;
-		mp_irqs[mp_irq_entries].mp_dstapic = dstapic;
-		mp_irqs[mp_irq_entries].mp_irqtype = mp_INT;
-		mp_irqs[mp_irq_entries].mp_srcbusirq = i; /* Identity mapped */
-		mp_irqs[mp_irq_entries].mp_dstirq = i;
+		mp_irq.mp_type = MP_INTSRC;
+		mp_irq.mp_irqflag = 0;	/* Conforming */
+		mp_irq.mp_srcbus = MP_ISA_BUS;
+		mp_irq.mp_dstapic = dstapic;
+		mp_irq.mp_irqtype = mp_INT;
+		mp_irq.mp_srcbusirq = i; /* Identity mapped */
+		mp_irq.mp_dstirq = i;
 
-		if (++mp_irq_entries == MAX_IRQ_SOURCES)
-			panic("Max # of irq sources exceeded!!\n");
+		save_mp_irq(&mp_irq);
 	}
 }
 
@@ -1169,25 +1193,26 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
 int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
 			u32 gsi, int triggering, int polarity)
 {
-	struct mpc_config_intsrc intsrc;
+#ifdef CONFIG_X86_MPPARSE
+	struct mp_config_intsrc mp_irq;
 	int ioapic;
 
-	if (!enable_update_mptable)
+	if (!acpi_ioapic)
 		return 0;
 
 	/* print the entry should happen on mptable identically */
-	intsrc.mpc_type = MP_INTSRC;
-	intsrc.mpc_irqtype = mp_INT;
-	intsrc.mpc_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
+	mp_irq.mp_type = MP_INTSRC;
+	mp_irq.mp_irqtype = mp_INT;
+	mp_irq.mp_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
 				(polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
-	intsrc.mpc_srcbus = number;
-	intsrc.mpc_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
+	mp_irq.mp_srcbus = number;
+	mp_irq.mp_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
 	ioapic = mp_find_ioapic(gsi);
-	intsrc.mpc_dstapic = mp_ioapic_routing[ioapic].apic_id;
-	intsrc.mpc_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base;
-
-	MP_intsrc_info(&intsrc);
+	mp_irq.mp_dstapic = mp_ioapic_routing[ioapic].apic_id;
+	mp_irq.mp_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base;
 
+	save_mp_irq(&mp_irq);
+#endif
 	return 0;
 }
 
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 014ac5d90f8..8b6b1e05c30 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -34,8 +34,6 @@
 #include <mach_mpparse.h>
 #endif
 
-int enable_update_mptable;
-
 /*
  * Checksum an MP configuration block.
  */
@@ -246,7 +244,7 @@ static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq)
 		mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq);
 }
 
-static void assign_to_mp_irq(struct mpc_config_intsrc *m,
+static void __init assign_to_mp_irq(struct mpc_config_intsrc *m,
 				    struct mp_config_intsrc *mp_irq)
 {
 	mp_irq->mp_dstapic = m->mpc_dstapic;
@@ -270,7 +268,7 @@ static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq,
 	m->mpc_dstirq = mp_irq->mp_dstirq;
 }
 
-static int mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq,
+static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq,
 					struct mpc_config_intsrc *m)
 {
 	if (mp_irq->mp_dstapic != m->mpc_dstapic)
@@ -291,17 +289,16 @@ static int mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq,
 	return 0;
 }
 
-void MP_intsrc_info(struct mpc_config_intsrc *m)
+static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
 {
 	int i;
 
 	print_MP_intsrc_info(m);
 
-	if (enable_update_mptable)
-		for (i = 0; i < mp_irq_entries; i++) {
-			if (!mp_irq_mpc_intsrc_cmp(&mp_irqs[i], m))
-				return;
-		}
+	for (i = 0; i < mp_irq_entries; i++) {
+		if (!mp_irq_mpc_intsrc_cmp(&mp_irqs[i], m))
+			return;
+	}
 
 	assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
 	if (++mp_irq_entries == MAX_IRQ_SOURCES)
@@ -1113,6 +1110,8 @@ out:
 	return 0;
 }
 
+static int __initdata enable_update_mptable;
+
 static int __init update_mptable_setup(char *str)
 {
 	enable_update_mptable = 1;
diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h
index f48dbca740e..6ec1a5453b3 100644
--- a/include/asm-x86/mpspec.h
+++ b/include/asm-x86/mpspec.h
@@ -59,9 +59,7 @@ extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
 				   u32 gsi);
 extern void mp_config_acpi_legacy_irqs(void);
 extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low);
-extern void MP_intsrc_info(struct mpc_config_intsrc *m);
 #ifdef CONFIG_X86_IO_APIC
-extern int enable_update_mptable;
 extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
 				u32 gsi, int triggering, int polarity);
 #else
-- 
cgit v1.2.3-70-g09d2


From 23ca4bba3e20c6c3cb11c1bb0ab4770b724d39ac Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Mon, 12 May 2008 21:21:12 +0200
Subject: x86: cleanup early per cpu variables/accesses v4

  * Introduce a new PER_CPU macro called "EARLY_PER_CPU".  This is
    used by some per_cpu variables that are initialized and accessed
    before there are per_cpu areas allocated.

    ["Early" in respect to per_cpu variables is "earlier than the per_cpu
    areas have been setup".]

    This patchset adds these new macros:

	DEFINE_EARLY_PER_CPU(_type, _name, _initvalue)
	EXPORT_EARLY_PER_CPU_SYMBOL(_name)
	DECLARE_EARLY_PER_CPU(_type, _name)

	early_per_cpu_ptr(_name)
	early_per_cpu_map(_name, _idx)
	early_per_cpu(_name, _cpu)

    The DEFINE macro defines the per_cpu variable as well as the early
    map and pointer.  It also initializes the per_cpu variable and map
    elements to "_initvalue".  The early_* macros provide access to
    the initial map (usually setup during system init) and the early
    pointer.  This pointer is initialized to point to the early map
    but is then NULL'ed when the actual per_cpu areas are setup.  After
    that the per_cpu variable is the correct access to the variable.

    The early_per_cpu() macro is not very efficient but does show how to
    access the variable if you have a function that can be called both
    "early" and "late".  It tests the early ptr to be NULL, and if not
    then it's still valid.  Otherwise, the per_cpu variable is used
    instead:

	#define early_per_cpu(_name, _cpu) 			\
		(early_per_cpu_ptr(_name) ?			\
			early_per_cpu_ptr(_name)[_cpu] :	\
			per_cpu(_name, _cpu))

    A better method is to actually check the pointer manually.  In the
    case below, numa_set_node can be called both "early" and "late":

	void __cpuinit numa_set_node(int cpu, int node)
	{
	    int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);

	    if (cpu_to_node_map)
		    cpu_to_node_map[cpu] = node;
	    else
		    per_cpu(x86_cpu_to_node_map, cpu) = node;
	}

  * Add a flag "arch_provides_topology_pointers" that indicates pointers
    to topology cpumask_t maps are available.  Otherwise, use the function
    returning the cpumask_t value.  This is useful if cpumask_t set size
    is very large to avoid copying data on to/off of the stack.

  * The coverage of CONFIG_DEBUG_PER_CPU_MAPS has been increased while
    the non-debug case has been optimized a bit.

  * Remove an unreferenced compiler warning in drivers/base/topology.c

  * Clean up #ifdef in setup.c

For inclusion into sched-devel/latest tree.

Based on:
	git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
    +   sched-devel/latest  .../mingo/linux-2.6-sched-devel.git

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig           |   2 +-
 arch/x86/Kconfig.debug     |   2 +-
 arch/x86/kernel/apic_32.c  |   9 +--
 arch/x86/kernel/apic_64.c  |  11 ++--
 arch/x86/kernel/setup.c    |  96 ++++++++++++++++++++++++++----
 arch/x86/kernel/setup_32.c |  24 --------
 arch/x86/kernel/setup_64.c |   9 ---
 arch/x86/kernel/smpboot.c  |  20 +------
 arch/x86/mm/numa_64.c      |  43 ++++----------
 arch/x86/mm/srat_64.c      |   2 +-
 drivers/base/topology.c    |  25 +++++++-
 include/asm-x86/numa_64.h  |  19 +++---
 include/asm-x86/percpu.h   |  46 +++++++++++++++
 include/asm-x86/smp.h      |  15 +----
 include/asm-x86/topology.h | 143 ++++++++++++++++++++++++++-------------------
 15 files changed, 270 insertions(+), 196 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2e325521e5e..4469a0db1ae 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -121,7 +121,7 @@ config ARCH_HAS_CACHE_LINE_SIZE
 	def_bool y
 
 config HAVE_SETUP_PER_CPU_AREA
-	def_bool X86_64 || (X86_SMP && !X86_VOYAGER)
+	def_bool X86_64_SMP || (X86_SMP && !X86_VOYAGER)
 
 config HAVE_CPUMASK_OF_CPU_MAP
 	def_bool X86_64_SMP
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 18363374d51..24ca95a0ba5 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -60,7 +60,7 @@ config DEBUG_PAGEALLOC
 config DEBUG_PER_CPU_MAPS
 	bool "Debug access to per_cpu maps"
 	depends on DEBUG_KERNEL
-	depends on X86_64_SMP
+	depends on X86_SMP
 	default n
 	help
 	  Say Y to verify that the per_cpu map being accessed has
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 4b99b1bdeb6..f17c1c1bc38 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -52,9 +52,6 @@
 
 unsigned long mp_lapic_addr;
 
-DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
-EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
-
 /*
  * Knob to control our willingness to enable the local APIC.
  *
@@ -1534,9 +1531,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
 	}
 #ifdef CONFIG_SMP
 	/* are we being called early in kernel startup? */
-	if (x86_cpu_to_apicid_early_ptr) {
-		u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
-		u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
+	if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
+		u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
+		u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
 
 		cpu_to_apicid[cpu] = apicid;
 		bios_cpu_apicid[cpu] = apicid;
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 0633cfd0dc2..4fd21f7d698 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -87,9 +87,6 @@ static unsigned long apic_phys;
 
 unsigned long mp_lapic_addr;
 
-DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
-EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
-
 unsigned int __cpuinitdata maxcpus = NR_CPUS;
 /*
  * Get the LAPIC version
@@ -1091,9 +1088,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
 		cpu = 0;
 	}
 	/* are we being called early in kernel startup? */
-	if (x86_cpu_to_apicid_early_ptr) {
-		u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
-		u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
+	if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
+		u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
+		u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
 
 		cpu_to_apicid[cpu] = apicid;
 		bios_cpu_apicid[cpu] = apicid;
@@ -1269,7 +1266,7 @@ __cpuinit int apic_is_clustered_box(void)
 	if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
 		return 0;
 
-	bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
+	bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
 	bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
 
 	for (i = 0; i < NR_CPUS; i++) {
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 6f80b852a19..03caa8e4351 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -19,13 +19,23 @@ unsigned disabled_cpus __cpuinitdata;
 unsigned int boot_cpu_physical_apicid = -1U;
 EXPORT_SYMBOL(boot_cpu_physical_apicid);
 
-DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
-EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
-
 /* Bitmask of physically existing CPUs */
 physid_mask_t phys_cpu_present_map;
 #endif
 
+/* map cpu index to physical APIC ID */
+DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
+DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
+
+#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#define	X86_64_NUMA	1
+
+DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
+#endif
+
 #if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
 /*
  * Copy data used in early init routines from the initial arrays to the
@@ -37,20 +47,21 @@ static void __init setup_per_cpu_maps(void)
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
-		per_cpu(x86_cpu_to_apicid, cpu) = x86_cpu_to_apicid_init[cpu];
+		per_cpu(x86_cpu_to_apicid, cpu) =
+				early_per_cpu_map(x86_cpu_to_apicid, cpu);
 		per_cpu(x86_bios_cpu_apicid, cpu) =
-						x86_bios_cpu_apicid_init[cpu];
-#ifdef CONFIG_NUMA
+				early_per_cpu_map(x86_bios_cpu_apicid, cpu);
+#ifdef X86_64_NUMA
 		per_cpu(x86_cpu_to_node_map, cpu) =
-						x86_cpu_to_node_map_init[cpu];
+				early_per_cpu_map(x86_cpu_to_node_map, cpu);
 #endif
 	}
 
 	/* indicate the early static arrays will soon be gone */
-	x86_cpu_to_apicid_early_ptr = NULL;
-	x86_bios_cpu_apicid_early_ptr = NULL;
-#ifdef CONFIG_NUMA
-	x86_cpu_to_node_map_early_ptr = NULL;
+	early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
+	early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
+#ifdef X86_64_NUMA
+	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
 #endif
 }
 
@@ -109,7 +120,8 @@ void __init setup_per_cpu_areas(void)
 		if (!node_online(node) || !NODE_DATA(node)) {
 			ptr = alloc_bootmem_pages(size);
 			printk(KERN_INFO
-			       "cpu %d has no node or node-local memory\n", i);
+			       "cpu %d has no node %d or node-local memory\n",
+				i, node);
 		}
 		else
 			ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
@@ -137,3 +149,63 @@ void __init setup_per_cpu_areas(void)
 }
 
 #endif
+
+#ifdef X86_64_NUMA
+void __cpuinit numa_set_node(int cpu, int node)
+{
+	int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
+
+	if (cpu_to_node_map)
+		cpu_to_node_map[cpu] = node;
+
+	else if (per_cpu_offset(cpu))
+		per_cpu(x86_cpu_to_node_map, cpu) = node;
+
+	else
+		Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu);
+}
+
+void __cpuinit numa_clear_node(int cpu)
+{
+	numa_set_node(cpu, NUMA_NO_NODE);
+}
+
+void __cpuinit numa_add_cpu(int cpu)
+{
+	cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+	cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]);
+}
+#endif /* CONFIG_NUMA */
+
+#if defined(CONFIG_DEBUG_PER_CPU_MAPS) && defined(CONFIG_X86_64)
+
+int cpu_to_node(int cpu)
+{
+	if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
+		printk(KERN_WARNING
+			"cpu_to_node(%d): usage too early!\n", cpu);
+		dump_stack();
+		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
+	}
+	return per_cpu(x86_cpu_to_node_map, cpu);
+}
+EXPORT_SYMBOL(cpu_to_node);
+
+int early_cpu_to_node(int cpu)
+{
+	if (early_per_cpu_ptr(x86_cpu_to_node_map))
+		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
+
+	if (!per_cpu_offset(cpu)) {
+		printk(KERN_WARNING
+			"early_cpu_to_node(%d): no per_cpu area!\n", cpu);
+			dump_stack();
+		return NUMA_NO_NODE;
+	}
+	return per_cpu(x86_cpu_to_node_map, cpu);
+}
+#endif
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 5a2f8e06388..ccd5f5cdbbe 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -737,18 +737,6 @@ char * __init __attribute__((weak)) memory_setup(void)
 	return machine_specific_memory_setup();
 }
 
-#ifdef CONFIG_NUMA
-/*
- * In the golden day, when everything among i386 and x86_64 will be
- * integrated, this will not live here
- */
-void *x86_cpu_to_node_map_early_ptr;
-int x86_cpu_to_node_map_init[NR_CPUS] = {
-	[0 ... NR_CPUS-1] = NUMA_NO_NODE
-};
-DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
-#endif
-
 /*
  * Determine if we were loaded by an EFI loader.  If so, then we have also been
  * passed the efi memmap, systab, etc., so we should use these data structures
@@ -887,18 +875,6 @@ void __init setup_arch(char **cmdline_p)
 
 	io_delay_init();
 
-#ifdef CONFIG_X86_SMP
-	/*
-	 * setup to use the early static init tables during kernel startup
-	 * X86_SMP will exclude sub-arches that don't deal well with it.
-	 */
-	x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
-	x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
-#ifdef CONFIG_NUMA
-	x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
-#endif
-#endif
-
 #ifdef CONFIG_X86_GENERICARCH
 	generic_apic_probe();
 #endif
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 6dff1286ad8..e8df64fad54 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -406,15 +406,6 @@ void __init setup_arch(char **cmdline_p)
 	kvmclock_init();
 #endif
 
-#ifdef CONFIG_SMP
-	/* setup to use the early static init tables during kernel startup */
-	x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
-	x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
-#ifdef CONFIG_NUMA
-	x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
-#endif
-#endif
-
 #ifdef CONFIG_ACPI
 	/*
 	 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 3e1cecedde4..036604d3dae 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -68,22 +68,6 @@
 #include <mach_wakecpu.h>
 #include <smpboot_hooks.h>
 
-/*
- * FIXME: For x86_64, those are defined in other files. But moving them here,
- * would make the setup areas dependent on smp, which is a loss. When we
- * integrate apic between arches, we can probably do a better job, but
- * right now, they'll stay here -- glommer
- */
-
-/* which logical CPU number maps to which CPU (physical APIC ID) */
-u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata =
-			{ [0 ... NR_CPUS-1] = BAD_APICID };
-void *x86_cpu_to_apicid_early_ptr;
-
-u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata
-				= { [0 ... NR_CPUS-1] = BAD_APICID };
-void *x86_bios_cpu_apicid_early_ptr;
-
 #ifdef CONFIG_X86_32
 u8 apicid_2_node[MAX_APICID];
 static int low_mappings;
@@ -992,7 +976,7 @@ do_rest:
 		/* Try to put things back the way they were before ... */
 		unmap_cpu_to_logical_apicid(cpu);
 #ifdef CONFIG_X86_64
-		clear_node_cpumask(cpu); /* was set by numa_add_cpu */
+		numa_remove_cpu(cpu); /* was set by numa_add_cpu */
 #endif
 		cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */
 		cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
@@ -1373,7 +1357,7 @@ static void __ref remove_cpu_from_maps(int cpu)
 	cpu_clear(cpu, cpu_callin_map);
 	/* was set by cpu_init() */
 	clear_bit(cpu, (unsigned long *)&cpu_initialized);
-	clear_node_cpumask(cpu);
+	numa_remove_cpu(cpu);
 #endif
 }
 
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index c5066d519e5..970f86775c4 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -31,16 +31,6 @@ bootmem_data_t plat_node_bdata[MAX_NUMNODES];
 
 struct memnode memnode;
 
-#ifdef CONFIG_SMP
-int x86_cpu_to_node_map_init[NR_CPUS] = {
-	[0 ... NR_CPUS-1] = NUMA_NO_NODE
-};
-void *x86_cpu_to_node_map_early_ptr;
-EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr);
-#endif
-DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
-EXPORT_PER_CPU_SYMBOL(x86_cpu_to_node_map);
-
 s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
 	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
 };
@@ -577,24 +567,6 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 	setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
 }
 
-__cpuinit void numa_add_cpu(int cpu)
-{
-	set_bit(cpu,
-		(unsigned long *)&node_to_cpumask_map[early_cpu_to_node(cpu)]);
-}
-
-void __cpuinit numa_set_node(int cpu, int node)
-{
-	int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr;
-
-	if(cpu_to_node_map)
-		cpu_to_node_map[cpu] = node;
-	else if(per_cpu_offset(cpu))
-		per_cpu(x86_cpu_to_node_map, cpu) = node;
-	else
-		Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu);
-}
-
 unsigned long __init numa_free_all_bootmem(void)
 {
 	unsigned long pages = 0;
@@ -641,6 +613,7 @@ static __init int numa_setup(char *opt)
 }
 early_param("numa", numa_setup);
 
+#ifdef CONFIG_NUMA
 /*
  * Setup early cpu_to_node.
  *
@@ -652,14 +625,19 @@ early_param("numa", numa_setup);
  * is already initialized in a round robin manner at numa_init_array,
  * prior to this call, and this initialization is good enough
  * for the fake NUMA cases.
+ *
+ * Called before the per_cpu areas are setup.
  */
 void __init init_cpu_to_node(void)
 {
-	int i;
+	int cpu;
+	u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
 
-	for (i = 0; i < NR_CPUS; i++) {
+	BUG_ON(cpu_to_apicid == NULL);
+
+	for_each_possible_cpu(cpu) {
 		int node;
-		u16 apicid = x86_cpu_to_apicid_init[i];
+		u16 apicid = cpu_to_apicid[cpu];
 
 		if (apicid == BAD_APICID)
 			continue;
@@ -668,8 +646,9 @@ void __init init_cpu_to_node(void)
 			continue;
 		if (!node_online(node))
 			continue;
-		numa_set_node(i, node);
+		numa_set_node(cpu, node);
 	}
 }
+#endif
 
 
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 99649dccad2..012220e31c9 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -376,7 +376,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
 		if (node == NUMA_NO_NODE)
 			continue;
 		if (!node_isset(node, node_possible_map))
-			numa_set_node(i, NUMA_NO_NODE);
+			numa_clear_node(i);
 	}
 	numa_init_array();
 	return 0;
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index fdf4044d2e7..1efe162e16d 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -40,6 +40,7 @@ static ssize_t show_##name(struct sys_device *dev, char *buf)	\
 	return sprintf(buf, "%d\n", topology_##name(cpu));	\
 }
 
+#if defined(topology_thread_siblings) || defined(topology_core_siblings)
 static ssize_t show_cpumap(int type, cpumask_t *mask, char *buf)
 {
 	ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
@@ -54,21 +55,41 @@ static ssize_t show_cpumap(int type, cpumask_t *mask, char *buf)
 	}
 	return n;
 }
+#endif
 
+#ifdef arch_provides_topology_pointers
 #define define_siblings_show_map(name)					\
-static inline ssize_t show_##name(struct sys_device *dev, char *buf)	\
+static ssize_t show_##name(struct sys_device *dev, char *buf)	\
 {									\
 	unsigned int cpu = dev->id;					\
 	return show_cpumap(0, &(topology_##name(cpu)), buf);		\
 }
 
 #define define_siblings_show_list(name)					\
-static inline ssize_t show_##name##_list(struct sys_device *dev, char *buf) \
+static ssize_t show_##name##_list(struct sys_device *dev, char *buf) \
 {									\
 	unsigned int cpu = dev->id;					\
 	return show_cpumap(1, &(topology_##name(cpu)), buf);		\
 }
 
+#else
+#define define_siblings_show_map(name)					\
+static ssize_t show_##name(struct sys_device *dev, char *buf)	\
+{									\
+	unsigned int cpu = dev->id;					\
+	cpumask_t mask = topology_##name(cpu);				\
+	return show_cpumap(0, &mask, buf);				\
+}
+
+#define define_siblings_show_list(name)					\
+static ssize_t show_##name##_list(struct sys_device *dev, char *buf) \
+{									\
+	unsigned int cpu = dev->id;					\
+	cpumask_t mask = topology_##name(cpu);				\
+	return show_cpumap(1, &mask, buf);				\
+}
+#endif
+
 #define define_siblings_show_func(name)		\
 	define_siblings_show_map(name); define_siblings_show_list(name)
 
diff --git a/include/asm-x86/numa_64.h b/include/asm-x86/numa_64.h
index 22e87c9f6a8..b510daf4f4d 100644
--- a/include/asm-x86/numa_64.h
+++ b/include/asm-x86/numa_64.h
@@ -14,11 +14,9 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks,
 
 #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
 
-extern void numa_add_cpu(int cpu);
 extern void numa_init_array(void);
 extern int numa_off;
 
-extern void numa_set_node(int cpu, int node);
 extern void srat_reserve_add_area(int nodeid);
 extern int hotadd_percent;
 
@@ -31,15 +29,16 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
 
 #ifdef CONFIG_NUMA
 extern void __init init_cpu_to_node(void);
-
-static inline void clear_node_cpumask(int cpu)
-{
-	clear_bit(cpu, (unsigned long *)&node_to_cpumask_map[cpu_to_node(cpu)]);
-}
-
+extern void __cpuinit numa_set_node(int cpu, int node);
+extern void __cpuinit numa_clear_node(int cpu);
+extern void __cpuinit numa_add_cpu(int cpu);
+extern void __cpuinit numa_remove_cpu(int cpu);
 #else
-#define init_cpu_to_node() do {} while (0)
-#define clear_node_cpumask(cpu) do {} while (0)
+static inline void init_cpu_to_node(void)		{ }
+static inline void numa_set_node(int cpu, int node)	{ }
+static inline void numa_clear_node(int cpu)		{ }
+static inline void numa_add_cpu(int cpu, int node)	{ }
+static inline void numa_remove_cpu(int cpu)		{ }
 #endif
 
 #endif
diff --git a/include/asm-x86/percpu.h b/include/asm-x86/percpu.h
index 736fc3bb8e1..912a3a17b9d 100644
--- a/include/asm-x86/percpu.h
+++ b/include/asm-x86/percpu.h
@@ -143,4 +143,50 @@ do {							\
 #define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val)
 #endif /* !__ASSEMBLY__ */
 #endif /* !CONFIG_X86_64 */
+
+#ifdef CONFIG_SMP
+
+/*
+ * Define the "EARLY_PER_CPU" macros.  These are used for some per_cpu
+ * variables that are initialized and accessed before there are per_cpu
+ * areas allocated.
+ */
+
+#define	DEFINE_EARLY_PER_CPU(_type, _name, _initvalue)			\
+	DEFINE_PER_CPU(_type, _name) = _initvalue;			\
+	__typeof__(_type) _name##_early_map[NR_CPUS] __initdata =	\
+				{ [0 ... NR_CPUS-1] = _initvalue };	\
+	__typeof__(_type) *_name##_early_ptr = _name##_early_map
+
+#define EXPORT_EARLY_PER_CPU_SYMBOL(_name)			\
+	EXPORT_PER_CPU_SYMBOL(_name)
+
+#define DECLARE_EARLY_PER_CPU(_type, _name)			\
+	DECLARE_PER_CPU(_type, _name);				\
+	extern __typeof__(_type) *_name##_early_ptr;		\
+	extern __typeof__(_type)  _name##_early_map[]
+
+#define	early_per_cpu_ptr(_name) (_name##_early_ptr)
+#define	early_per_cpu_map(_name, _idx) (_name##_early_map[_idx])
+#define	early_per_cpu(_name, _cpu) 				\
+	(early_per_cpu_ptr(_name) ?				\
+		early_per_cpu_ptr(_name)[_cpu] :		\
+		per_cpu(_name, _cpu))
+
+#else	/* !CONFIG_SMP */
+#define	DEFINE_EARLY_PER_CPU(_type, _name, _initvalue)		\
+	DEFINE_PER_CPU(_type, _name) = _initvalue
+
+#define EXPORT_EARLY_PER_CPU_SYMBOL(_name)			\
+	EXPORT_PER_CPU_SYMBOL(_name)
+
+#define DECLARE_EARLY_PER_CPU(_type, _name)			\
+	DECLARE_PER_CPU(_type, _name)
+
+#define	early_per_cpu(_name, _cpu) per_cpu(_name, _cpu)
+#define	early_per_cpu_ptr(_name) NULL
+/* no early_per_cpu_map() */
+
+#endif	/* !CONFIG_SMP */
+
 #endif /* _ASM_X86_PERCPU_H_ */
diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
index 1ebaa5cd311..ec841639fb4 100644
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@ -29,21 +29,12 @@ extern int smp_num_siblings;
 extern unsigned int num_processors;
 extern cpumask_t cpu_initialized;
 
-#ifdef CONFIG_SMP
-extern u16 x86_cpu_to_apicid_init[];
-extern u16 x86_bios_cpu_apicid_init[];
-extern void *x86_cpu_to_apicid_early_ptr;
-extern void *x86_bios_cpu_apicid_early_ptr;
-#else
-#define x86_cpu_to_apicid_early_ptr NULL
-#define x86_bios_cpu_apicid_early_ptr NULL
-#endif
-
 DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
 DECLARE_PER_CPU(cpumask_t, cpu_core_map);
 DECLARE_PER_CPU(u16, cpu_llc_id);
-DECLARE_PER_CPU(u16, x86_cpu_to_apicid);
-DECLARE_PER_CPU(u16, x86_bios_cpu_apicid);
+
+DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
+DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
 
 /* Static state in head.S used to set up a CPU */
 extern struct {
diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h
index dcf3f8131d6..dac09cb66dc 100644
--- a/include/asm-x86/topology.h
+++ b/include/asm-x86/topology.h
@@ -35,87 +35,67 @@
 # endif
 #endif
 
+/* Node not present */
+#define NUMA_NO_NODE	(-1)
+
 #ifdef CONFIG_NUMA
 #include <linux/cpumask.h>
 #include <asm/mpspec.h>
 
-/* Mappings between logical cpu number and node number */
 #ifdef CONFIG_X86_32
-extern int cpu_to_node_map[];
-#else
-/* Returns the number of the current Node. */
-#define numa_node_id()		(early_cpu_to_node(raw_smp_processor_id()))
-#endif
-
-DECLARE_PER_CPU(int, x86_cpu_to_node_map);
-
-#ifdef CONFIG_SMP
-extern int x86_cpu_to_node_map_init[];
-extern void *x86_cpu_to_node_map_early_ptr;
-#else
-#define x86_cpu_to_node_map_early_ptr NULL
-#endif
 
+/* Mappings between node number and cpus on that node. */
 extern cpumask_t node_to_cpumask_map[];
 
-#define NUMA_NO_NODE	(-1)
+/* Mappings between logical cpu number and node number */
+extern int cpu_to_node_map[];
 
 /* Returns the number of the node containing CPU 'cpu' */
-#ifdef CONFIG_X86_32
-#define early_cpu_to_node(cpu)	cpu_to_node(cpu)
 static inline int cpu_to_node(int cpu)
 {
 	return cpu_to_node_map[cpu];
 }
+#define early_cpu_to_node(cpu)	cpu_to_node(cpu)
 
 #else /* CONFIG_X86_64 */
 
-#ifdef CONFIG_SMP
-static inline int early_cpu_to_node(int cpu)
-{
-	int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr;
-
-	if (cpu_to_node_map)
-		return cpu_to_node_map[cpu];
-	else if (per_cpu_offset(cpu))
-		return per_cpu(x86_cpu_to_node_map, cpu);
-	else
-		return NUMA_NO_NODE;
-}
-#else
-#define	early_cpu_to_node(cpu)	cpu_to_node(cpu)
-#endif
+/* Mappings between node number and cpus on that node. */
+extern cpumask_t node_to_cpumask_map[];
+
+/* Mappings between logical cpu number and node number */
+DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
+
+/* Returns the number of the current Node. */
+#define numa_node_id()	(per_cpu(x86_cpu_to_node_map, raw_smp_processor_id()))
+
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+extern int cpu_to_node(int cpu);
+extern int early_cpu_to_node(int cpu);
+extern cpumask_t *_node_to_cpumask_ptr(int node);
+extern cpumask_t node_to_cpumask(int node);
 
+#else	/* !CONFIG_DEBUG_PER_CPU_MAPS */
+
+/* Returns the number of the node containing CPU 'cpu' */
 static inline int cpu_to_node(int cpu)
 {
-#ifdef CONFIG_DEBUG_PER_CPU_MAPS
-	if (x86_cpu_to_node_map_early_ptr) {
-		printk("KERN_NOTICE cpu_to_node(%d): usage too early!\n",
-		       (int)cpu);
-		dump_stack();
-		return ((int *)x86_cpu_to_node_map_early_ptr)[cpu];
-	}
-#endif
 	return per_cpu(x86_cpu_to_node_map, cpu);
 }
 
-#ifdef	CONFIG_NUMA
-
-/* Returns a pointer to the cpumask of CPUs on Node 'node'. */
-#define node_to_cpumask_ptr(v, node)		\
-		cpumask_t *v = &(node_to_cpumask_map[node])
-
-#define node_to_cpumask_ptr_next(v, node)	\
-			   v = &(node_to_cpumask_map[node])
-#endif
+/* Same function but used if called before per_cpu areas are setup */
+static inline int early_cpu_to_node(int cpu)
+{
+	if (early_per_cpu_ptr(x86_cpu_to_node_map))
+		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
 
-#endif /* CONFIG_X86_64 */
+	return per_cpu(x86_cpu_to_node_map, cpu);
+}
 
-/*
- * Returns the number of the node containing Node 'node'. This
- * architecture is flat, so it is a pretty simple function!
- */
-#define parent_node(node) (node)
+/* Returns a pointer to the cpumask of CPUs on Node 'node'. */
+static inline cpumask_t *_node_to_cpumask_ptr(int node)
+{
+	return &node_to_cpumask_map[node];
+}
 
 /* Returns a bitmask of CPUs on Node 'node'. */
 static inline cpumask_t node_to_cpumask(int node)
@@ -123,14 +103,29 @@ static inline cpumask_t node_to_cpumask(int node)
 	return node_to_cpumask_map[node];
 }
 
+#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
+#endif /* CONFIG_X86_64 */
+
+/* Replace default node_to_cpumask_ptr with optimized version */
+#define node_to_cpumask_ptr(v, node)		\
+		cpumask_t *v = _node_to_cpumask_ptr(node)
+
+#define node_to_cpumask_ptr_next(v, node)	\
+			   v = _node_to_cpumask_ptr(node)
+
 /* Returns the number of the first CPU on Node 'node'. */
 static inline int node_to_first_cpu(int node)
 {
-	cpumask_t mask = node_to_cpumask(node);
-
-	return first_cpu(mask);
+	node_to_cpumask_ptr(mask, node);
+	return first_cpu(*mask);
 }
 
+/*
+ * Returns the number of the node containing Node 'node'. This
+ * architecture is flat, so it is a pretty simple function!
+ */
+#define parent_node(node) (node)
+
 #define pcibus_to_node(bus) __pcibus_to_node(bus)
 #define pcibus_to_cpumask(bus) __pcibus_to_cpumask(bus)
 
@@ -180,8 +175,31 @@ extern int __node_distance(int, int);
 #define node_distance(a, b) __node_distance(a, b)
 #endif
 
-#else /* CONFIG_NUMA */
+#else /* !CONFIG_NUMA */
+
+#define numa_node_id()		0
+#define	cpu_to_node(cpu)	0
+#define	early_cpu_to_node(cpu)	0
+
+static inline cpumask_t *_node_to_cpumask_ptr(int node)
+{
+	return &cpu_online_map;
+}
+static inline cpumask_t node_to_cpumask(int node)
+{
+	return cpu_online_map;
+}
+static inline int node_to_first_cpu(int node)
+{
+	return first_cpu(cpu_online_map);
+}
+
+/* Replace default node_to_cpumask_ptr with optimized version */
+#define node_to_cpumask_ptr(v, node)		\
+		cpumask_t *v = _node_to_cpumask_ptr(node)
 
+#define node_to_cpumask_ptr_next(v, node)	\
+			   v = _node_to_cpumask_ptr(node)
 #endif
 
 #include <asm-generic/topology.h>
@@ -193,6 +211,9 @@ extern cpumask_t cpu_coregroup_map(int cpu);
 #define topology_core_id(cpu)			(cpu_data(cpu).cpu_core_id)
 #define topology_core_siblings(cpu)		(per_cpu(cpu_core_map, cpu))
 #define topology_thread_siblings(cpu)		(per_cpu(cpu_sibling_map, cpu))
+
+/* indicates that pointers to the topology cpumask_t maps are valid */
+#define arch_provides_topology_pointers		yes
 #endif
 
 static inline void arch_fix_phys_package_id(int num, u32 slot)
@@ -220,4 +241,4 @@ static inline void set_mp_bus_to_node(int busnum, int node)
 }
 #endif
 
-#endif
+#endif /* _ASM_X86_TOPOLOGY_H */
-- 
cgit v1.2.3-70-g09d2


From 7891a24e1ee50c96896c0cf7da216a8e7b573ca5 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Mon, 12 May 2008 21:21:12 +0200
Subject: x86: restore pda nodenumber field

  * Restore the nodenumber field in the x86_64 pda.  This field is slightly
    different than the x86_cpu_to_node_map mainly because it's a static
    indication of which node the cpu is on while the cpu to node map is a
    dyanamic mapping that may get reset if the cpu goes offline.  This also
    simplifies the numa_node_id() macro.

For inclusion into sched-devel/latest tree.

Based on:
	git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
    +   sched-devel/latest  .../mingo/linux-2.6-sched-devel.git

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/setup.c    | 4 ++++
 include/asm-x86/pda.h      | 1 +
 include/asm-x86/topology.h | 2 +-
 3 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 03caa8e4351..0dff17ee3d7 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -32,6 +32,7 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
 #define	X86_64_NUMA	1
 
+/* map cpu index to node index */
 DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
 #endif
@@ -155,6 +156,9 @@ void __cpuinit numa_set_node(int cpu, int node)
 {
 	int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
 
+	if (node != NUMA_NO_NODE)
+		cpu_pda(cpu)->nodenumber = node;
+
 	if (cpu_to_node_map)
 		cpu_to_node_map[cpu] = node;
 
diff --git a/include/asm-x86/pda.h b/include/asm-x86/pda.h
index 101fb9e1195..de2ad9ac35a 100644
--- a/include/asm-x86/pda.h
+++ b/include/asm-x86/pda.h
@@ -22,6 +22,7 @@ struct x8664_pda {
 					   offset 40!!! */
 #endif
 	char *irqstackptr;
+	int nodenumber;			/* number of current node */
 	unsigned int __softirq_pending;
 	unsigned int __nmi_count;	/* number of NMI on this CPUs */
 	short mmu_state;
diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h
index dac09cb66dc..c0e6ff7671e 100644
--- a/include/asm-x86/topology.h
+++ b/include/asm-x86/topology.h
@@ -66,7 +66,7 @@ extern cpumask_t node_to_cpumask_map[];
 DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
 
 /* Returns the number of the current Node. */
-#define numa_node_id()	(per_cpu(x86_cpu_to_node_map, raw_smp_processor_id()))
+#define numa_node_id()		read_pda(nodenumber)
 
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
 extern int cpu_to_node(int cpu);
-- 
cgit v1.2.3-70-g09d2


From 9f248bde9d47cc177011198c9a15fb339b9f3215 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Mon, 12 May 2008 21:21:12 +0200
Subject: x86: remove the static 256k node_to_cpumask_map

  * Consolidate node_to_cpumask operations and remove the 256k
    byte node_to_cpumask_map.  This is done by allocating the
    node_to_cpumask_map array after the number of possible nodes
    (nr_node_ids) is known.

  * Debug printouts when CONFIG_DEBUG_PER_CPU_MAPS is active have
    been increased.  It now shows faults when calling node_to_cpumask()
    and node_to_cpumask_ptr().

For inclusion into sched-devel/latest tree.

Based on:
	git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
    +   sched-devel/latest  .../mingo/linux-2.6-sched-devel.git

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/setup.c    | 132 +++++++++++++++++++++++++++++++++++++++++++--
 arch/x86/mm/numa_64.c      |   6 ---
 include/asm-x86/topology.h |  25 ++++++---
 3 files changed, 144 insertions(+), 19 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 0dff17ee3d7..913af838c3c 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -35,6 +35,16 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
 /* map cpu index to node index */
 DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
+
+/* which logical CPUs are on which nodes */
+cpumask_t *node_to_cpumask_map;
+EXPORT_SYMBOL(node_to_cpumask_map);
+
+/* setup node_to_cpumask_map */
+static void __init setup_node_to_cpumask_map(void);
+
+#else
+static inline void setup_node_to_cpumask_map(void) { }
 #endif
 
 #if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
@@ -140,11 +150,15 @@ void __init setup_per_cpu_areas(void)
 	}
 
 	nr_cpu_ids = highest_cpu + 1;
-	printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d\n", NR_CPUS, nr_cpu_ids);
+	printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
+		NR_CPUS, nr_cpu_ids, nr_node_ids);
 
 	/* Setup percpu data maps */
 	setup_per_cpu_maps();
 
+	/* Setup node to cpumask map */
+	setup_node_to_cpumask_map();
+
 	/* Setup cpumask_of_cpu map */
 	setup_cpumask_of_cpu();
 }
@@ -152,6 +166,35 @@ void __init setup_per_cpu_areas(void)
 #endif
 
 #ifdef X86_64_NUMA
+
+/*
+ * Allocate node_to_cpumask_map based on number of available nodes
+ * Requires node_possible_map to be valid.
+ *
+ * Note: node_to_cpumask() is not valid until after this is done.
+ */
+static void __init setup_node_to_cpumask_map(void)
+{
+	unsigned int node, num = 0;
+	cpumask_t *map;
+
+	/* setup nr_node_ids if not done yet */
+	if (nr_node_ids == MAX_NUMNODES) {
+		for_each_node_mask(node, node_possible_map)
+			num = node;
+		nr_node_ids = num + 1;
+	}
+
+	/* allocate the map */
+	map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));
+
+	Dprintk(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n",
+		map, nr_node_ids);
+
+	/* node_to_cpumask() will now work */
+	node_to_cpumask_map = map;
+}
+
 void __cpuinit numa_set_node(int cpu, int node)
 {
 	int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
@@ -174,6 +217,8 @@ void __cpuinit numa_clear_node(int cpu)
 	numa_set_node(cpu, NUMA_NO_NODE);
 }
 
+#ifndef CONFIG_DEBUG_PER_CPU_MAPS
+
 void __cpuinit numa_add_cpu(int cpu)
 {
 	cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
@@ -183,9 +228,44 @@ void __cpuinit numa_remove_cpu(int cpu)
 {
 	cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]);
 }
-#endif /* CONFIG_NUMA */
 
-#if defined(CONFIG_DEBUG_PER_CPU_MAPS) && defined(CONFIG_X86_64)
+#else /* CONFIG_DEBUG_PER_CPU_MAPS */
+
+/*
+ * --------- debug versions of the numa functions ---------
+ */
+static void __cpuinit numa_set_cpumask(int cpu, int enable)
+{
+	int node = cpu_to_node(cpu);
+	cpumask_t *mask;
+	char buf[64];
+
+	if (node_to_cpumask_map == NULL) {
+		printk(KERN_ERR "node_to_cpumask_map NULL\n");
+		dump_stack();
+		return;
+	}
+
+	mask = &node_to_cpumask_map[node];
+	if (enable)
+		cpu_set(cpu, *mask);
+	else
+		cpu_clear(cpu, *mask);
+
+	cpulist_scnprintf(buf, sizeof(buf), *mask);
+	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
+		enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf);
+ }
+
+void __cpuinit numa_add_cpu(int cpu)
+{
+	numa_set_cpumask(cpu, 1);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+	numa_set_cpumask(cpu, 0);
+}
 
 int cpu_to_node(int cpu)
 {
@@ -199,6 +279,10 @@ int cpu_to_node(int cpu)
 }
 EXPORT_SYMBOL(cpu_to_node);
 
+/*
+ * Same function as cpu_to_node() but used if called before the
+ * per_cpu areas are setup.
+ */
 int early_cpu_to_node(int cpu)
 {
 	if (early_per_cpu_ptr(x86_cpu_to_node_map))
@@ -207,9 +291,47 @@ int early_cpu_to_node(int cpu)
 	if (!per_cpu_offset(cpu)) {
 		printk(KERN_WARNING
 			"early_cpu_to_node(%d): no per_cpu area!\n", cpu);
-			dump_stack();
+		dump_stack();
 		return NUMA_NO_NODE;
 	}
 	return per_cpu(x86_cpu_to_node_map, cpu);
 }
-#endif
+
+/*
+ * Returns a pointer to the bitmask of CPUs on Node 'node'.
+ */
+cpumask_t *_node_to_cpumask_ptr(int node)
+{
+	if (node_to_cpumask_map == NULL) {
+		printk(KERN_WARNING
+			"_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n",
+			node);
+		dump_stack();
+		return &cpu_online_map;
+	}
+	return &node_to_cpumask_map[node];
+}
+EXPORT_SYMBOL(_node_to_cpumask_ptr);
+
+/*
+ * Returns a bitmask of CPUs on Node 'node'.
+ */
+cpumask_t node_to_cpumask(int node)
+{
+	if (node_to_cpumask_map == NULL) {
+		printk(KERN_WARNING
+			"node_to_cpumask(%d): no node_to_cpumask_map!\n", node);
+		dump_stack();
+		return cpu_online_map;
+	}
+	return node_to_cpumask_map[node];
+}
+EXPORT_SYMBOL(node_to_cpumask);
+
+/*
+ * --------- end of debug versions of the numa functions ---------
+ */
+
+#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
+
+#endif /* X86_64_NUMA */
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 970f86775c4..14c7ab417ec 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -35,9 +35,6 @@ s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
 	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
 };
 
-cpumask_t node_to_cpumask_map[MAX_NUMNODES] __read_mostly;
-EXPORT_SYMBOL(node_to_cpumask_map);
-
 int numa_off __initdata;
 unsigned long __initdata nodemap_addr;
 unsigned long __initdata nodemap_size;
@@ -560,9 +557,6 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 	node_set(0, node_possible_map);
 	for (i = 0; i < NR_CPUS; i++)
 		numa_set_node(i, 0);
-	/* cpumask_of_cpu() may not be available during early startup */
-	memset(&node_to_cpumask_map[0], 0, sizeof(node_to_cpumask_map[0]));
-	cpu_set(0, node_to_cpumask_map[0]);
 	e820_register_active_regions(0, start_pfn, end_pfn);
 	setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
 }
diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h
index c0e6ff7671e..1f97758de4a 100644
--- a/include/asm-x86/topology.h
+++ b/include/asm-x86/topology.h
@@ -57,10 +57,16 @@ static inline int cpu_to_node(int cpu)
 }
 #define early_cpu_to_node(cpu)	cpu_to_node(cpu)
 
+/* Returns a bitmask of CPUs on Node 'node'. */
+static inline cpumask_t node_to_cpumask(int node)
+{
+	return node_to_cpumask_map[node];
+}
+
 #else /* CONFIG_X86_64 */
 
 /* Mappings between node number and cpus on that node. */
-extern cpumask_t node_to_cpumask_map[];
+extern cpumask_t *node_to_cpumask_map;
 
 /* Mappings between logical cpu number and node number */
 DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
@@ -104,7 +110,6 @@ static inline cpumask_t node_to_cpumask(int node)
 }
 
 #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
-#endif /* CONFIG_X86_64 */
 
 /* Replace default node_to_cpumask_ptr with optimized version */
 #define node_to_cpumask_ptr(v, node)		\
@@ -113,12 +118,7 @@ static inline cpumask_t node_to_cpumask(int node)
 #define node_to_cpumask_ptr_next(v, node)	\
 			   v = _node_to_cpumask_ptr(node)
 
-/* Returns the number of the first CPU on Node 'node'. */
-static inline int node_to_first_cpu(int node)
-{
-	node_to_cpumask_ptr(mask, node);
-	return first_cpu(*mask);
-}
+#endif /* CONFIG_X86_64 */
 
 /*
  * Returns the number of the node containing Node 'node'. This
@@ -204,6 +204,15 @@ static inline int node_to_first_cpu(int node)
 
 #include <asm-generic/topology.h>
 
+#ifdef CONFIG_NUMA
+/* Returns the number of the first CPU on Node 'node'. */
+static inline int node_to_first_cpu(int node)
+{
+	node_to_cpumask_ptr(mask, node);
+	return first_cpu(*mask);
+}
+#endif
+
 extern cpumask_t cpu_coregroup_map(int cpu);
 
 #ifdef ENABLE_TOPO_DEFINES
-- 
cgit v1.2.3-70-g09d2


From 3461b0af025251bbc6b3d56c821c6ac2de6f7209 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Mon, 12 May 2008 21:21:13 +0200
Subject: x86: remove static boot_cpu_pda array v2

  * Remove the boot_cpu_pda array and pointer table from the data section.
    Allocate the pointer table and array during init.  do_boot_cpu()
    will reallocate the pda in node local memory and if the cpu is being
    brought up before the bootmem array is released (after_bootmem = 0),
    then it will free the initial pda.  This will happen for all cpus
    present at system startup.

    This removes 512k + 32k bytes from the data section.

For inclusion into sched-devel/latest tree.

Based on:
	git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
    +   sched-devel/latest  .../mingo/linux-2.6-sched-devel.git

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/head64.c  | 26 +++++++++++++++--
 arch/x86/kernel/setup.c   | 73 ++++++++++++++++++++++++++++++++++++-----------
 arch/x86/kernel/setup64.c |  8 ++++--
 arch/x86/kernel/smpboot.c | 59 +++++++++++++++++++++++++++++---------
 include/asm-x86/pda.h     |  6 ++--
 include/linux/mm.h        |  1 +
 6 files changed, 135 insertions(+), 38 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index e25c57b8aa8..0ab59edd706 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -25,6 +25,24 @@
 #include <asm/e820.h>
 #include <asm/bios_ebda.h>
 
+/* boot cpu pda */
+static struct x8664_pda _boot_cpu_pda __read_mostly;
+
+#ifdef CONFIG_SMP
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+/*
+ * We install an empty cpu_pda pointer table to trap references before
+ * the actual cpu_pda pointer table is created in setup_cpu_pda_map().
+ */
+static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
+#else
+static struct x8664_pda *__cpu_pda[1] __read_mostly;
+#endif
+
+#else /* !CONFIG_SMP (NR_CPUS will be 1) */
+static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
+#endif
+
 static void __init zap_identity_mappings(void)
 {
 	pgd_t *pgd = pgd_offset_k(0UL);
@@ -156,10 +174,12 @@ void __init x86_64_start_kernel(char * real_mode_data)
 
 	early_printk("Kernel alive\n");
 
- 	for (i = 0; i < NR_CPUS; i++)
- 		cpu_pda(i) = &boot_cpu_pda[i];
-
+	_cpu_pda = __cpu_pda;
+	cpu_pda(0) = &_boot_cpu_pda;
 	pda_init(0);
+
+	early_printk("Kernel really alive\n");
+
 	copy_bootdata(__va(real_mode_data));
 
 	reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 913af838c3c..dd12c1c84a8 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -101,6 +101,50 @@ static inline void setup_cpumask_of_cpu(void) { }
  */
 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(__per_cpu_offset);
+static inline void setup_cpu_pda_map(void) { }
+
+#elif !defined(CONFIG_SMP)
+static inline void setup_cpu_pda_map(void) { }
+
+#else /* CONFIG_SMP && CONFIG_X86_64 */
+
+/*
+ * Allocate cpu_pda pointer table and array via alloc_bootmem.
+ */
+static void __init setup_cpu_pda_map(void)
+{
+	char *pda;
+	struct x8664_pda **new_cpu_pda;
+	unsigned long size;
+	int cpu;
+
+	size = roundup(sizeof(struct x8664_pda), cache_line_size());
+
+	/* allocate cpu_pda array and pointer table */
+	{
+		unsigned long tsize = nr_cpu_ids * sizeof(void *);
+		unsigned long asize = size * (nr_cpu_ids - 1);
+
+		tsize = roundup(tsize, cache_line_size());
+		new_cpu_pda = alloc_bootmem(tsize + asize);
+		pda = (char *)new_cpu_pda + tsize;
+	}
+
+	/* initialize pointer table to static pda's */
+	for_each_possible_cpu(cpu) {
+		if (cpu == 0) {
+			/* leave boot cpu pda in place */
+			new_cpu_pda[0] = cpu_pda(0);
+			continue;
+		}
+		new_cpu_pda[cpu] = (struct x8664_pda *)pda;
+		new_cpu_pda[cpu]->in_bootmem = 1;
+		pda += size;
+	}
+
+	/* point to new pointer table */
+	_cpu_pda = new_cpu_pda;
+}
 #endif
 
 /*
@@ -110,46 +154,43 @@ EXPORT_SYMBOL(__per_cpu_offset);
  */
 void __init setup_per_cpu_areas(void)
 {
-	int i, highest_cpu = 0;
-	unsigned long size;
+	ssize_t size = PERCPU_ENOUGH_ROOM;
+	char *ptr;
+	int cpu;
 
 #ifdef CONFIG_HOTPLUG_CPU
 	prefill_possible_map();
+#else
+	nr_cpu_ids = num_processors;
 #endif
 
+	/* Setup cpu_pda map */
+	setup_cpu_pda_map();
+
 	/* Copy section for each CPU (we discard the original) */
 	size = PERCPU_ENOUGH_ROOM;
 	printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n",
 			  size);
 
-	for_each_possible_cpu(i) {
-		char *ptr;
+	for_each_possible_cpu(cpu) {
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 		ptr = alloc_bootmem_pages(size);
 #else
-		int node = early_cpu_to_node(i);
+		int node = early_cpu_to_node(cpu);
 		if (!node_online(node) || !NODE_DATA(node)) {
 			ptr = alloc_bootmem_pages(size);
 			printk(KERN_INFO
 			       "cpu %d has no node %d or node-local memory\n",
-				i, node);
+				cpu, node);
 		}
 		else
 			ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
 #endif
-		if (!ptr)
-			panic("Cannot allocate cpu data for CPU %d\n", i);
-#ifdef CONFIG_X86_64
-		cpu_pda(i)->data_offset = ptr - __per_cpu_start;
-#else
-		__per_cpu_offset[i] = ptr - __per_cpu_start;
-#endif
+		per_cpu_offset(cpu) = ptr - __per_cpu_start;
 		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
 
-		highest_cpu = i;
 	}
 
-	nr_cpu_ids = highest_cpu + 1;
 	printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
 		NR_CPUS, nr_cpu_ids, nr_node_ids);
 
@@ -199,7 +240,7 @@ void __cpuinit numa_set_node(int cpu, int node)
 {
 	int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
 
-	if (node != NUMA_NO_NODE)
+	if (cpu_pda(cpu) && node != NUMA_NO_NODE)
 		cpu_pda(cpu)->nodenumber = node;
 
 	if (cpu_to_node_map)
diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c
index aee0e820077..631ea6cc01d 100644
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -12,6 +12,7 @@
 #include <linux/bitops.h>
 #include <linux/module.h>
 #include <linux/kgdb.h>
+#include <linux/topology.h>
 #include <asm/pda.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -34,9 +35,8 @@ struct boot_params boot_params;
 
 cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
 
-struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly;
+struct x8664_pda **_cpu_pda __read_mostly;
 EXPORT_SYMBOL(_cpu_pda);
-struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;
 
 struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
 
@@ -114,8 +114,10 @@ void pda_init(int cpu)
 			__get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
 		if (!pda->irqstackptr)
 			panic("cannot allocate irqstack for cpu %d", cpu); 
-	}
 
+		if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
+			pda->nodenumber = cpu_to_node(cpu);
+	}
 
 	pda->irqstackptr += IRQSTACKSIZE-64;
 } 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 036604d3dae..bf083348745 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -816,6 +816,43 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
 	complete(&c_idle->done);
 }
 
+/*
+ * Allocate node local memory for the AP pda.
+ *
+ * Must be called after the _cpu_pda pointer table is initialized.
+ */
+static int __cpuinit get_local_pda(int cpu)
+{
+	struct x8664_pda *oldpda, *newpda;
+	unsigned long size = sizeof(struct x8664_pda);
+	int node = cpu_to_node(cpu);
+
+	if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem)
+		return 0;
+
+	oldpda = cpu_pda(cpu);
+	newpda = kmalloc_node(size, GFP_ATOMIC, node);
+	if (!newpda) {
+		printk(KERN_ERR "Could not allocate node local PDA "
+			"for CPU %d on node %d\n", cpu, node);
+
+		if (oldpda)
+			return 0;	/* have a usable pda */
+		else
+			return -1;
+	}
+
+	if (oldpda) {
+		memcpy(newpda, oldpda, size);
+		if (!after_bootmem)
+			free_bootmem((unsigned long)oldpda, size);
+	}
+
+	newpda->in_bootmem = 0;
+	cpu_pda(cpu) = newpda;
+	return 0;
+}
+
 static int __cpuinit do_boot_cpu(int apicid, int cpu)
 /*
  * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
@@ -841,19 +878,11 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
 	}
 
 	/* Allocate node local memory for AP pdas */
-	if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) {
-		struct x8664_pda *newpda, *pda;
-		int node = cpu_to_node(cpu);
-		pda = cpu_pda(cpu);
-		newpda = kmalloc_node(sizeof(struct x8664_pda), GFP_ATOMIC,
-				      node);
-		if (newpda) {
-			memcpy(newpda, pda, sizeof(struct x8664_pda));
-			cpu_pda(cpu) = newpda;
-		} else
-			printk(KERN_ERR
-		"Could not allocate node local PDA for CPU %d on node %d\n",
-				cpu, node);
+	if (cpu > 0) {
+		boot_error = get_local_pda(cpu);
+		if (boot_error)
+			goto restore_state;
+			/* if can't get pda memory, can't start cpu */
 	}
 #endif
 
@@ -972,6 +1001,8 @@ do_rest:
 		}
 	}
 
+restore_state:
+
 	if (boot_error) {
 		/* Try to put things back the way they were before ... */
 		unmap_cpu_to_logical_apicid(cpu);
@@ -1347,6 +1378,8 @@ __init void prefill_possible_map(void)
 
 	for (i = 0; i < possible; i++)
 		cpu_set(i, cpu_possible_map);
+
+	nr_cpu_ids = possible;
 }
 
 static void __ref remove_cpu_from_maps(int cpu)
diff --git a/include/asm-x86/pda.h b/include/asm-x86/pda.h
index de2ad9ac35a..b34e9a7cc80 100644
--- a/include/asm-x86/pda.h
+++ b/include/asm-x86/pda.h
@@ -22,7 +22,8 @@ struct x8664_pda {
 					   offset 40!!! */
 #endif
 	char *irqstackptr;
-	int nodenumber;			/* number of current node */
+	short nodenumber;		/* number of current node (32k max) */
+	short in_bootmem;		/* pda lives in bootmem */
 	unsigned int __softirq_pending;
 	unsigned int __nmi_count;	/* number of NMI on this CPUs */
 	short mmu_state;
@@ -38,8 +39,7 @@ struct x8664_pda {
 	unsigned irq_spurious_count;
 } ____cacheline_aligned_in_smp;
 
-extern struct x8664_pda *_cpu_pda[];
-extern struct x8664_pda boot_cpu_pda[];
+extern struct x8664_pda **_cpu_pda;
 extern void pda_init(int);
 
 #define cpu_pda(i) (_cpu_pda[i])
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 586a943cab0..0ea48a5af82 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1024,6 +1024,7 @@ extern void mem_init(void);
 extern void show_mem(void);
 extern void si_meminfo(struct sysinfo * val);
 extern void si_meminfo_node(struct sysinfo *val, int nid);
+extern int after_bootmem;
 
 #ifdef CONFIG_NUMA
 extern void setup_per_cpu_pageset(void);
-- 
cgit v1.2.3-70-g09d2


From 3fd052b1b46ac23a2316283a996fe6c32dbcf132 Mon Sep 17 00:00:00 2001
From: Bernhard Walle <bwalle@suse.de>
Date: Sun, 8 Jun 2008 15:46:30 +0200
Subject: x86: add flags parameter to reserve_bootmem_generic()

This patch adds a 'flags' parameter to reserve_bootmem_generic() like it
already has been added in reserve_bootmem() with commit
72a7fe3967dbf86cb34e24fbf1d957fe24d2f246.

It also changes all users to use BOOTMEM_DEFAULT, which doesn't effectively
change the behaviour. Since the change is x86-specific, I don't think it's
necessary to add a new API for migration. There are only 4 users of that
function.

The change is necessary for the next patch, using reserve_bootmem_generic()
for crashkernel reservation.

Signed-off-by: Bernhard Walle <bwalle@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/e820_64.c |  3 ++-
 arch/x86/kernel/efi_64.c  |  3 ++-
 arch/x86/kernel/mpparse.c |  5 +++--
 arch/x86/mm/init_64.c     | 17 ++++++++++++-----
 include/asm-x86/proto.h   |  2 +-
 5 files changed, 20 insertions(+), 10 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index 124480c0008..af1eb078974 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -118,7 +118,8 @@ void __init early_res_to_bootmem(unsigned long start, unsigned long end)
 			continue;
 		printk(KERN_INFO "  early res: %d [%lx-%lx] %s\n", i,
 			final_start, final_end - 1, r->name);
-		reserve_bootmem_generic(final_start, final_end - final_start);
+		reserve_bootmem_generic(final_start, final_end - final_start,
+				BOOTMEM_DEFAULT);
 	}
 }
 
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index d0060fdccca..d561dd5f1e6 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -100,7 +100,8 @@ void __init efi_call_phys_epilog(void)
 void __init efi_reserve_bootmem(void)
 {
 	reserve_bootmem_generic((unsigned long)memmap.phys_map,
-				memmap.nr_map * memmap.desc_size);
+				memmap.nr_map * memmap.desc_size,
+				BOOTMEM_DEFAULT);
 }
 
 void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size)
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 404683b94e7..4901ae3f742 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -729,10 +729,11 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
 			if (!reserve)
 				return 1;
 
-			reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE);
+			reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE,
+				BOOTMEM_DEFAULT);
 			if (mpf->mpf_physptr)
 				reserve_bootmem_generic(mpf->mpf_physptr,
-							PAGE_SIZE);
+					PAGE_SIZE, BOOTMEM_DEFAULT);
 #endif
 		return 1;
 		}
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 819dad973b1..bf7bf1de6c2 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -799,12 +799,13 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 }
 #endif
 
-void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
+int __init reserve_bootmem_generic(unsigned long phys, unsigned len, int flags)
 {
 #ifdef CONFIG_NUMA
 	int nid, next_nid;
 #endif
 	unsigned long pfn = phys >> PAGE_SHIFT;
+	int ret;
 
 	if (pfn >= end_pfn) {
 		/*
@@ -812,11 +813,11 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
 		 * firmware tables:
 		 */
 		if (pfn < max_pfn_mapped)
-			return;
+			return -EFAULT;
 
 		printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
 				phys, len);
-		return;
+		return -EFAULT;
 	}
 
 	/* Should check here against the e820 map to avoid double free */
@@ -824,9 +825,13 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
 	nid = phys_to_nid(phys);
 	next_nid = phys_to_nid(phys + len - 1);
 	if (nid == next_nid)
-		reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
+		ret = reserve_bootmem_node(NODE_DATA(nid), phys, len, flags);
 	else
-		reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
+		ret = reserve_bootmem(phys, len, flags);
+
+	if (ret != 0)
+		return ret;
+
 #else
 	reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
 #endif
@@ -835,6 +840,8 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
 		dma_reserve += len / PAGE_SIZE;
 		set_dma_reserve(dma_reserve);
 	}
+
+	return 0;
 }
 
 int kern_addr_valid(unsigned long addr)
diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h
index 6c8b41b03f6..a9f51472521 100644
--- a/include/asm-x86/proto.h
+++ b/include/asm-x86/proto.h
@@ -14,7 +14,7 @@ extern void ia32_syscall(void);
 extern void ia32_cstar_target(void);
 extern void ia32_sysenter_target(void);
 
-extern void reserve_bootmem_generic(unsigned long phys, unsigned len);
+extern int reserve_bootmem_generic(unsigned long phys, unsigned len, int flags);
 
 extern void syscall32_cpu_init(void);
 
-- 
cgit v1.2.3-70-g09d2


From 1812924bb1823950c1dc95c478b71b037057356e Mon Sep 17 00:00:00 2001
From: Cliff Wickman <cpw@sgi.com>
Date: Mon, 2 Jun 2008 08:56:14 -0500
Subject: x86, SGI UV: TLB shootdown using broadcast assist unit

TLB shootdown for SGI UV.

Depends on patch (in tip/x86/irq):
   x86-update-macros-used-by-uv-platform.patch   Jack Steiner May 29

This patch provides the ability to flush TLB's in cpu's that are not on
the local node.  The hardware mechanism for distributing the flush
messages is the UV's "broadcast assist unit".

The hook to intercept TLB shootdown requests is a 2-line change to
native_flush_tlb_others() (arch/x86/kernel/tlb_64.c).

This code has been tested on a hardware simulator. The real hardware
is not yet available.

The shootdown statistics are provided through /proc/sgi_uv/ptc_statistics.
The use of /sys was considered, but would have required the use of
many /sys files.  The debugfs was also considered, but these statistics
should be available on an ongoing basis, not just for debugging.

Issues to be fixed later:
- The IRQ for the messaging interrupt is currently hardcoded as 200
  (see UV_BAU_MESSAGE).  It should be dynamically assigned in the future.
- The use of appropriate udelay()'s is untested, as they are a problem
  in the simulator.

Signed-off-by: Cliff Wickman <cpw@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/Makefile    |   2 +-
 arch/x86/kernel/entry_64.S  |   4 +
 arch/x86/kernel/tlb_64.c    |   5 +
 arch/x86/kernel/tlb_uv.c    | 736 ++++++++++++++++++++++++++++++++++++++++++++
 include/asm-x86/uv/uv_bau.h | 331 ++++++++++++++++++++
 5 files changed, 1077 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kernel/tlb_uv.c
 create mode 100644 include/asm-x86/uv/uv_bau.h

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 605995e11ea..4078e98f012 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -94,7 +94,7 @@ obj-$(CONFIG_OLPC)		+= olpc.o
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
-        obj-y				+= genapic_64.o genapic_flat_64.o genx2apic_uv_x.o
+        obj-y				+= genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
         obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer_64.o
         obj-$(CONFIG_AUDIT)		+= audit_64.o
 
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 556a8df522a..6fd1987466e 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -720,6 +720,10 @@ ENTRY(apic_timer_interrupt)
 	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
 END(apic_timer_interrupt)
 
+ENTRY(uv_bau_message_intr1)
+	apicinterrupt 220,uv_bau_message_interrupt
+END(uv_bau_message_intr1)
+
 ENTRY(error_interrupt)
 	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
 END(error_interrupt)
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index a1f07d79320..fc132113bda 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -15,6 +15,8 @@
 #include <asm/proto.h>
 #include <asm/apicdef.h>
 #include <asm/idle.h>
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_bau.h>
 
 #include <mach_ipi.h>
 /*
@@ -162,6 +164,9 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
 	union smp_flush_state *f;
 	cpumask_t cpumask = *cpumaskp;
 
+	if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va))
+			return;
+
 	/* Caller has disabled preemption */
 	sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
 	f = &per_cpu(flush_state, sender);
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
new file mode 100644
index 00000000000..28e7c68d9d7
--- /dev/null
+++ b/arch/x86/kernel/tlb_uv.c
@@ -0,0 +1,736 @@
+/*
+ *	SGI UltraViolet TLB flush routines.
+ *
+ *	(c) 2008 Cliff Wickman <cpw@sgi.com>, SGI.
+ *
+ *	This code is released under the GNU General Public License version 2 or
+ *	later.
+ */
+#include <linux/mc146818rtc.h>
+#include <linux/proc_fs.h>
+#include <linux/kernel.h>
+
+#include <asm/mach-bigsmp/mach_apic.h>
+#include <asm/mmu_context.h>
+#include <asm/idle.h>
+#include <asm/genapic.h>
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_mmrs.h>
+#include <asm/uv/uv_bau.h>
+
+struct bau_control **uv_bau_table_bases;
+static int uv_bau_retry_limit;
+static int uv_nshift;		/* position of pnode (which is nasid>>1) */
+static unsigned long uv_mmask;
+
+char *status_table[] = {
+	"IDLE",
+	"ACTIVE",
+	"DESTINATION TIMEOUT",
+	"SOURCE TIMEOUT"
+};
+
+DEFINE_PER_CPU(struct ptc_stats, ptcstats);
+DEFINE_PER_CPU(struct bau_control, bau_control);
+
+/*
+ * Free a software acknowledge hardware resource by clearing its Pending
+ * bit. This will return a reply to the sender.
+ * If the message has timed out, a reply has already been sent by the
+ * hardware but the resource has not been released. In that case our
+ * clear of the Timeout bit (as well) will free the resource. No reply will
+ * be sent (the hardware will only do one reply per message).
+ */
+static void
+uv_reply_to_message(int resource,
+		    struct bau_payload_queue_entry *msg,
+		    struct bau_msg_status *msp)
+{
+	int fw;
+
+	fw = (1 << (resource + UV_SW_ACK_NPENDING)) | (1 << resource);
+	msg->replied_to = 1;
+	msg->sw_ack_vector = 0;
+	if (msp)
+		msp->seen_by.bits = 0;
+	uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, fw);
+	return;
+}
+
+/*
+ * Do all the things a cpu should do for a TLB shootdown message.
+ * Other cpu's may come here at the same time for this message.
+ */
+static void
+uv_bau_process_message(struct bau_payload_queue_entry *msg,
+		       int msg_slot, int sw_ack_slot)
+{
+	int cpu;
+	unsigned long this_cpu_mask;
+	struct bau_msg_status *msp;
+
+	msp = __get_cpu_var(bau_control).msg_statuses + msg_slot;
+	cpu = uv_blade_processor_id();
+	msg->number_of_cpus =
+	    uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id()));
+	this_cpu_mask = (unsigned long)1 << cpu;
+	if (msp->seen_by.bits & this_cpu_mask)
+		return;
+	atomic_or_long(&msp->seen_by.bits, this_cpu_mask);
+
+	if (msg->replied_to == 1)
+		return;
+
+	if (msg->address == TLB_FLUSH_ALL) {
+		local_flush_tlb();
+		__get_cpu_var(ptcstats).alltlb++;
+	} else {
+		__flush_tlb_one(msg->address);
+		__get_cpu_var(ptcstats).onetlb++;
+	}
+
+	__get_cpu_var(ptcstats).requestee++;
+
+	atomic_inc_short(&msg->acknowledge_count);
+	if (msg->number_of_cpus == msg->acknowledge_count)
+		uv_reply_to_message(sw_ack_slot, msg, msp);
+	return;
+}
+
+/*
+ * Examine the payload queue on all the distribution nodes to see
+ * which messages have not been seen, and which cpu(s) have not seen them.
+ *
+ * Returns the number of cpu's that have not responded.
+ */
+static int
+uv_examine_destinations(struct bau_target_nodemask *distribution)
+{
+	int sender;
+	int i;
+	int j;
+	int k;
+	int count = 0;
+	struct bau_control *bau_tablesp;
+	struct bau_payload_queue_entry *msg;
+	struct bau_msg_status *msp;
+
+	sender = smp_processor_id();
+	for (i = 0; i < (sizeof(struct bau_target_nodemask) * BITSPERBYTE);
+	     i++) {
+		if (bau_node_isset(i, distribution)) {
+			bau_tablesp = uv_bau_table_bases[i];
+			for (msg = bau_tablesp->va_queue_first, j = 0;
+			     j < DESTINATION_PAYLOAD_QUEUE_SIZE; msg++, j++) {
+				if ((msg->sending_cpu == sender) &&
+				    (!msg->replied_to)) {
+					msp = bau_tablesp->msg_statuses + j;
+					printk(KERN_DEBUG
+				"blade %d: address:%#lx %d of %d, not cpu(s): ",
+					       i, msg->address,
+					       msg->acknowledge_count,
+					       msg->number_of_cpus);
+					for (k = 0; k < msg->number_of_cpus;
+					     k++) {
+						if (!((long)1 << k & msp->
+						      seen_by.bits)) {
+							count++;
+							printk("%d ", k);
+						}
+					}
+					printk("\n");
+				}
+			}
+		}
+	}
+	return count;
+}
+
+/**
+ * uv_flush_tlb_others - globally purge translation cache of a virtual
+ * address or all TLB's
+ * @cpumaskp: mask of all cpu's in which the address is to be removed
+ * @mm: mm_struct containing virtual address range
+ * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
+ *
+ * This is the entry point for initiating any UV global TLB shootdown.
+ *
+ * Purges the translation caches of all specified processors of the given
+ * virtual address, or purges all TLB's on specified processors.
+ *
+ * The caller has derived the cpumaskp from the mm_struct and has subtracted
+ * the local cpu from the mask.  This function is called only if there
+ * are bits set in the mask. (e.g. flush_tlb_page())
+ *
+ * The cpumaskp is converted into a nodemask of the nodes containing
+ * the cpus.
+ */
+int
+uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm, unsigned long va)
+{
+	int i;
+	int blade;
+	int cpu;
+	int bit;
+	int right_shift;
+	int this_blade;
+	int exams = 0;
+	int tries = 0;
+	long source_timeouts = 0;
+	long destination_timeouts = 0;
+	unsigned long index;
+	unsigned long mmr_offset;
+	unsigned long descriptor_status;
+	struct bau_activation_descriptor *bau_desc;
+	ktime_t time1, time2;
+
+	cpu = uv_blade_processor_id();
+	this_blade = uv_numa_blade_id();
+	bau_desc = __get_cpu_var(bau_control).descriptor_base;
+	bau_desc += (UV_ITEMS_PER_DESCRIPTOR * cpu);
+
+	bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
+
+	i = 0;
+	for_each_cpu_mask(bit, *cpumaskp) {
+		blade = uv_cpu_to_blade_id(bit);
+		if (blade > (UV_DISTRIBUTION_SIZE - 1))
+			BUG();
+		if (blade == this_blade)
+			continue;
+		bau_node_set(blade, &bau_desc->distribution);
+		/* leave the bits for the remote cpu's in the mask until
+		   success; on failure we fall back to the IPI method */
+		i++;
+	}
+	if (i == 0)
+		goto none_to_flush;
+	__get_cpu_var(ptcstats).requestor++;
+	__get_cpu_var(ptcstats).ntargeted += i;
+
+	bau_desc->payload.address = va;
+	bau_desc->payload.sending_cpu = smp_processor_id();
+
+	if (cpu < UV_CPUS_PER_ACT_STATUS) {
+		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
+		right_shift = cpu * UV_ACT_STATUS_SIZE;
+	} else {
+		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
+		right_shift =
+		    ((cpu - UV_CPUS_PER_ACT_STATUS) * UV_ACT_STATUS_SIZE);
+	}
+	time1 = ktime_get();
+
+retry:
+	tries++;
+	index = ((unsigned long)
+		 1 << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | cpu;
+	uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index);
+
+	while ((descriptor_status = (((unsigned long)
+				      uv_read_local_mmr(mmr_offset) >>
+				      right_shift) & UV_ACT_STATUS_MASK)) !=
+	       DESC_STATUS_IDLE) {
+		if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) {
+			source_timeouts++;
+			if (source_timeouts > SOURCE_TIMEOUT_LIMIT)
+				source_timeouts = 0;
+			__get_cpu_var(ptcstats).s_retry++;
+			goto retry;
+		}
+		/* spin here looking for progress at the destinations */
+		if (descriptor_status == DESC_STATUS_DESTINATION_TIMEOUT) {
+			destination_timeouts++;
+			if (destination_timeouts > DESTINATION_TIMEOUT_LIMIT) {
+				/* returns # of cpus not responding */
+				if (uv_examine_destinations
+				    (&bau_desc->distribution) == 0) {
+					__get_cpu_var(ptcstats).d_retry++;
+					goto retry;
+				}
+				exams++;
+				if (exams >= uv_bau_retry_limit) {
+					printk(KERN_DEBUG
+					       "uv_flush_tlb_others");
+					printk("giving up on cpu %d\n",
+					       smp_processor_id());
+					goto unsuccessful;
+				}
+				/* delays can hang up the simulator
+				   udelay(1000);
+				 */
+				destination_timeouts = 0;
+			}
+		}
+	}
+	if (tries > 1)
+		__get_cpu_var(ptcstats).retriesok++;
+	/* on success, clear the remote cpu's from the mask so we don't
+	   use the IPI method of shootdown on them */
+	for_each_cpu_mask(bit, *cpumaskp) {
+		blade = uv_cpu_to_blade_id(bit);
+		if (blade == this_blade)
+			continue;
+		cpu_clear(bit, *cpumaskp);
+	}
+
+unsuccessful:
+	time2 = ktime_get();
+	__get_cpu_var(ptcstats).sflush_ns += (time2.tv64 - time1.tv64);
+
+none_to_flush:
+	if (cpus_empty(*cpumaskp))
+		return 1;
+
+	/* Cause the caller to do an IPI-style TLB shootdown on
+	   the cpu's still in the mask */
+	__get_cpu_var(ptcstats).ptc_i++;
+	return 0;
+}
+
+/*
+ * The BAU message interrupt comes here. (registered by set_intr_gate)
+ * See entry_64.S
+ *
+ * We received a broadcast assist message.
+ *
+ * Interrupts may have been disabled; this interrupt could represent
+ * the receipt of several messages.
+ *
+ * All cores/threads on this node get this interrupt.
+ * The last one to see it does the s/w ack.
+ * (the resource will not be freed until noninterruptable cpus see this
+ *  interrupt; hardware will timeout the s/w ack and reply ERROR)
+ */
+void
+uv_bau_message_interrupt(struct pt_regs *regs)
+{
+	struct bau_payload_queue_entry *pqp;
+	struct bau_payload_queue_entry *msg;
+	struct pt_regs *old_regs = set_irq_regs(regs);
+	ktime_t time1, time2;
+	int msg_slot;
+	int sw_ack_slot;
+	int fw;
+	int count = 0;
+	unsigned long local_pnode;
+
+	ack_APIC_irq();
+	exit_idle();
+	irq_enter();
+
+	time1 = ktime_get();
+
+	local_pnode = uv_blade_to_pnode(uv_numa_blade_id());
+
+	pqp = __get_cpu_var(bau_control).va_queue_first;
+	msg = __get_cpu_var(bau_control).bau_msg_head;
+	while (msg->sw_ack_vector) {
+		count++;
+		fw = msg->sw_ack_vector;
+		msg_slot = msg - pqp;
+		sw_ack_slot = ffs(fw) - 1;
+
+		uv_bau_process_message(msg, msg_slot, sw_ack_slot);
+
+		msg++;
+		if (msg > __get_cpu_var(bau_control).va_queue_last)
+			msg = __get_cpu_var(bau_control).va_queue_first;
+		__get_cpu_var(bau_control).bau_msg_head = msg;
+	}
+	if (!count)
+		__get_cpu_var(ptcstats).nomsg++;
+	else if (count > 1)
+		__get_cpu_var(ptcstats).multmsg++;
+
+	time2 = ktime_get();
+	__get_cpu_var(ptcstats).dflush_ns += (time2.tv64 - time1.tv64);
+
+	irq_exit();
+	set_irq_regs(old_regs);
+	return;
+}
+
+static void
+uv_enable_timeouts(void)
+{
+	int i;
+	int blade;
+	int last_blade;
+	int pnode;
+	int cur_cpu = 0;
+	unsigned long apicid;
+
+	/* better if we had each_online_blade */
+	last_blade = -1;
+	for_each_online_node(i) {
+		blade = uv_node_to_blade_id(i);
+		if (blade == last_blade)
+			continue;
+		last_blade = blade;
+		apicid = per_cpu(x86_cpu_to_apicid, cur_cpu);
+		pnode = uv_blade_to_pnode(blade);
+		cur_cpu += uv_blade_nr_possible_cpus(i);
+	}
+	return;
+}
+
+static void *
+uv_ptc_seq_start(struct seq_file *file, loff_t *offset)
+{
+	if (*offset < num_possible_cpus())
+		return offset;
+	return NULL;
+}
+
+static void *
+uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset)
+{
+	(*offset)++;
+	if (*offset < num_possible_cpus())
+		return offset;
+	return NULL;
+}
+
+static void
+uv_ptc_seq_stop(struct seq_file *file, void *data)
+{
+}
+
+/*
+ * Display the statistics thru /proc
+ * data points to the cpu number
+ */
+static int
+uv_ptc_seq_show(struct seq_file *file, void *data)
+{
+	struct ptc_stats *stat;
+	int cpu;
+
+	cpu = *(loff_t *)data;
+
+	if (!cpu) {
+		seq_printf(file,
+		"# cpu requestor requestee one all sretry dretry ptc_i ");
+		seq_printf(file,
+		"sw_ack sflush_us dflush_us sok dnomsg dmult starget\n");
+	}
+	if (cpu < num_possible_cpus() && cpu_online(cpu)) {
+		stat = &per_cpu(ptcstats, cpu);
+		seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld ",
+			   cpu, stat->requestor,
+			   stat->requestee, stat->onetlb, stat->alltlb,
+			   stat->s_retry, stat->d_retry, stat->ptc_i);
+		seq_printf(file, "%lx %ld %ld %ld %ld %ld %ld\n",
+			   uv_read_global_mmr64(uv_blade_to_pnode
+					(uv_cpu_to_blade_id(cpu)),
+					UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE),
+			   stat->sflush_ns / 1000, stat->dflush_ns / 1000,
+			   stat->retriesok, stat->nomsg,
+			   stat->multmsg, stat->ntargeted);
+	}
+
+	return 0;
+}
+
+/*
+ *  0: display meaning of the statistics
+ * >0: retry limit
+ */
+static ssize_t
+uv_ptc_proc_write(struct file *file, const char __user *user,
+		  size_t count, loff_t *data)
+{
+	long newmode;
+	char optstr[64];
+
+	if (copy_from_user(optstr, user, count))
+		return -EFAULT;
+	optstr[count - 1] = '\0';
+	if (strict_strtoul(optstr, 10, &newmode) < 0) {
+		printk(KERN_DEBUG "%s is invalid\n", optstr);
+		return -EINVAL;
+	}
+
+	if (newmode == 0) {
+		printk(KERN_DEBUG "# cpu:      cpu number\n");
+		printk(KERN_DEBUG
+		"requestor:  times this cpu was the flush requestor\n");
+		printk(KERN_DEBUG
+		"requestee:  times this cpu was requested to flush its TLBs\n");
+		printk(KERN_DEBUG
+		"one:        times requested to flush a single address\n");
+		printk(KERN_DEBUG
+		"all:        times requested to flush all TLB's\n");
+		printk(KERN_DEBUG
+		"sretry:     number of retries of source-side timeouts\n");
+		printk(KERN_DEBUG
+		"dretry:     number of retries of destination-side timeouts\n");
+		printk(KERN_DEBUG
+		"ptc_i:      times UV fell through to IPI-style flushes\n");
+		printk(KERN_DEBUG
+		"sw_ack:     image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n");
+		printk(KERN_DEBUG
+		"sflush_us:  microseconds spent in uv_flush_tlb_others()\n");
+		printk(KERN_DEBUG
+		"dflush_us:  microseconds spent in handling flush requests\n");
+		printk(KERN_DEBUG "sok:        successes on retry\n");
+		printk(KERN_DEBUG "dnomsg:     interrupts with no message\n");
+		printk(KERN_DEBUG
+		"dmult:      interrupts with multiple messages\n");
+		printk(KERN_DEBUG "starget:    nodes targeted\n");
+	} else {
+		uv_bau_retry_limit = newmode;
+		printk(KERN_DEBUG "timeout retry limit:%d\n",
+		       uv_bau_retry_limit);
+	}
+
+	return count;
+}
+
+static const struct seq_operations uv_ptc_seq_ops = {
+	.start = uv_ptc_seq_start,
+	.next = uv_ptc_seq_next,
+	.stop = uv_ptc_seq_stop,
+	.show = uv_ptc_seq_show
+};
+
+static int
+uv_ptc_proc_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &uv_ptc_seq_ops);
+}
+
+static const struct file_operations proc_uv_ptc_operations = {
+	.open = uv_ptc_proc_open,
+	.read = seq_read,
+	.write = uv_ptc_proc_write,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+static struct proc_dir_entry *proc_uv_ptc;
+
+static int __init
+uv_ptc_init(void)
+{
+	static struct proc_dir_entry *sgi_proc_dir;
+
+	sgi_proc_dir = NULL;
+
+	if (!is_uv_system())
+		return 0;
+
+	sgi_proc_dir = proc_mkdir("sgi_uv", NULL);
+	if (!sgi_proc_dir)
+		return -EINVAL;
+
+	proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL);
+	if (!proc_uv_ptc) {
+		printk(KERN_ERR "unable to create %s proc entry\n",
+		       UV_PTC_BASENAME);
+		return -EINVAL;
+	}
+	proc_uv_ptc->proc_fops = &proc_uv_ptc_operations;
+	return 0;
+}
+
+static void __exit
+uv_ptc_exit(void)
+{
+	remove_proc_entry(UV_PTC_BASENAME, NULL);
+}
+
+module_init(uv_ptc_init);
+module_exit(uv_ptc_exit);
+
+/*
+ * Initialization of BAU-related structures
+ */
+int __init
+uv_bau_init(void)
+{
+	int i;
+	int j;
+	int blade;
+	int nblades;
+	int *ip;
+	int pnode;
+	int last_blade;
+	int cur_cpu = 0;
+	unsigned long pa;
+	unsigned long n;
+	unsigned long m;
+	unsigned long mmr_image;
+	unsigned long apicid;
+	char *cp;
+	struct bau_control *bau_tablesp;
+	struct bau_activation_descriptor *adp, *ad2;
+	struct bau_payload_queue_entry *pqp;
+	struct bau_msg_status *msp;
+	struct bau_control *bcp;
+
+	if (!is_uv_system())
+		return 0;
+
+	uv_bau_retry_limit = 1;
+
+	if ((sizeof(struct bau_local_cpumask) * BITSPERBYTE) <
+	    MAX_CPUS_PER_NODE) {
+		printk(KERN_ERR
+			"uv_bau_init: bau_local_cpumask.bits too small\n");
+		BUG();
+	}
+
+	uv_nshift = uv_hub_info->n_val;
+	uv_mmask = ((unsigned long)1 << uv_hub_info->n_val) - 1;
+	nblades = 0;
+	last_blade = -1;
+	for_each_online_node(i) {
+		blade = uv_node_to_blade_id(i);
+		if (blade == last_blade)
+			continue;
+		last_blade = blade;
+		nblades++;
+	}
+
+	uv_bau_table_bases = (struct bau_control **)
+	    kmalloc(nblades * sizeof(struct bau_control *), GFP_KERNEL);
+	if (!uv_bau_table_bases)
+		BUG();
+
+	/* better if we had each_online_blade */
+	last_blade = -1;
+	for_each_online_node(i) {
+		blade = uv_node_to_blade_id(i);
+		if (blade == last_blade)
+			continue;
+		last_blade = blade;
+
+		bau_tablesp =
+		    kmalloc_node(sizeof(struct bau_control), GFP_KERNEL, i);
+		if (!bau_tablesp)
+			BUG();
+
+		bau_tablesp->msg_statuses =
+		    kmalloc_node(sizeof(struct bau_msg_status) *
+				 DESTINATION_PAYLOAD_QUEUE_SIZE, GFP_KERNEL, i);
+		if (!bau_tablesp->msg_statuses)
+			BUG();
+		for (j = 0, msp = bau_tablesp->msg_statuses;
+		     j < DESTINATION_PAYLOAD_QUEUE_SIZE; j++, msp++) {
+			bau_cpubits_clear(&msp->seen_by, (int)
+					  uv_blade_nr_possible_cpus(blade));
+		}
+
+		bau_tablesp->watching =
+		    kmalloc_node(sizeof(int) * DESTINATION_NUM_RESOURCES,
+				 GFP_KERNEL, i);
+		if (!bau_tablesp->watching)
+			BUG();
+		for (j = 0, ip = bau_tablesp->watching;
+		     j < DESTINATION_PAYLOAD_QUEUE_SIZE; j++, ip++) {
+			*ip = 0;
+		}
+
+		uv_bau_table_bases[i] = bau_tablesp;
+
+		pnode = uv_blade_to_pnode(blade);
+
+		if (sizeof(struct bau_activation_descriptor) != 64)
+			BUG();
+
+		adp = (struct bau_activation_descriptor *)
+		    kmalloc_node(16384, GFP_KERNEL, i);
+		if (!adp)
+			BUG();
+		if ((unsigned long)adp & 0xfff)
+			BUG();
+		pa = __pa((unsigned long)adp);
+		n = pa >> uv_nshift;
+		m = pa & uv_mmask;
+
+		mmr_image = uv_read_global_mmr64(pnode,
+						 UVH_LB_BAU_SB_DESCRIPTOR_BASE);
+		if (mmr_image)
+			uv_write_global_mmr64(pnode, (unsigned long)
+					      UVH_LB_BAU_SB_DESCRIPTOR_BASE,
+					      (n << UV_DESC_BASE_PNODE_SHIFT |
+					       m));
+		for (j = 0, ad2 = adp; j < UV_ACTIVATION_DESCRIPTOR_SIZE;
+		     j++, ad2++) {
+			memset(ad2, 0,
+			       sizeof(struct bau_activation_descriptor));
+			ad2->header.sw_ack_flag = 1;
+			ad2->header.base_dest_nodeid =
+			    uv_blade_to_pnode(uv_cpu_to_blade_id(0));
+			ad2->header.command = UV_NET_ENDPOINT_INTD;
+			ad2->header.int_both = 1;
+			/* all others need to be set to zero:
+			   fairness chaining multilevel count replied_to */
+		}
+
+		pqp = (struct bau_payload_queue_entry *)
+		    kmalloc_node((DESTINATION_PAYLOAD_QUEUE_SIZE + 1) *
+				 sizeof(struct bau_payload_queue_entry),
+				 GFP_KERNEL, i);
+		if (!pqp)
+			BUG();
+		if (sizeof(struct bau_payload_queue_entry) != 32)
+			BUG();
+		if ((unsigned long)(&((struct bau_payload_queue_entry *)0)->
+				    sw_ack_vector) != 15)
+			BUG();
+
+		cp = (char *)pqp + 31;
+		pqp = (struct bau_payload_queue_entry *)
+		    (((unsigned long)cp >> 5) << 5);
+		bau_tablesp->va_queue_first = pqp;
+		uv_write_global_mmr64(pnode,
+				      UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST,
+				      ((unsigned long)pnode <<
+				       UV_PAYLOADQ_PNODE_SHIFT) |
+				      uv_physnodeaddr(pqp));
+		uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL,
+				      uv_physnodeaddr(pqp));
+		bau_tablesp->va_queue_last =
+		    pqp + (DESTINATION_PAYLOAD_QUEUE_SIZE - 1);
+		uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST,
+				      (unsigned long)
+				      uv_physnodeaddr(bau_tablesp->
+						      va_queue_last));
+		memset(pqp, 0, sizeof(struct bau_payload_queue_entry) *
+		       DESTINATION_PAYLOAD_QUEUE_SIZE);
+
+		/* this initialization can't be in firmware because the
+		   messaging IRQ will be determined by the OS */
+		apicid = per_cpu(x86_cpu_to_apicid, cur_cpu);
+		pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG);
+		if ((pa & 0xff) != UV_BAU_MESSAGE) {
+			uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
+					      ((apicid << 32) |
+					       UV_BAU_MESSAGE));
+		}
+
+		for (j = cur_cpu; j < (cur_cpu + uv_blade_nr_possible_cpus(i));
+		     j++) {
+			bcp = (struct bau_control *)&per_cpu(bau_control, j);
+			bcp->bau_msg_head = bau_tablesp->va_queue_first;
+			bcp->va_queue_first = bau_tablesp->va_queue_first;
+
+			bcp->va_queue_last = bau_tablesp->va_queue_last;
+			bcp->watching = bau_tablesp->watching;
+			bcp->msg_statuses = bau_tablesp->msg_statuses;
+			bcp->descriptor_base = adp;
+		}
+		cur_cpu += uv_blade_nr_possible_cpus(i);
+	}
+
+	set_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1);
+
+	uv_enable_timeouts();
+
+	return 0;
+}
+
+__initcall(uv_bau_init);
diff --git a/include/asm-x86/uv/uv_bau.h b/include/asm-x86/uv/uv_bau.h
new file mode 100644
index 00000000000..f125f86c89a
--- /dev/null
+++ b/include/asm-x86/uv/uv_bau.h
@@ -0,0 +1,331 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI UV Broadcast Assist Unit definitions
+ *
+ * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef __ASM_X86_UV_BAU__
+#define __ASM_X86_UV_BAU__
+
+#include <linux/bitmap.h>
+#define BITSPERBYTE 8
+
+/* Broadcast Assist Unit messaging structures */
+
+/*
+ * Selective Broadcast activations are induced by software action
+ * specifying a particular 8-descriptor "set" via a 6-bit index written
+ * to an MMR.
+ * Thus there are 64 unique 512-byte sets of SB descriptors - one set for
+ * each 6-bit index value. These descriptor sets are mapped in sequence
+ * starting with set 0 located at the address specified in the
+ * BAU_SB_DESCRIPTOR_BASE register, set 1 is located at BASE + 512,
+ * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on.
+ *
+ * We will use 31 sets, one for sending BAU messages from each of the 32
+ * cpu's on the node.
+ *
+ * TLB shootdown will use the first of the 8 descriptors of each set.
+ * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set).
+ */
+
+#define UV_ITEMS_PER_DESCRIPTOR 8
+#define UV_CPUS_PER_ACT_STATUS 32
+#define UV_ACT_STATUS_MASK 0x3
+#define UV_ACT_STATUS_SIZE 2
+#define UV_ACTIVATION_DESCRIPTOR_SIZE 32
+#define UV_DISTRIBUTION_SIZE 256
+#define UV_SW_ACK_NPENDING 8
+#define UV_BAU_MESSAGE 200	/* Messaging irq; see irq_64.h */
+				/* and include/asm-x86/hw_irq_64.h */
+				/* To be dynamically allocated in the future */
+#define UV_NET_ENDPOINT_INTD 0x38
+#define UV_DESC_BASE_PNODE_SHIFT 49 /* position of pnode (nasid>>1) in MMR */
+#define UV_PAYLOADQ_PNODE_SHIFT 49
+
+#define UV_PTC_BASENAME "sgi_uv/ptc_statistics"
+#define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask))
+
+/* bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1 */
+#define DESC_STATUS_IDLE                0
+#define DESC_STATUS_ACTIVE              1
+#define DESC_STATUS_DESTINATION_TIMEOUT 2
+#define DESC_STATUS_SOURCE_TIMEOUT      3
+
+/* source side threshholds at which message retries print a warning */
+#define SOURCE_TIMEOUT_LIMIT            20
+#define DESTINATION_TIMEOUT_LIMIT       20
+
+/* number of entries in the destination side payload queue */
+#define DESTINATION_PAYLOAD_QUEUE_SIZE  17
+/* number of destination side software ack resources */
+#define DESTINATION_NUM_RESOURCES       8
+#define MAX_CPUS_PER_NODE		32
+
+/* Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor) */
+/* If the 'multilevel' flag in the header portion of the descriptor
+ * has been set to 0, then endpoint multi-unicast mode is selected.
+ * The distribution specification (32 bytes) is interpreted as a 256-bit
+ * distribution vector. Adjacent bits correspond to consecutive even numbered
+ * nodeIDs. The result of adding the index of a given bit to the 15-bit
+ * 'base_dest_nodeid' field of the header corresponds to the
+ * destination nodeID associated with that specified bit.  */
+struct bau_target_nodemask {
+	unsigned long bits[BITS_TO_LONGS(256)];
+};
+
+/* mask of cpu's on a node */
+/* (during initialization we need to check that unsigned long has
+    enough bits for max. cpu's per node) */
+struct bau_local_cpumask {
+	unsigned long bits;
+};
+
+/*
+ * Payload: 16 bytes (128 bits) (bytes 0x20-0x2f of descriptor)
+ * only 12 bytes (96 bits) of the payload area are usable.
+ * An additional 3 bytes (bits 27:4) of the header address are carried
+ * to the next bytes of the destination payload queue.
+ * And an additional 2 bytes of the header Suppl_A field are also
+ * carried to the destination payload queue.
+ * But the first byte of the Suppl_A becomes bits 127:120 (the 16th byte)
+ * of the destination payload queue, which is written by the hardware
+ * with the s/w ack resource bit vector.
+ * [ effective message contents (16 bytes (128 bits) maximum), not counting
+ *   the s/w ack bit vector  ]
+ */
+
+/* The payload is software-defined for INTD transactions */
+struct bau_msg_payload {
+	unsigned long address;		/* signifies a page or all TLB's
+						of the cpu */
+	/* 64 bits */
+	unsigned short sending_cpu;	/* filled in by sender */
+	/* 16 bits */
+	unsigned short acknowledge_count;/* filled in by destination */
+	/* 16 bits */
+	unsigned int reserved1:32;	/* not usable */
+};
+
+
+/* Message header:  16 bytes (128 bits) (bytes 0x30-0x3f of descriptor) */
+/* see table 4.2.3.0.1 in broacast_assist spec. */
+struct bau_msg_header {
+	int dest_subnodeid:6;	/* must be zero */
+	/* bits 5:0 */
+	int base_dest_nodeid:15; /* nasid>>1 (pnode) of first bit in node_map */
+	/* bits 20:6 */
+	int command:8;		/* message type */
+	/* bits 28:21 */
+				/* 0x38: SN3net EndPoint Message */
+	int rsvd_1:3;		/* must be zero */
+	/* bits 31:29 */
+				/* int will align on 32 bits */
+	int rsvd_2:9;		/* must be zero */
+	/* bits 40:32 */
+				/* Suppl_A is 56-41 */
+	int payload_2a:8;	/* becomes byte 16 of msg */
+	/* bits 48:41 */	/* not currently using */
+	int payload_2b:8;	/* becomes byte 17 of msg */
+	/* bits 56:49 */	/* not currently using */
+				/* Address field (96:57) is never used as an
+				   address (these are address bits 42:3) */
+	int rsvd_3:1;		/* must be zero */
+	/* bit 57 */
+				/* address bits 27:4 are payload */
+				/* these 24 bits become bytes 12-14 of msg */
+	int replied_to:1;	/* sent as 0 by the source to byte 12 */
+	/* bit 58 */
+
+	int payload_1a:5;	/* not currently used */
+	/* bits 63:59 */
+	int payload_1b:8;	/* not currently used */
+	/* bits 71:64 */
+	int payload_1c:8;	/* not currently used */
+	/* bits 79:72 */
+	int payload_1d:2;	/* not currently used */
+	/* bits 81:80 */
+
+	int rsvd_4:7;		/* must be zero */
+	/* bits 88:82 */
+	int sw_ack_flag:1;	/* software acknowledge flag */
+	/* bit 89 */
+				/* INTD trasactions at destination are to
+				   wait for software acknowledge */
+	int rsvd_5:6;		/* must be zero */
+	/* bits 95:90 */
+	int rsvd_6:5;		/* must be zero */
+	/* bits 100:96 */
+	int int_both:1;		/* if 1, interrupt both sockets on the blade */
+	/* bit 101*/
+	int fairness:3;		/* usually zero */
+	/* bits 104:102 */
+	int multilevel:1;	/* multi-level multicast format */
+	/* bit 105 */
+				/* 0 for TLB: endpoint multi-unicast messages */
+	int chaining:1;		/* next descriptor is part of this activation*/
+	/* bit 106 */
+	int rsvd_7:21;		/* must be zero */
+	/* bits 127:107 */
+};
+
+/* The format of the message to send, plus all accompanying control */
+/* Should be 64 bytes */
+struct bau_activation_descriptor {
+	struct bau_target_nodemask distribution;
+	/* message template, consisting of header and payload: */
+	struct bau_msg_header header;
+	struct bau_msg_payload payload;
+};
+/*
+ *   -payload--    ---------header------
+ *   bytes 0-11    bits 41-56  bits 58-81
+ *       A           B  (2)      C (3)
+ *
+ *            A/B/C are moved to:
+ *       A            C          B
+ *   bytes 0-11  bytes 12-14  bytes 16-17  (byte 15 filled in by hw as vector)
+ *   ------------payload queue-----------
+ */
+
+/*
+ * The payload queue on the destination side is an array of these.
+ * With BAU_MISC_CONTROL set for software acknowledge mode, the messages
+ * are 32 bytes (2 micropackets) (256 bits) in length, but contain only 17
+ * bytes of usable data, including the sw ack vector in byte 15 (bits 127:120)
+ * (12 bytes come from bau_msg_payload, 3 from payload_1, 2 from
+ *  sw_ack_vector and payload_2)
+ * "Enabling Software Acknowledgment mode (see Section 4.3.3 Software
+ *  Acknowledge Processing) also selects 32 byte (17 bytes usable) payload
+ *  operation."
+ */
+struct bau_payload_queue_entry {
+	unsigned long address;		/* signifies a page or all TLB's
+						of the cpu */
+	/* 64 bits, bytes 0-7 */
+
+	unsigned short sending_cpu;	/* cpu that sent the message */
+	/* 16 bits, bytes 8-9 */
+
+	unsigned short acknowledge_count; /* filled in by destination */
+	/* 16 bits, bytes 10-11 */
+
+	unsigned short replied_to:1;	/* sent as 0 by the source */
+	/* 1 bit */
+	unsigned short unused1:7;       /* not currently using */
+	/* 7 bits: byte 12) */
+
+	unsigned char unused2[2];	/* not currently using */
+	/* bytes 13-14 */
+
+	unsigned char sw_ack_vector;	/* filled in by the hardware */
+	/* byte 15 (bits 127:120) */
+
+	unsigned char unused4[3];	/* not currently using bytes 17-19 */
+	/* bytes 17-19 */
+
+	int number_of_cpus;		/* filled in at destination */
+	/* 32 bits, bytes 20-23 (aligned) */
+
+	unsigned char unused5[8];       /* not using */
+	/* bytes 24-31 */
+};
+
+/* one for every slot in the destination payload queue */
+struct bau_msg_status {
+	struct bau_local_cpumask seen_by;	/* map of cpu's */
+};
+
+/* one for every slot in the destination software ack resources */
+struct bau_sw_ack_status {
+	struct bau_payload_queue_entry *msg;	/* associated message */
+	int watcher;				/* cpu monitoring, or -1 */
+};
+
+/* one on every node and per-cpu; to locate the software tables */
+struct bau_control {
+	struct bau_activation_descriptor *descriptor_base;
+	struct bau_payload_queue_entry *bau_msg_head;
+	struct bau_payload_queue_entry *va_queue_first;
+	struct bau_payload_queue_entry *va_queue_last;
+	struct bau_msg_status *msg_statuses;
+	int *watching; /* pointer to array */
+};
+
+/*
+ * This structure is allocated per_cpu for UV TLB shootdown statistics.
+ */
+struct ptc_stats {
+	unsigned long ptc_i;	/* number of IPI-style flushes */
+	unsigned long requestor;	/* number of nodes this cpu sent to */
+	unsigned long requestee;	/* times cpu was remotely requested */
+	unsigned long alltlb;	/* times all tlb's on this cpu were flushed */
+	unsigned long onetlb;	/* times just one tlb on this cpu was flushed */
+	unsigned long s_retry;	/* retries on source side timeouts */
+	unsigned long d_retry;	/* retries on destination side timeouts */
+	unsigned long sflush_ns;/* nanoseconds spent in uv_flush_tlb_others */
+	unsigned long dflush_ns;/* nanoseconds spent destination side */
+	unsigned long retriesok; /* successes on retries */
+	unsigned long nomsg;	/* interrupts with no message */
+	unsigned long multmsg;	/* interrupts with multiple messages */
+	unsigned long ntargeted;/* nodes targeted */
+};
+
+static inline int bau_node_isset(int node, struct bau_target_nodemask *dstp)
+{
+	return constant_test_bit(node, &dstp->bits[0]);
+}
+static inline void bau_node_set(int node, struct bau_target_nodemask *dstp)
+{
+	__set_bit(node, &dstp->bits[0]);
+}
+static inline void bau_nodes_clear(struct bau_target_nodemask *dstp, int nbits)
+{
+	bitmap_zero(&dstp->bits[0], nbits);
+}
+
+static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
+{
+	bitmap_zero(&dstp->bits, nbits);
+}
+
+/*
+ * atomic increment of a short integer
+ * (rather than using the __sync_add_and_fetch() intrinsic)
+ *
+ * returns the new value of the variable
+ */
+static inline short int atomic_inc_short(short int *v)
+{
+	asm volatile("movw $1, %%cx\n"
+			"lock ; xaddw %%cx, %0\n"
+			: "+m" (*v)		/* outputs */
+			: : "%cx", "memory");	/* inputs : clobbereds */
+	return *v;
+}
+
+/*
+ * atomic OR of two long integers
+ * (rather than using the __sync_or_and_fetch() intrinsic)
+ */
+static inline void atomic_or_long(unsigned long *v1, unsigned long v2)
+{
+	asm volatile("movq %0, %%rax; lea %1, %%rdx\n"
+			"lock ; orq %%rax, %%rdx\n"
+			: "+m" (*v1)		/* outputs */
+			: "m" (v1), "m" (v2)	/* inputs */
+			: "memory");		/* clobbereds */
+}
+
+#define cpubit_isset(cpu, bau_local_cpumask) \
+	test_bit((cpu), (bau_local_cpumask).bits)
+
+int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long);
+void uv_bau_message_intr1(void);
+void uv_bau_timeout_intr1(void);
+
+#endif /* __ASM_X86_UV_BAU__ */
-- 
cgit v1.2.3-70-g09d2


From 73e991f45fe7644711c0c9dd357a1a2c6e222707 Mon Sep 17 00:00:00 2001
From: Cliff Wickman <cpw@sgi.com>
Date: Wed, 4 Jun 2008 15:33:17 -0500
Subject: x86 atomic operations: atomic_or_long() atomic_inc_short()

Provide atomic operations for increment of a 16-bit integer and
logical OR into a 64-bit integer.

Signed-off-by: Cliff Wickman <cpw@sgi.com>
Reviewed-by: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/atomic_64.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/atomic_64.h b/include/asm-x86/atomic_64.h
index 3e0cd7d3833..55c0dd9382b 100644
--- a/include/asm-x86/atomic_64.h
+++ b/include/asm-x86/atomic_64.h
@@ -431,6 +431,32 @@ static inline int atomic64_add_unless(atomic64_t *v, long a, long u)
 	return c != (u);
 }
 
+/**
+ * atomic_inc_short - increment of a short integer
+ * @v: pointer to type int
+ *
+ * Atomically adds 1 to @v
+ * Returns the new value of @u
+ */
+static inline short int atomic_inc_short(short int *v)
+{
+	asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v));
+	return *v;
+}
+
+/**
+ * atomic_or_long - OR of two long integers
+ * @v1: pointer to type unsigned long
+ * @v2: pointer to type unsigned long
+ *
+ * Atomically ORs @v1 and @v2
+ * Returns the result of the OR
+ */
+static inline void atomic_or_long(unsigned long *v1, unsigned long v2)
+{
+	asm(LOCK_PREFIX "orq %1, %0" : "+m" (*v1) : "r" (v2));
+}
+
 #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
 
 /* These are x86-specific, used by some header files */
-- 
cgit v1.2.3-70-g09d2


From b194b120507276b4f09e2e14f941884e777fc7c8 Mon Sep 17 00:00:00 2001
From: Cliff Wickman <cpw@sgi.com>
Date: Thu, 12 Jun 2008 08:23:48 -0500
Subject: SGI UV: TLB shootdown using broadcast assist unit, cleanups

TLB shootdown for SGI UV.

v1: 6/2 original
v2: 6/3 corrections/improvements per Ingo's review
v3: 6/4 split atomic operations off to a separate patch (Jeremy's review)
v4: 6/12 include <mach_apic.h> rather than <asm/mach-bigsmp/mach_apic.h>
         (fixes a !SMP build problem that Ingo found)
         fix the index on uv_table_bases[blade]

Signed-off-by: Cliff Wickman <cpw@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/tlb_64.c    |   2 +-
 arch/x86/kernel/tlb_uv.c    | 704 ++++++++++++++++++++++++--------------------
 include/asm-x86/uv/uv_bau.h | 147 ++++-----
 3 files changed, 454 insertions(+), 399 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index fc132113bda..5039d0f097a 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -165,7 +165,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
 	cpumask_t cpumask = *cpumaskp;
 
 	if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va))
-			return;
+		return;
 
 	/* Caller has disabled preemption */
 	sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 28e7c68d9d7..f7bc6a6fbe4 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -10,18 +10,20 @@
 #include <linux/proc_fs.h>
 #include <linux/kernel.h>
 
-#include <asm/mach-bigsmp/mach_apic.h>
 #include <asm/mmu_context.h>
 #include <asm/idle.h>
 #include <asm/genapic.h>
 #include <asm/uv/uv_hub.h>
 #include <asm/uv/uv_mmrs.h>
 #include <asm/uv/uv_bau.h>
+#include <asm/tsc.h>
 
-struct bau_control **uv_bau_table_bases;
-static int uv_bau_retry_limit;
-static int uv_nshift;		/* position of pnode (which is nasid>>1) */
-static unsigned long uv_mmask;
+#include <mach_apic.h>
+
+static struct bau_control **uv_bau_table_bases __read_mostly;
+static int uv_bau_retry_limit __read_mostly;
+static int uv_nshift __read_mostly; /* position of pnode (which is nasid>>1) */
+static unsigned long uv_mmask __read_mostly;
 
 char *status_table[] = {
 	"IDLE",
@@ -41,19 +43,18 @@ DEFINE_PER_CPU(struct bau_control, bau_control);
  * clear of the Timeout bit (as well) will free the resource. No reply will
  * be sent (the hardware will only do one reply per message).
  */
-static void
-uv_reply_to_message(int resource,
+static void uv_reply_to_message(int resource,
 		    struct bau_payload_queue_entry *msg,
 		    struct bau_msg_status *msp)
 {
-	int fw;
+	unsigned long dw;
 
-	fw = (1 << (resource + UV_SW_ACK_NPENDING)) | (1 << resource);
+	dw = (1 << (resource + UV_SW_ACK_NPENDING)) | (1 << resource);
 	msg->replied_to = 1;
 	msg->sw_ack_vector = 0;
 	if (msp)
 		msp->seen_by.bits = 0;
-	uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, fw);
+	uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw);
 	return;
 }
 
@@ -61,8 +62,7 @@ uv_reply_to_message(int resource,
  * Do all the things a cpu should do for a TLB shootdown message.
  * Other cpu's may come here at the same time for this message.
  */
-static void
-uv_bau_process_message(struct bau_payload_queue_entry *msg,
+static void uv_bau_process_message(struct bau_payload_queue_entry *msg,
 		       int msg_slot, int sw_ack_slot)
 {
 	int cpu;
@@ -103,8 +103,7 @@ uv_bau_process_message(struct bau_payload_queue_entry *msg,
  *
  * Returns the number of cpu's that have not responded.
  */
-static int
-uv_examine_destinations(struct bau_target_nodemask *distribution)
+static int uv_examine_destinations(struct bau_target_nodemask *distribution)
 {
 	int sender;
 	int i;
@@ -118,34 +117,161 @@ uv_examine_destinations(struct bau_target_nodemask *distribution)
 	sender = smp_processor_id();
 	for (i = 0; i < (sizeof(struct bau_target_nodemask) * BITSPERBYTE);
 	     i++) {
-		if (bau_node_isset(i, distribution)) {
-			bau_tablesp = uv_bau_table_bases[i];
-			for (msg = bau_tablesp->va_queue_first, j = 0;
-			     j < DESTINATION_PAYLOAD_QUEUE_SIZE; msg++, j++) {
-				if ((msg->sending_cpu == sender) &&
-				    (!msg->replied_to)) {
-					msp = bau_tablesp->msg_statuses + j;
-					printk(KERN_DEBUG
+		if (!bau_node_isset(i, distribution))
+			continue;
+		bau_tablesp = uv_bau_table_bases[i];
+		for (msg = bau_tablesp->va_queue_first, j = 0;
+		     j < DESTINATION_PAYLOAD_QUEUE_SIZE; msg++, j++) {
+			if ((msg->sending_cpu == sender) &&
+			    (!msg->replied_to)) {
+				msp = bau_tablesp->msg_statuses + j;
+				printk(KERN_DEBUG
 				"blade %d: address:%#lx %d of %d, not cpu(s): ",
-					       i, msg->address,
-					       msg->acknowledge_count,
-					       msg->number_of_cpus);
-					for (k = 0; k < msg->number_of_cpus;
-					     k++) {
-						if (!((long)1 << k & msp->
-						      seen_by.bits)) {
-							count++;
-							printk("%d ", k);
-						}
+				       i, msg->address,
+				       msg->acknowledge_count,
+				       msg->number_of_cpus);
+				for (k = 0; k < msg->number_of_cpus;
+				     k++) {
+					if (!((long)1 << k & msp->
+					      seen_by.bits)) {
+						count++;
+						printk("%d ", k);
 					}
-					printk("\n");
 				}
+				printk("\n");
 			}
 		}
 	}
 	return count;
 }
 
+/*
+ * wait for completion of a broadcast message
+ *
+ * return COMPLETE, RETRY or GIVEUP
+ */
+static int uv_wait_completion(struct bau_activation_descriptor *bau_desc,
+			      unsigned long mmr_offset, int right_shift)
+{
+	int exams = 0;
+	long destination_timeouts = 0;
+	long source_timeouts = 0;
+	unsigned long descriptor_status;
+
+	while ((descriptor_status = (((unsigned long)
+		uv_read_local_mmr(mmr_offset) >>
+			right_shift) & UV_ACT_STATUS_MASK)) !=
+			DESC_STATUS_IDLE) {
+		if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) {
+			source_timeouts++;
+			if (source_timeouts > SOURCE_TIMEOUT_LIMIT)
+				source_timeouts = 0;
+			__get_cpu_var(ptcstats).s_retry++;
+			return FLUSH_RETRY;
+		}
+		/*
+		 * spin here looking for progress at the destinations
+		 */
+		if (descriptor_status == DESC_STATUS_DESTINATION_TIMEOUT) {
+			destination_timeouts++;
+			if (destination_timeouts > DESTINATION_TIMEOUT_LIMIT) {
+				/*
+				 * returns number of cpus not responding
+				 */
+				if (uv_examine_destinations
+				    (&bau_desc->distribution) == 0) {
+					__get_cpu_var(ptcstats).d_retry++;
+					return FLUSH_RETRY;
+				}
+				exams++;
+				if (exams >= uv_bau_retry_limit) {
+					printk(KERN_DEBUG
+					       "uv_flush_tlb_others");
+					printk("giving up on cpu %d\n",
+					       smp_processor_id());
+					return FLUSH_GIVEUP;
+				}
+				/*
+				 * delays can hang the simulator
+				   udelay(1000);
+				 */
+				destination_timeouts = 0;
+			}
+		}
+	}
+	return FLUSH_COMPLETE;
+}
+
+/**
+ * uv_flush_send_and_wait
+ *
+ * Send a broadcast and wait for a broadcast message to complete.
+ *
+ * The cpumaskp mask contains the cpus the broadcast was sent to.
+ *
+ * Returns 1 if all remote flushing was done. The mask is zeroed.
+ * Returns 0 if some remote flushing remains to be done. The mask is left
+ * unchanged.
+ */
+int uv_flush_send_and_wait(int cpu, int this_blade,
+	struct bau_activation_descriptor *bau_desc, cpumask_t *cpumaskp)
+{
+	int completion_status = 0;
+	int right_shift;
+	int bit;
+	int blade;
+	int tries = 0;
+	unsigned long index;
+	unsigned long mmr_offset;
+	cycles_t time1;
+	cycles_t time2;
+
+	if (cpu < UV_CPUS_PER_ACT_STATUS) {
+		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
+		right_shift = cpu * UV_ACT_STATUS_SIZE;
+	} else {
+		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
+		right_shift =
+		    ((cpu - UV_CPUS_PER_ACT_STATUS) * UV_ACT_STATUS_SIZE);
+	}
+	time1 = get_cycles();
+	do {
+		tries++;
+		index = ((unsigned long)
+			1 << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | cpu;
+		uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index);
+		completion_status = uv_wait_completion(bau_desc, mmr_offset,
+					right_shift);
+	} while (completion_status == FLUSH_RETRY);
+	time2 = get_cycles();
+	__get_cpu_var(ptcstats).sflush += (time2 - time1);
+	if (tries > 1)
+		__get_cpu_var(ptcstats).retriesok++;
+
+	if (completion_status == FLUSH_GIVEUP) {
+		/*
+		 * Cause the caller to do an IPI-style TLB shootdown on
+		 * the cpu's, all of which are still in the mask.
+		 */
+		__get_cpu_var(ptcstats).ptc_i++;
+		return 0;
+	}
+
+	/*
+	 * Success, so clear the remote cpu's from the mask so we don't
+	 * use the IPI method of shootdown on them.
+	 */
+	for_each_cpu_mask(bit, *cpumaskp) {
+		blade = uv_cpu_to_blade_id(bit);
+		if (blade == this_blade)
+			continue;
+		cpu_clear(bit, *cpumaskp);
+	}
+	if (!cpus_empty(*cpumaskp))
+		return 0;
+	return 1;
+}
+
 /**
  * uv_flush_tlb_others - globally purge translation cache of a virtual
  * address or all TLB's
@@ -164,30 +290,25 @@ uv_examine_destinations(struct bau_target_nodemask *distribution)
  *
  * The cpumaskp is converted into a nodemask of the nodes containing
  * the cpus.
+ *
+ * Returns 1 if all remote flushing was done.
+ * Returns 0 if some remote flushing remains to be done.
  */
-int
-uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm, unsigned long va)
+int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm,
+	unsigned long va)
 {
 	int i;
+	int bit;
 	int blade;
 	int cpu;
-	int bit;
-	int right_shift;
 	int this_blade;
-	int exams = 0;
-	int tries = 0;
-	long source_timeouts = 0;
-	long destination_timeouts = 0;
-	unsigned long index;
-	unsigned long mmr_offset;
-	unsigned long descriptor_status;
+	int locals = 0;
 	struct bau_activation_descriptor *bau_desc;
-	ktime_t time1, time2;
 
 	cpu = uv_blade_processor_id();
 	this_blade = uv_numa_blade_id();
 	bau_desc = __get_cpu_var(bau_control).descriptor_base;
-	bau_desc += (UV_ITEMS_PER_DESCRIPTOR * cpu);
+	bau_desc += UV_ITEMS_PER_DESCRIPTOR * cpu;
 
 	bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
 
@@ -196,96 +317,29 @@ uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm, unsigned long va)
 		blade = uv_cpu_to_blade_id(bit);
 		if (blade > (UV_DISTRIBUTION_SIZE - 1))
 			BUG();
-		if (blade == this_blade)
+		if (blade == this_blade) {
+			locals++;
 			continue;
+		}
 		bau_node_set(blade, &bau_desc->distribution);
-		/* leave the bits for the remote cpu's in the mask until
-		   success; on failure we fall back to the IPI method */
 		i++;
 	}
-	if (i == 0)
-		goto none_to_flush;
+	if (i == 0) {
+		/*
+		 * no off_node flushing; return status for local node
+		 */
+		if (locals)
+			return 0;
+		else
+			return 1;
+	}
 	__get_cpu_var(ptcstats).requestor++;
 	__get_cpu_var(ptcstats).ntargeted += i;
 
 	bau_desc->payload.address = va;
 	bau_desc->payload.sending_cpu = smp_processor_id();
 
-	if (cpu < UV_CPUS_PER_ACT_STATUS) {
-		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
-		right_shift = cpu * UV_ACT_STATUS_SIZE;
-	} else {
-		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
-		right_shift =
-		    ((cpu - UV_CPUS_PER_ACT_STATUS) * UV_ACT_STATUS_SIZE);
-	}
-	time1 = ktime_get();
-
-retry:
-	tries++;
-	index = ((unsigned long)
-		 1 << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | cpu;
-	uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index);
-
-	while ((descriptor_status = (((unsigned long)
-				      uv_read_local_mmr(mmr_offset) >>
-				      right_shift) & UV_ACT_STATUS_MASK)) !=
-	       DESC_STATUS_IDLE) {
-		if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) {
-			source_timeouts++;
-			if (source_timeouts > SOURCE_TIMEOUT_LIMIT)
-				source_timeouts = 0;
-			__get_cpu_var(ptcstats).s_retry++;
-			goto retry;
-		}
-		/* spin here looking for progress at the destinations */
-		if (descriptor_status == DESC_STATUS_DESTINATION_TIMEOUT) {
-			destination_timeouts++;
-			if (destination_timeouts > DESTINATION_TIMEOUT_LIMIT) {
-				/* returns # of cpus not responding */
-				if (uv_examine_destinations
-				    (&bau_desc->distribution) == 0) {
-					__get_cpu_var(ptcstats).d_retry++;
-					goto retry;
-				}
-				exams++;
-				if (exams >= uv_bau_retry_limit) {
-					printk(KERN_DEBUG
-					       "uv_flush_tlb_others");
-					printk("giving up on cpu %d\n",
-					       smp_processor_id());
-					goto unsuccessful;
-				}
-				/* delays can hang up the simulator
-				   udelay(1000);
-				 */
-				destination_timeouts = 0;
-			}
-		}
-	}
-	if (tries > 1)
-		__get_cpu_var(ptcstats).retriesok++;
-	/* on success, clear the remote cpu's from the mask so we don't
-	   use the IPI method of shootdown on them */
-	for_each_cpu_mask(bit, *cpumaskp) {
-		blade = uv_cpu_to_blade_id(bit);
-		if (blade == this_blade)
-			continue;
-		cpu_clear(bit, *cpumaskp);
-	}
-
-unsuccessful:
-	time2 = ktime_get();
-	__get_cpu_var(ptcstats).sflush_ns += (time2.tv64 - time1.tv64);
-
-none_to_flush:
-	if (cpus_empty(*cpumaskp))
-		return 1;
-
-	/* Cause the caller to do an IPI-style TLB shootdown on
-	   the cpu's still in the mask */
-	__get_cpu_var(ptcstats).ptc_i++;
-	return 0;
+	return uv_flush_send_and_wait(cpu, this_blade, bau_desc, cpumaskp);
 }
 
 /*
@@ -302,13 +356,12 @@ none_to_flush:
  * (the resource will not be freed until noninterruptable cpus see this
  *  interrupt; hardware will timeout the s/w ack and reply ERROR)
  */
-void
-uv_bau_message_interrupt(struct pt_regs *regs)
+void uv_bau_message_interrupt(struct pt_regs *regs)
 {
 	struct bau_payload_queue_entry *pqp;
 	struct bau_payload_queue_entry *msg;
 	struct pt_regs *old_regs = set_irq_regs(regs);
-	ktime_t time1, time2;
+	cycles_t time1, time2;
 	int msg_slot;
 	int sw_ack_slot;
 	int fw;
@@ -319,7 +372,7 @@ uv_bau_message_interrupt(struct pt_regs *regs)
 	exit_idle();
 	irq_enter();
 
-	time1 = ktime_get();
+	time1 = get_cycles();
 
 	local_pnode = uv_blade_to_pnode(uv_numa_blade_id());
 
@@ -343,16 +396,15 @@ uv_bau_message_interrupt(struct pt_regs *regs)
 	else if (count > 1)
 		__get_cpu_var(ptcstats).multmsg++;
 
-	time2 = ktime_get();
-	__get_cpu_var(ptcstats).dflush_ns += (time2.tv64 - time1.tv64);
+	time2 = get_cycles();
+	__get_cpu_var(ptcstats).dflush += (time2 - time1);
 
 	irq_exit();
 	set_irq_regs(old_regs);
 	return;
 }
 
-static void
-uv_enable_timeouts(void)
+static void uv_enable_timeouts(void)
 {
 	int i;
 	int blade;
@@ -361,7 +413,6 @@ uv_enable_timeouts(void)
 	int cur_cpu = 0;
 	unsigned long apicid;
 
-	/* better if we had each_online_blade */
 	last_blade = -1;
 	for_each_online_node(i) {
 		blade = uv_node_to_blade_id(i);
@@ -375,16 +426,14 @@ uv_enable_timeouts(void)
 	return;
 }
 
-static void *
-uv_ptc_seq_start(struct seq_file *file, loff_t *offset)
+static void *uv_ptc_seq_start(struct seq_file *file, loff_t *offset)
 {
 	if (*offset < num_possible_cpus())
 		return offset;
 	return NULL;
 }
 
-static void *
-uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset)
+static void *uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset)
 {
 	(*offset)++;
 	if (*offset < num_possible_cpus())
@@ -392,8 +441,7 @@ uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset)
 	return NULL;
 }
 
-static void
-uv_ptc_seq_stop(struct seq_file *file, void *data)
+static void uv_ptc_seq_stop(struct seq_file *file, void *data)
 {
 }
 
@@ -401,8 +449,7 @@ uv_ptc_seq_stop(struct seq_file *file, void *data)
  * Display the statistics thru /proc
  * data points to the cpu number
  */
-static int
-uv_ptc_seq_show(struct seq_file *file, void *data)
+static int uv_ptc_seq_show(struct seq_file *file, void *data)
 {
 	struct ptc_stats *stat;
 	int cpu;
@@ -413,7 +460,7 @@ uv_ptc_seq_show(struct seq_file *file, void *data)
 		seq_printf(file,
 		"# cpu requestor requestee one all sretry dretry ptc_i ");
 		seq_printf(file,
-		"sw_ack sflush_us dflush_us sok dnomsg dmult starget\n");
+		"sw_ack sflush dflush sok dnomsg dmult starget\n");
 	}
 	if (cpu < num_possible_cpus() && cpu_online(cpu)) {
 		stat = &per_cpu(ptcstats, cpu);
@@ -425,7 +472,7 @@ uv_ptc_seq_show(struct seq_file *file, void *data)
 			   uv_read_global_mmr64(uv_blade_to_pnode
 					(uv_cpu_to_blade_id(cpu)),
 					UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE),
-			   stat->sflush_ns / 1000, stat->dflush_ns / 1000,
+			   stat->sflush, stat->dflush,
 			   stat->retriesok, stat->nomsg,
 			   stat->multmsg, stat->ntargeted);
 	}
@@ -437,8 +484,7 @@ uv_ptc_seq_show(struct seq_file *file, void *data)
  *  0: display meaning of the statistics
  * >0: retry limit
  */
-static ssize_t
-uv_ptc_proc_write(struct file *file, const char __user *user,
+static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user,
 		  size_t count, loff_t *data)
 {
 	long newmode;
@@ -471,9 +517,9 @@ uv_ptc_proc_write(struct file *file, const char __user *user,
 		printk(KERN_DEBUG
 		"sw_ack:     image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n");
 		printk(KERN_DEBUG
-		"sflush_us:  microseconds spent in uv_flush_tlb_others()\n");
+		"sflush_us:  cycles spent in uv_flush_tlb_others()\n");
 		printk(KERN_DEBUG
-		"dflush_us:  microseconds spent in handling flush requests\n");
+		"dflush_us:  cycles spent in handling flush requests\n");
 		printk(KERN_DEBUG "sok:        successes on retry\n");
 		printk(KERN_DEBUG "dnomsg:     interrupts with no message\n");
 		printk(KERN_DEBUG
@@ -489,40 +535,33 @@ uv_ptc_proc_write(struct file *file, const char __user *user,
 }
 
 static const struct seq_operations uv_ptc_seq_ops = {
-	.start = uv_ptc_seq_start,
-	.next = uv_ptc_seq_next,
-	.stop = uv_ptc_seq_stop,
-	.show = uv_ptc_seq_show
+	.start	= uv_ptc_seq_start,
+	.next	= uv_ptc_seq_next,
+	.stop	= uv_ptc_seq_stop,
+	.show	= uv_ptc_seq_show
 };
 
-static int
-uv_ptc_proc_open(struct inode *inode, struct file *file)
+static int uv_ptc_proc_open(struct inode *inode, struct file *file)
 {
 	return seq_open(file, &uv_ptc_seq_ops);
 }
 
 static const struct file_operations proc_uv_ptc_operations = {
-	.open = uv_ptc_proc_open,
-	.read = seq_read,
-	.write = uv_ptc_proc_write,
-	.llseek = seq_lseek,
-	.release = seq_release,
+	.open		= uv_ptc_proc_open,
+	.read		= seq_read,
+	.write		= uv_ptc_proc_write,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
 };
 
-static struct proc_dir_entry *proc_uv_ptc;
-
-static int __init
-uv_ptc_init(void)
+static int __init uv_ptc_init(void)
 {
-	static struct proc_dir_entry *sgi_proc_dir;
-
-	sgi_proc_dir = NULL;
+	struct proc_dir_entry *proc_uv_ptc;
 
 	if (!is_uv_system())
 		return 0;
 
-	sgi_proc_dir = proc_mkdir("sgi_uv", NULL);
-	if (!sgi_proc_dir)
+	if (!proc_mkdir("sgi_uv", NULL))
 		return -EINVAL;
 
 	proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL);
@@ -535,202 +574,213 @@ uv_ptc_init(void)
 	return 0;
 }
 
-static void __exit
-uv_ptc_exit(void)
+/*
+ * begin the initialization of the per-blade control structures
+ */
+static struct bau_control * __init uv_table_bases_init(int blade, int node)
 {
-	remove_proc_entry(UV_PTC_BASENAME, NULL);
+	int i;
+	int *ip;
+	struct bau_msg_status *msp;
+	struct bau_control *bau_tablesp;
+
+	bau_tablesp =
+	    kmalloc_node(sizeof(struct bau_control), GFP_KERNEL, node);
+	if (!bau_tablesp)
+		BUG();
+	bau_tablesp->msg_statuses =
+	    kmalloc_node(sizeof(struct bau_msg_status) *
+			 DESTINATION_PAYLOAD_QUEUE_SIZE, GFP_KERNEL, node);
+	if (!bau_tablesp->msg_statuses)
+		BUG();
+	for (i = 0, msp = bau_tablesp->msg_statuses;
+	     i < DESTINATION_PAYLOAD_QUEUE_SIZE; i++, msp++) {
+		bau_cpubits_clear(&msp->seen_by, (int)
+				  uv_blade_nr_possible_cpus(blade));
+	}
+	bau_tablesp->watching =
+	    kmalloc_node(sizeof(int) * DESTINATION_NUM_RESOURCES,
+			 GFP_KERNEL, node);
+	if (!bau_tablesp->watching)
+		BUG();
+	for (i = 0, ip = bau_tablesp->watching;
+	     i < DESTINATION_PAYLOAD_QUEUE_SIZE; i++, ip++) {
+		*ip = 0;
+	}
+	uv_bau_table_bases[blade] = bau_tablesp;
+	return bau_tablesp;
 }
 
-module_init(uv_ptc_init);
-module_exit(uv_ptc_exit);
+/*
+ * finish the initialization of the per-blade control structures
+ */
+static void __init uv_table_bases_finish(int blade, int node, int cur_cpu,
+				  struct bau_control *bau_tablesp,
+				  struct bau_activation_descriptor *adp)
+{
+	int i;
+	struct bau_control *bcp;
+
+	for (i = cur_cpu; i < (cur_cpu + uv_blade_nr_possible_cpus(blade));
+	     i++) {
+		bcp = (struct bau_control *)&per_cpu(bau_control, i);
+		bcp->bau_msg_head = bau_tablesp->va_queue_first;
+		bcp->va_queue_first = bau_tablesp->va_queue_first;
+		bcp->va_queue_last = bau_tablesp->va_queue_last;
+		bcp->watching = bau_tablesp->watching;
+		bcp->msg_statuses = bau_tablesp->msg_statuses;
+		bcp->descriptor_base = adp;
+	}
+}
 
 /*
- * Initialization of BAU-related structures
+ * initialize the sending side's sending buffers
  */
-int __init
-uv_bau_init(void)
+static struct bau_activation_descriptor * __init
+uv_activation_descriptor_init(int node, int pnode)
 {
 	int i;
-	int j;
-	int blade;
-	int nblades;
-	int *ip;
-	int pnode;
-	int last_blade;
-	int cur_cpu = 0;
 	unsigned long pa;
-	unsigned long n;
 	unsigned long m;
+	unsigned long n;
 	unsigned long mmr_image;
-	unsigned long apicid;
+	struct bau_activation_descriptor *adp;
+	struct bau_activation_descriptor *ad2;
+
+	adp = (struct bau_activation_descriptor *)
+	    kmalloc_node(16384, GFP_KERNEL, node);
+	if (!adp)
+		BUG();
+	pa = __pa((unsigned long)adp);
+	n = pa >> uv_nshift;
+	m = pa & uv_mmask;
+	mmr_image = uv_read_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE);
+	if (mmr_image)
+		uv_write_global_mmr64(pnode, (unsigned long)
+				      UVH_LB_BAU_SB_DESCRIPTOR_BASE,
+				      (n << UV_DESC_BASE_PNODE_SHIFT | m));
+	for (i = 0, ad2 = adp; i < UV_ACTIVATION_DESCRIPTOR_SIZE; i++, ad2++) {
+		memset(ad2, 0, sizeof(struct bau_activation_descriptor));
+		ad2->header.sw_ack_flag = 1;
+		ad2->header.base_dest_nodeid =
+		    uv_blade_to_pnode(uv_cpu_to_blade_id(0));
+		ad2->header.command = UV_NET_ENDPOINT_INTD;
+		ad2->header.int_both = 1;
+		/*
+		 * all others need to be set to zero:
+		 *   fairness chaining multilevel count replied_to
+		 */
+	}
+	return adp;
+}
+
+/*
+ * initialize the destination side's receiving buffers
+ */
+static struct bau_payload_queue_entry * __init uv_payload_queue_init(int node,
+				int pnode, struct bau_control *bau_tablesp)
+{
 	char *cp;
-	struct bau_control *bau_tablesp;
-	struct bau_activation_descriptor *adp, *ad2;
 	struct bau_payload_queue_entry *pqp;
-	struct bau_msg_status *msp;
-	struct bau_control *bcp;
 
-	if (!is_uv_system())
-		return 0;
+	pqp = (struct bau_payload_queue_entry *)
+	    kmalloc_node((DESTINATION_PAYLOAD_QUEUE_SIZE + 1) *
+			 sizeof(struct bau_payload_queue_entry),
+			 GFP_KERNEL, node);
+	if (!pqp)
+		BUG();
+	cp = (char *)pqp + 31;
+	pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5);
+	bau_tablesp->va_queue_first = pqp;
+	uv_write_global_mmr64(pnode,
+			      UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST,
+			      ((unsigned long)pnode <<
+			       UV_PAYLOADQ_PNODE_SHIFT) |
+			      uv_physnodeaddr(pqp));
+	uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL,
+			      uv_physnodeaddr(pqp));
+	bau_tablesp->va_queue_last =
+	    pqp + (DESTINATION_PAYLOAD_QUEUE_SIZE - 1);
+	uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST,
+			      (unsigned long)
+			      uv_physnodeaddr(bau_tablesp->va_queue_last));
+	memset(pqp, 0, sizeof(struct bau_payload_queue_entry) *
+	       DESTINATION_PAYLOAD_QUEUE_SIZE);
+	return pqp;
+}
 
-	uv_bau_retry_limit = 1;
+/*
+ * Initialization of each UV blade's structures
+ */
+static int __init uv_init_blade(int blade, int node, int cur_cpu)
+{
+	int pnode;
+	unsigned long pa;
+	unsigned long apicid;
+	struct bau_activation_descriptor *adp;
+	struct bau_payload_queue_entry *pqp;
+	struct bau_control *bau_tablesp;
 
-	if ((sizeof(struct bau_local_cpumask) * BITSPERBYTE) <
-	    MAX_CPUS_PER_NODE) {
-		printk(KERN_ERR
-			"uv_bau_init: bau_local_cpumask.bits too small\n");
-		BUG();
+	bau_tablesp = uv_table_bases_init(blade, node);
+	pnode = uv_blade_to_pnode(blade);
+	adp = uv_activation_descriptor_init(node, pnode);
+	pqp = uv_payload_queue_init(node, pnode, bau_tablesp);
+	uv_table_bases_finish(blade, node, cur_cpu, bau_tablesp, adp);
+	/*
+	 * the below initialization can't be in firmware because the
+	 * messaging IRQ will be determined by the OS
+	 */
+	apicid = per_cpu(x86_cpu_to_apicid, cur_cpu);
+	pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG);
+	if ((pa & 0xff) != UV_BAU_MESSAGE) {
+		uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
+				      ((apicid << 32) | UV_BAU_MESSAGE));
 	}
+	return 0;
+}
+
+/*
+ * Initialization of BAU-related structures
+ */
+static int __init uv_bau_init(void)
+{
+	int blade;
+	int node;
+	int nblades;
+	int last_blade;
+	int cur_cpu = 0;
+
+	if (!is_uv_system())
+		return 0;
 
+	uv_bau_retry_limit = 1;
 	uv_nshift = uv_hub_info->n_val;
 	uv_mmask = ((unsigned long)1 << uv_hub_info->n_val) - 1;
 	nblades = 0;
 	last_blade = -1;
-	for_each_online_node(i) {
-		blade = uv_node_to_blade_id(i);
+	for_each_online_node(node) {
+		blade = uv_node_to_blade_id(node);
 		if (blade == last_blade)
 			continue;
 		last_blade = blade;
 		nblades++;
 	}
-
 	uv_bau_table_bases = (struct bau_control **)
 	    kmalloc(nblades * sizeof(struct bau_control *), GFP_KERNEL);
 	if (!uv_bau_table_bases)
 		BUG();
-
-	/* better if we had each_online_blade */
 	last_blade = -1;
-	for_each_online_node(i) {
-		blade = uv_node_to_blade_id(i);
+	for_each_online_node(node) {
+		blade = uv_node_to_blade_id(node);
 		if (blade == last_blade)
 			continue;
 		last_blade = blade;
-
-		bau_tablesp =
-		    kmalloc_node(sizeof(struct bau_control), GFP_KERNEL, i);
-		if (!bau_tablesp)
-			BUG();
-
-		bau_tablesp->msg_statuses =
-		    kmalloc_node(sizeof(struct bau_msg_status) *
-				 DESTINATION_PAYLOAD_QUEUE_SIZE, GFP_KERNEL, i);
-		if (!bau_tablesp->msg_statuses)
-			BUG();
-		for (j = 0, msp = bau_tablesp->msg_statuses;
-		     j < DESTINATION_PAYLOAD_QUEUE_SIZE; j++, msp++) {
-			bau_cpubits_clear(&msp->seen_by, (int)
-					  uv_blade_nr_possible_cpus(blade));
-		}
-
-		bau_tablesp->watching =
-		    kmalloc_node(sizeof(int) * DESTINATION_NUM_RESOURCES,
-				 GFP_KERNEL, i);
-		if (!bau_tablesp->watching)
-			BUG();
-		for (j = 0, ip = bau_tablesp->watching;
-		     j < DESTINATION_PAYLOAD_QUEUE_SIZE; j++, ip++) {
-			*ip = 0;
-		}
-
-		uv_bau_table_bases[i] = bau_tablesp;
-
-		pnode = uv_blade_to_pnode(blade);
-
-		if (sizeof(struct bau_activation_descriptor) != 64)
-			BUG();
-
-		adp = (struct bau_activation_descriptor *)
-		    kmalloc_node(16384, GFP_KERNEL, i);
-		if (!adp)
-			BUG();
-		if ((unsigned long)adp & 0xfff)
-			BUG();
-		pa = __pa((unsigned long)adp);
-		n = pa >> uv_nshift;
-		m = pa & uv_mmask;
-
-		mmr_image = uv_read_global_mmr64(pnode,
-						 UVH_LB_BAU_SB_DESCRIPTOR_BASE);
-		if (mmr_image)
-			uv_write_global_mmr64(pnode, (unsigned long)
-					      UVH_LB_BAU_SB_DESCRIPTOR_BASE,
-					      (n << UV_DESC_BASE_PNODE_SHIFT |
-					       m));
-		for (j = 0, ad2 = adp; j < UV_ACTIVATION_DESCRIPTOR_SIZE;
-		     j++, ad2++) {
-			memset(ad2, 0,
-			       sizeof(struct bau_activation_descriptor));
-			ad2->header.sw_ack_flag = 1;
-			ad2->header.base_dest_nodeid =
-			    uv_blade_to_pnode(uv_cpu_to_blade_id(0));
-			ad2->header.command = UV_NET_ENDPOINT_INTD;
-			ad2->header.int_both = 1;
-			/* all others need to be set to zero:
-			   fairness chaining multilevel count replied_to */
-		}
-
-		pqp = (struct bau_payload_queue_entry *)
-		    kmalloc_node((DESTINATION_PAYLOAD_QUEUE_SIZE + 1) *
-				 sizeof(struct bau_payload_queue_entry),
-				 GFP_KERNEL, i);
-		if (!pqp)
-			BUG();
-		if (sizeof(struct bau_payload_queue_entry) != 32)
-			BUG();
-		if ((unsigned long)(&((struct bau_payload_queue_entry *)0)->
-				    sw_ack_vector) != 15)
-			BUG();
-
-		cp = (char *)pqp + 31;
-		pqp = (struct bau_payload_queue_entry *)
-		    (((unsigned long)cp >> 5) << 5);
-		bau_tablesp->va_queue_first = pqp;
-		uv_write_global_mmr64(pnode,
-				      UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST,
-				      ((unsigned long)pnode <<
-				       UV_PAYLOADQ_PNODE_SHIFT) |
-				      uv_physnodeaddr(pqp));
-		uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL,
-				      uv_physnodeaddr(pqp));
-		bau_tablesp->va_queue_last =
-		    pqp + (DESTINATION_PAYLOAD_QUEUE_SIZE - 1);
-		uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST,
-				      (unsigned long)
-				      uv_physnodeaddr(bau_tablesp->
-						      va_queue_last));
-		memset(pqp, 0, sizeof(struct bau_payload_queue_entry) *
-		       DESTINATION_PAYLOAD_QUEUE_SIZE);
-
-		/* this initialization can't be in firmware because the
-		   messaging IRQ will be determined by the OS */
-		apicid = per_cpu(x86_cpu_to_apicid, cur_cpu);
-		pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG);
-		if ((pa & 0xff) != UV_BAU_MESSAGE) {
-			uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
-					      ((apicid << 32) |
-					       UV_BAU_MESSAGE));
-		}
-
-		for (j = cur_cpu; j < (cur_cpu + uv_blade_nr_possible_cpus(i));
-		     j++) {
-			bcp = (struct bau_control *)&per_cpu(bau_control, j);
-			bcp->bau_msg_head = bau_tablesp->va_queue_first;
-			bcp->va_queue_first = bau_tablesp->va_queue_first;
-
-			bcp->va_queue_last = bau_tablesp->va_queue_last;
-			bcp->watching = bau_tablesp->watching;
-			bcp->msg_statuses = bau_tablesp->msg_statuses;
-			bcp->descriptor_base = adp;
-		}
-		cur_cpu += uv_blade_nr_possible_cpus(i);
+		uv_init_blade(blade, node, cur_cpu);
+		cur_cpu += uv_blade_nr_possible_cpus(blade);
 	}
-
 	set_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1);
-
 	uv_enable_timeouts();
-
 	return 0;
 }
-
 __initcall(uv_bau_init);
+__initcall(uv_ptc_init);
diff --git a/include/asm-x86/uv/uv_bau.h b/include/asm-x86/uv/uv_bau.h
index f125f86c89a..e52fec82266 100644
--- a/include/asm-x86/uv/uv_bau.h
+++ b/include/asm-x86/uv/uv_bau.h
@@ -14,9 +14,9 @@
 #include <linux/bitmap.h>
 #define BITSPERBYTE 8
 
-/* Broadcast Assist Unit messaging structures */
-
 /*
+ * Broadcast Assist Unit messaging structures
+ *
  * Selective Broadcast activations are induced by software action
  * specifying a particular 8-descriptor "set" via a 6-bit index written
  * to an MMR.
@@ -33,54 +33,73 @@
  * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set).
  */
 
-#define UV_ITEMS_PER_DESCRIPTOR 8
-#define UV_CPUS_PER_ACT_STATUS 32
-#define UV_ACT_STATUS_MASK 0x3
-#define UV_ACT_STATUS_SIZE 2
-#define UV_ACTIVATION_DESCRIPTOR_SIZE 32
-#define UV_DISTRIBUTION_SIZE 256
-#define UV_SW_ACK_NPENDING 8
-#define UV_BAU_MESSAGE 200	/* Messaging irq; see irq_64.h */
-				/* and include/asm-x86/hw_irq_64.h */
-				/* To be dynamically allocated in the future */
-#define UV_NET_ENDPOINT_INTD 0x38
-#define UV_DESC_BASE_PNODE_SHIFT 49 /* position of pnode (nasid>>1) in MMR */
-#define UV_PAYLOADQ_PNODE_SHIFT 49
-
-#define UV_PTC_BASENAME "sgi_uv/ptc_statistics"
-#define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask))
-
-/* bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1 */
+#define UV_ITEMS_PER_DESCRIPTOR		8
+#define UV_CPUS_PER_ACT_STATUS		32
+#define UV_ACT_STATUS_MASK		0x3
+#define UV_ACT_STATUS_SIZE		2
+#define UV_ACTIVATION_DESCRIPTOR_SIZE	32
+#define UV_DISTRIBUTION_SIZE		256
+#define UV_SW_ACK_NPENDING		8
+#define UV_BAU_MESSAGE			200
+/*
+ * Messaging irq; see irq_64.h and include/asm-x86/hw_irq_64.h
+ * To be dynamically allocated in the future
+ */
+#define UV_NET_ENDPOINT_INTD		0x38
+#define UV_DESC_BASE_PNODE_SHIFT	49
+#define UV_PAYLOADQ_PNODE_SHIFT		49
+#define UV_PTC_BASENAME			"sgi_uv/ptc_statistics"
+#define uv_physnodeaddr(x)		((__pa((unsigned long)(x)) & uv_mmask))
+
+/*
+ * bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1
+ */
 #define DESC_STATUS_IDLE                0
 #define DESC_STATUS_ACTIVE              1
 #define DESC_STATUS_DESTINATION_TIMEOUT 2
 #define DESC_STATUS_SOURCE_TIMEOUT      3
 
-/* source side threshholds at which message retries print a warning */
+/*
+ * source side threshholds at which message retries print a warning
+ */
 #define SOURCE_TIMEOUT_LIMIT            20
 #define DESTINATION_TIMEOUT_LIMIT       20
 
-/* number of entries in the destination side payload queue */
+/*
+ * number of entries in the destination side payload queue
+ */
 #define DESTINATION_PAYLOAD_QUEUE_SIZE  17
-/* number of destination side software ack resources */
+/*
+ * number of destination side software ack resources
+ */
 #define DESTINATION_NUM_RESOURCES       8
 #define MAX_CPUS_PER_NODE		32
+/*
+ * completion statuses for sending a TLB flush message
+ */
+#define	FLUSH_RETRY			1
+#define	FLUSH_GIVEUP			2
+#define	FLUSH_COMPLETE			3
 
-/* Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor) */
-/* If the 'multilevel' flag in the header portion of the descriptor
+/*
+ * Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor)
+ * If the 'multilevel' flag in the header portion of the descriptor
  * has been set to 0, then endpoint multi-unicast mode is selected.
  * The distribution specification (32 bytes) is interpreted as a 256-bit
  * distribution vector. Adjacent bits correspond to consecutive even numbered
  * nodeIDs. The result of adding the index of a given bit to the 15-bit
  * 'base_dest_nodeid' field of the header corresponds to the
- * destination nodeID associated with that specified bit.  */
+ * destination nodeID associated with that specified bit.
+ */
 struct bau_target_nodemask {
 	unsigned long bits[BITS_TO_LONGS(256)];
 };
 
-/* mask of cpu's on a node */
-/* (during initialization we need to check that unsigned long has
-    enough bits for max. cpu's per node) */
+/*
+ * mask of cpu's on a node
+ * (during initialization we need to check that unsigned long has
+ *  enough bits for max. cpu's per node)
+ */
 struct bau_local_cpumask {
 	unsigned long bits;
 };
@@ -99,7 +118,9 @@ struct bau_local_cpumask {
  *   the s/w ack bit vector  ]
  */
 
-/* The payload is software-defined for INTD transactions */
+/*
+ * The payload is software-defined for INTD transactions
+ */
 struct bau_msg_payload {
 	unsigned long address;		/* signifies a page or all TLB's
 						of the cpu */
@@ -112,8 +133,10 @@ struct bau_msg_payload {
 };
 
 
-/* Message header:  16 bytes (128 bits) (bytes 0x30-0x3f of descriptor) */
-/* see table 4.2.3.0.1 in broacast_assist spec. */
+/*
+ * Message header:  16 bytes (128 bits) (bytes 0x30-0x3f of descriptor)
+ * see table 4.2.3.0.1 in broacast_assist spec.
+ */
 struct bau_msg_header {
 	int dest_subnodeid:6;	/* must be zero */
 	/* bits 5:0 */
@@ -173,11 +196,15 @@ struct bau_msg_header {
 	/* bits 127:107 */
 };
 
-/* The format of the message to send, plus all accompanying control */
-/* Should be 64 bytes */
+/*
+ * The format of the message to send, plus all accompanying control
+ * Should be 64 bytes
+ */
 struct bau_activation_descriptor {
 	struct bau_target_nodemask distribution;
-	/* message template, consisting of header and payload: */
+	/*
+	 * message template, consisting of header and payload:
+	 */
 	struct bau_msg_header header;
 	struct bau_msg_payload payload;
 };
@@ -235,18 +262,24 @@ struct bau_payload_queue_entry {
 	/* bytes 24-31 */
 };
 
-/* one for every slot in the destination payload queue */
+/*
+ * one for every slot in the destination payload queue
+ */
 struct bau_msg_status {
 	struct bau_local_cpumask seen_by;	/* map of cpu's */
 };
 
-/* one for every slot in the destination software ack resources */
+/*
+ * one for every slot in the destination software ack resources
+ */
 struct bau_sw_ack_status {
 	struct bau_payload_queue_entry *msg;	/* associated message */
 	int watcher;				/* cpu monitoring, or -1 */
 };
 
-/* one on every node and per-cpu; to locate the software tables */
+/*
+ * one on every node and per-cpu; to locate the software tables
+ */
 struct bau_control {
 	struct bau_activation_descriptor *descriptor_base;
 	struct bau_payload_queue_entry *bau_msg_head;
@@ -267,8 +300,8 @@ struct ptc_stats {
 	unsigned long onetlb;	/* times just one tlb on this cpu was flushed */
 	unsigned long s_retry;	/* retries on source side timeouts */
 	unsigned long d_retry;	/* retries on destination side timeouts */
-	unsigned long sflush_ns;/* nanoseconds spent in uv_flush_tlb_others */
-	unsigned long dflush_ns;/* nanoseconds spent destination side */
+	unsigned long sflush;	/* cycles spent in uv_flush_tlb_others */
+	unsigned long dflush;	/* cycles spent on destination side */
 	unsigned long retriesok; /* successes on retries */
 	unsigned long nomsg;	/* interrupts with no message */
 	unsigned long multmsg;	/* interrupts with multiple messages */
@@ -293,39 +326,11 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
 	bitmap_zero(&dstp->bits, nbits);
 }
 
-/*
- * atomic increment of a short integer
- * (rather than using the __sync_add_and_fetch() intrinsic)
- *
- * returns the new value of the variable
- */
-static inline short int atomic_inc_short(short int *v)
-{
-	asm volatile("movw $1, %%cx\n"
-			"lock ; xaddw %%cx, %0\n"
-			: "+m" (*v)		/* outputs */
-			: : "%cx", "memory");	/* inputs : clobbereds */
-	return *v;
-}
-
-/*
- * atomic OR of two long integers
- * (rather than using the __sync_or_and_fetch() intrinsic)
- */
-static inline void atomic_or_long(unsigned long *v1, unsigned long v2)
-{
-	asm volatile("movq %0, %%rax; lea %1, %%rdx\n"
-			"lock ; orq %%rax, %%rdx\n"
-			: "+m" (*v1)		/* outputs */
-			: "m" (v1), "m" (v2)	/* inputs */
-			: "memory");		/* clobbereds */
-}
-
 #define cpubit_isset(cpu, bau_local_cpumask) \
 	test_bit((cpu), (bau_local_cpumask).bits)
 
-int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long);
-void uv_bau_message_intr1(void);
-void uv_bau_timeout_intr1(void);
+extern int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long);
+extern void uv_bau_message_intr1(void);
+extern void uv_bau_timeout_intr1(void);
 
 #endif /* __ASM_X86_UV_BAU__ */
-- 
cgit v1.2.3-70-g09d2


From dc163a41ffba22a6ef70b51e7ddf68aa13b4b414 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 18 Jun 2008 14:15:43 +0200
Subject: SGI UV: TLB shootdown using broadcast assist unit

TLB shootdown for SGI UV.

v5: 6/12 corrections/improvements per Ingo's second review

Signed-off-by: Cliff Wickman <cpw@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/tlb_uv.c    | 183 ++++++++++++++++++++------------------------
 include/asm-x86/uv/uv_bau.h |  21 ++---
 2 files changed, 96 insertions(+), 108 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index f7bc6a6fbe4..d8705e97e8d 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -25,15 +25,8 @@ static int uv_bau_retry_limit __read_mostly;
 static int uv_nshift __read_mostly; /* position of pnode (which is nasid>>1) */
 static unsigned long uv_mmask __read_mostly;
 
-char *status_table[] = {
-	"IDLE",
-	"ACTIVE",
-	"DESTINATION TIMEOUT",
-	"SOURCE TIMEOUT"
-};
-
-DEFINE_PER_CPU(struct ptc_stats, ptcstats);
-DEFINE_PER_CPU(struct bau_control, bau_control);
+static DEFINE_PER_CPU(struct ptc_stats, ptcstats);
+static DEFINE_PER_CPU(struct bau_control, bau_control);
 
 /*
  * Free a software acknowledge hardware resource by clearing its Pending
@@ -55,7 +48,6 @@ static void uv_reply_to_message(int resource,
 	if (msp)
 		msp->seen_by.bits = 0;
 	uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw);
-	return;
 }
 
 /*
@@ -73,7 +65,7 @@ static void uv_bau_process_message(struct bau_payload_queue_entry *msg,
 	cpu = uv_blade_processor_id();
 	msg->number_of_cpus =
 	    uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id()));
-	this_cpu_mask = (unsigned long)1 << cpu;
+	this_cpu_mask = 1UL << cpu;
 	if (msp->seen_by.bits & this_cpu_mask)
 		return;
 	atomic_or_long(&msp->seen_by.bits, this_cpu_mask);
@@ -94,53 +86,60 @@ static void uv_bau_process_message(struct bau_payload_queue_entry *msg,
 	atomic_inc_short(&msg->acknowledge_count);
 	if (msg->number_of_cpus == msg->acknowledge_count)
 		uv_reply_to_message(sw_ack_slot, msg, msp);
-	return;
 }
 
 /*
- * Examine the payload queue on all the distribution nodes to see
+ * Examine the payload queue on one distribution node to see
  * which messages have not been seen, and which cpu(s) have not seen them.
  *
  * Returns the number of cpu's that have not responded.
  */
-static int uv_examine_destinations(struct bau_target_nodemask *distribution)
+static int uv_examine_destination(struct bau_control *bau_tablesp, int sender)
 {
-	int sender;
 	int i;
 	int j;
-	int k;
 	int count = 0;
-	struct bau_control *bau_tablesp;
 	struct bau_payload_queue_entry *msg;
 	struct bau_msg_status *msp;
 
+	for (msg = bau_tablesp->va_queue_first, i = 0; i < DEST_Q_SIZE;
+	     msg++, i++) {
+		if ((msg->sending_cpu == sender) && (!msg->replied_to)) {
+			msp = bau_tablesp->msg_statuses + i;
+			printk(KERN_DEBUG
+			       "blade %d: address:%#lx %d of %d, not cpu(s): ",
+			       i, msg->address, msg->acknowledge_count,
+			       msg->number_of_cpus);
+			for (j = 0; j < msg->number_of_cpus; j++) {
+				if (!((long)1 << j & msp-> seen_by.bits)) {
+					count++;
+					printk("%d ", j);
+				}
+			}
+			printk("\n");
+		}
+	}
+	return count;
+}
+
+/*
+ * Examine the payload queue on all the distribution nodes to see
+ * which messages have not been seen, and which cpu(s) have not seen them.
+ *
+ * Returns the number of cpu's that have not responded.
+ */
+static int uv_examine_destinations(struct bau_target_nodemask *distribution)
+{
+	int sender;
+	int i;
+	int count = 0;
+
 	sender = smp_processor_id();
 	for (i = 0; i < (sizeof(struct bau_target_nodemask) * BITSPERBYTE);
 	     i++) {
 		if (!bau_node_isset(i, distribution))
 			continue;
-		bau_tablesp = uv_bau_table_bases[i];
-		for (msg = bau_tablesp->va_queue_first, j = 0;
-		     j < DESTINATION_PAYLOAD_QUEUE_SIZE; msg++, j++) {
-			if ((msg->sending_cpu == sender) &&
-			    (!msg->replied_to)) {
-				msp = bau_tablesp->msg_statuses + j;
-				printk(KERN_DEBUG
-				"blade %d: address:%#lx %d of %d, not cpu(s): ",
-				       i, msg->address,
-				       msg->acknowledge_count,
-				       msg->number_of_cpus);
-				for (k = 0; k < msg->number_of_cpus;
-				     k++) {
-					if (!((long)1 << k & msp->
-					      seen_by.bits)) {
-						count++;
-						printk("%d ", k);
-					}
-				}
-				printk("\n");
-			}
-		}
+		count += uv_examine_destination(uv_bau_table_bases[i], sender);
 	}
 	return count;
 }
@@ -150,7 +149,7 @@ static int uv_examine_destinations(struct bau_target_nodemask *distribution)
  *
  * return COMPLETE, RETRY or GIVEUP
  */
-static int uv_wait_completion(struct bau_activation_descriptor *bau_desc,
+static int uv_wait_completion(struct bau_desc *bau_desc,
 			      unsigned long mmr_offset, int right_shift)
 {
 	int exams = 0;
@@ -213,8 +212,8 @@ static int uv_wait_completion(struct bau_activation_descriptor *bau_desc,
  * Returns 0 if some remote flushing remains to be done. The mask is left
  * unchanged.
  */
-int uv_flush_send_and_wait(int cpu, int this_blade,
-	struct bau_activation_descriptor *bau_desc, cpumask_t *cpumaskp)
+int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
+			   cpumask_t *cpumaskp)
 {
 	int completion_status = 0;
 	int right_shift;
@@ -237,8 +236,8 @@ int uv_flush_send_and_wait(int cpu, int this_blade,
 	time1 = get_cycles();
 	do {
 		tries++;
-		index = ((unsigned long)
-			1 << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | cpu;
+		index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) |
+			cpu;
 		uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index);
 		completion_status = uv_wait_completion(bau_desc, mmr_offset,
 					right_shift);
@@ -303,7 +302,7 @@ int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm,
 	int cpu;
 	int this_blade;
 	int locals = 0;
-	struct bau_activation_descriptor *bau_desc;
+	struct bau_desc *bau_desc;
 
 	cpu = uv_blade_processor_id();
 	this_blade = uv_numa_blade_id();
@@ -315,8 +314,7 @@ int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm,
 	i = 0;
 	for_each_cpu_mask(bit, *cpumaskp) {
 		blade = uv_cpu_to_blade_id(bit);
-		if (blade > (UV_DISTRIBUTION_SIZE - 1))
-			BUG();
+		BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1));
 		if (blade == this_blade) {
 			locals++;
 			continue;
@@ -360,6 +358,8 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
 {
 	struct bau_payload_queue_entry *pqp;
 	struct bau_payload_queue_entry *msg;
+	struct bau_payload_queue_entry *va_queue_first;
+	struct bau_payload_queue_entry *va_queue_last;
 	struct pt_regs *old_regs = set_irq_regs(regs);
 	cycles_t time1, time2;
 	int msg_slot;
@@ -376,7 +376,8 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
 
 	local_pnode = uv_blade_to_pnode(uv_numa_blade_id());
 
-	pqp = __get_cpu_var(bau_control).va_queue_first;
+	pqp = va_queue_first = __get_cpu_var(bau_control).va_queue_first;
+	va_queue_last = __get_cpu_var(bau_control).va_queue_last;
 	msg = __get_cpu_var(bau_control).bau_msg_head;
 	while (msg->sw_ack_vector) {
 		count++;
@@ -387,8 +388,8 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
 		uv_bau_process_message(msg, msg_slot, sw_ack_slot);
 
 		msg++;
-		if (msg > __get_cpu_var(bau_control).va_queue_last)
-			msg = __get_cpu_var(bau_control).va_queue_first;
+		if (msg > va_queue_last)
+			msg = va_queue_first;
 		__get_cpu_var(bau_control).bau_msg_head = msg;
 	}
 	if (!count)
@@ -401,7 +402,6 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
 
 	irq_exit();
 	set_irq_regs(old_regs);
-	return;
 }
 
 static void uv_enable_timeouts(void)
@@ -423,7 +423,6 @@ static void uv_enable_timeouts(void)
 		pnode = uv_blade_to_pnode(blade);
 		cur_cpu += uv_blade_nr_possible_cpus(i);
 	}
-	return;
 }
 
 static void *uv_ptc_seq_start(struct seq_file *file, loff_t *offset)
@@ -535,10 +534,10 @@ static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user,
 }
 
 static const struct seq_operations uv_ptc_seq_ops = {
-	.start	= uv_ptc_seq_start,
-	.next	= uv_ptc_seq_next,
-	.stop	= uv_ptc_seq_stop,
-	.show	= uv_ptc_seq_show
+	.start		= uv_ptc_seq_start,
+	.next		= uv_ptc_seq_next,
+	.stop		= uv_ptc_seq_stop,
+	.show		= uv_ptc_seq_show
 };
 
 static int uv_ptc_proc_open(struct inode *inode, struct file *file)
@@ -568,6 +567,7 @@ static int __init uv_ptc_init(void)
 	if (!proc_uv_ptc) {
 		printk(KERN_ERR "unable to create %s proc entry\n",
 		       UV_PTC_BASENAME);
+		remove_proc_entry("sgi_uv", NULL);
 		return -EINVAL;
 	}
 	proc_uv_ptc->proc_fops = &proc_uv_ptc_operations;
@@ -582,33 +582,26 @@ static struct bau_control * __init uv_table_bases_init(int blade, int node)
 	int i;
 	int *ip;
 	struct bau_msg_status *msp;
-	struct bau_control *bau_tablesp;
+	struct bau_control *bau_tabp;
 
-	bau_tablesp =
+	bau_tabp =
 	    kmalloc_node(sizeof(struct bau_control), GFP_KERNEL, node);
-	if (!bau_tablesp)
-		BUG();
-	bau_tablesp->msg_statuses =
+	BUG_ON(!bau_tabp);
+	bau_tabp->msg_statuses =
 	    kmalloc_node(sizeof(struct bau_msg_status) *
-			 DESTINATION_PAYLOAD_QUEUE_SIZE, GFP_KERNEL, node);
-	if (!bau_tablesp->msg_statuses)
-		BUG();
-	for (i = 0, msp = bau_tablesp->msg_statuses;
-	     i < DESTINATION_PAYLOAD_QUEUE_SIZE; i++, msp++) {
+			 DEST_Q_SIZE, GFP_KERNEL, node);
+	BUG_ON(!bau_tabp->msg_statuses);
+	for (i = 0, msp = bau_tabp->msg_statuses; i < DEST_Q_SIZE; i++, msp++)
 		bau_cpubits_clear(&msp->seen_by, (int)
 				  uv_blade_nr_possible_cpus(blade));
-	}
-	bau_tablesp->watching =
-	    kmalloc_node(sizeof(int) * DESTINATION_NUM_RESOURCES,
-			 GFP_KERNEL, node);
-	if (!bau_tablesp->watching)
-		BUG();
-	for (i = 0, ip = bau_tablesp->watching;
-	     i < DESTINATION_PAYLOAD_QUEUE_SIZE; i++, ip++) {
+	bau_tabp->watching =
+	    kmalloc_node(sizeof(int) * DEST_NUM_RESOURCES, GFP_KERNEL, node);
+	BUG_ON(!bau_tabp->watching);
+	for (i = 0, ip = bau_tabp->watching; i < DEST_Q_SIZE; i++, ip++) {
 		*ip = 0;
 	}
-	uv_bau_table_bases[blade] = bau_tablesp;
-	return bau_tablesp;
+	uv_bau_table_bases[blade] = bau_tabp;
+	return bau_tabsp;
 }
 
 /*
@@ -616,7 +609,7 @@ static struct bau_control * __init uv_table_bases_init(int blade, int node)
  */
 static void __init uv_table_bases_finish(int blade, int node, int cur_cpu,
 				  struct bau_control *bau_tablesp,
-				  struct bau_activation_descriptor *adp)
+				  struct bau_desc *adp)
 {
 	int i;
 	struct bau_control *bcp;
@@ -636,7 +629,7 @@ static void __init uv_table_bases_finish(int blade, int node, int cur_cpu,
 /*
  * initialize the sending side's sending buffers
  */
-static struct bau_activation_descriptor * __init
+static struct bau_desc * __init
 uv_activation_descriptor_init(int node, int pnode)
 {
 	int i;
@@ -644,13 +637,12 @@ uv_activation_descriptor_init(int node, int pnode)
 	unsigned long m;
 	unsigned long n;
 	unsigned long mmr_image;
-	struct bau_activation_descriptor *adp;
-	struct bau_activation_descriptor *ad2;
+	struct bau_desc *adp;
+	struct bau_desc *ad2;
 
-	adp = (struct bau_activation_descriptor *)
+	adp = (struct bau_desc *)
 	    kmalloc_node(16384, GFP_KERNEL, node);
-	if (!adp)
-		BUG();
+	BUG_ON(!adp);
 	pa = __pa((unsigned long)adp);
 	n = pa >> uv_nshift;
 	m = pa & uv_mmask;
@@ -660,7 +652,7 @@ uv_activation_descriptor_init(int node, int pnode)
 				      UVH_LB_BAU_SB_DESCRIPTOR_BASE,
 				      (n << UV_DESC_BASE_PNODE_SHIFT | m));
 	for (i = 0, ad2 = adp; i < UV_ACTIVATION_DESCRIPTOR_SIZE; i++, ad2++) {
-		memset(ad2, 0, sizeof(struct bau_activation_descriptor));
+		memset(ad2, 0, sizeof(struct bau_desc));
 		ad2->header.sw_ack_flag = 1;
 		ad2->header.base_dest_nodeid =
 		    uv_blade_to_pnode(uv_cpu_to_blade_id(0));
@@ -683,12 +675,10 @@ static struct bau_payload_queue_entry * __init uv_payload_queue_init(int node,
 	char *cp;
 	struct bau_payload_queue_entry *pqp;
 
-	pqp = (struct bau_payload_queue_entry *)
-	    kmalloc_node((DESTINATION_PAYLOAD_QUEUE_SIZE + 1) *
-			 sizeof(struct bau_payload_queue_entry),
-			 GFP_KERNEL, node);
-	if (!pqp)
-		BUG();
+	pqp = (struct bau_payload_queue_entry *) kmalloc_node(
+		(DEST_Q_SIZE + 1) * sizeof(struct bau_payload_queue_entry),
+		GFP_KERNEL, node);
+	BUG_ON(!pqp);
 	cp = (char *)pqp + 31;
 	pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5);
 	bau_tablesp->va_queue_first = pqp;
@@ -699,13 +689,11 @@ static struct bau_payload_queue_entry * __init uv_payload_queue_init(int node,
 			      uv_physnodeaddr(pqp));
 	uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL,
 			      uv_physnodeaddr(pqp));
-	bau_tablesp->va_queue_last =
-	    pqp + (DESTINATION_PAYLOAD_QUEUE_SIZE - 1);
+	bau_tablesp->va_queue_last = pqp + (DEST_Q_SIZE - 1);
 	uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST,
 			      (unsigned long)
 			      uv_physnodeaddr(bau_tablesp->va_queue_last));
-	memset(pqp, 0, sizeof(struct bau_payload_queue_entry) *
-	       DESTINATION_PAYLOAD_QUEUE_SIZE);
+	memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE);
 	return pqp;
 }
 
@@ -717,7 +705,7 @@ static int __init uv_init_blade(int blade, int node, int cur_cpu)
 	int pnode;
 	unsigned long pa;
 	unsigned long apicid;
-	struct bau_activation_descriptor *adp;
+	struct bau_desc *adp;
 	struct bau_payload_queue_entry *pqp;
 	struct bau_control *bau_tablesp;
 
@@ -755,7 +743,7 @@ static int __init uv_bau_init(void)
 
 	uv_bau_retry_limit = 1;
 	uv_nshift = uv_hub_info->n_val;
-	uv_mmask = ((unsigned long)1 << uv_hub_info->n_val) - 1;
+	uv_mmask = (1UL << uv_hub_info->n_val) - 1;
 	nblades = 0;
 	last_blade = -1;
 	for_each_online_node(node) {
@@ -767,8 +755,7 @@ static int __init uv_bau_init(void)
 	}
 	uv_bau_table_bases = (struct bau_control **)
 	    kmalloc(nblades * sizeof(struct bau_control *), GFP_KERNEL);
-	if (!uv_bau_table_bases)
-		BUG();
+	BUG_ON(!uv_bau_table_bases);
 	last_blade = -1;
 	for_each_online_node(node) {
 		blade = uv_node_to_blade_id(node);
diff --git a/include/asm-x86/uv/uv_bau.h b/include/asm-x86/uv/uv_bau.h
index e52fec82266..91ac0dfb758 100644
--- a/include/asm-x86/uv/uv_bau.h
+++ b/include/asm-x86/uv/uv_bau.h
@@ -54,25 +54,25 @@
 /*
  * bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1
  */
-#define DESC_STATUS_IDLE                0
-#define DESC_STATUS_ACTIVE              1
-#define DESC_STATUS_DESTINATION_TIMEOUT 2
-#define DESC_STATUS_SOURCE_TIMEOUT      3
+#define DESC_STATUS_IDLE		0
+#define DESC_STATUS_ACTIVE		1
+#define DESC_STATUS_DESTINATION_TIMEOUT	2
+#define DESC_STATUS_SOURCE_TIMEOUT	3
 
 /*
  * source side threshholds at which message retries print a warning
  */
-#define SOURCE_TIMEOUT_LIMIT            20
-#define DESTINATION_TIMEOUT_LIMIT       20
+#define SOURCE_TIMEOUT_LIMIT		20
+#define DESTINATION_TIMEOUT_LIMIT	20
 
 /*
  * number of entries in the destination side payload queue
  */
-#define DESTINATION_PAYLOAD_QUEUE_SIZE  17
+#define DEST_Q_SIZE			17
 /*
  * number of destination side software ack resources
  */
-#define DESTINATION_NUM_RESOURCES       8
+#define DEST_NUM_RESOURCES		8
 #define MAX_CPUS_PER_NODE		32
 /*
  * completion statuses for sending a TLB flush message
@@ -197,10 +197,11 @@ struct bau_msg_header {
 };
 
 /*
+ * The activation descriptor:
  * The format of the message to send, plus all accompanying control
  * Should be 64 bytes
  */
-struct bau_activation_descriptor {
+struct bau_desc {
 	struct bau_target_nodemask distribution;
 	/*
 	 * message template, consisting of header and payload:
@@ -281,7 +282,7 @@ struct bau_sw_ack_status {
  * one on every node and per-cpu; to locate the software tables
  */
 struct bau_control {
-	struct bau_activation_descriptor *descriptor_base;
+	struct bau_desc *descriptor_base;
 	struct bau_payload_queue_entry *bau_msg_head;
 	struct bau_payload_queue_entry *va_queue_first;
 	struct bau_payload_queue_entry *va_queue_last;
-- 
cgit v1.2.3-70-g09d2


From b6df1b8bc1250191cfee15627697111c1cbda53f Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Thu, 19 Jun 2008 21:51:05 -0500
Subject: x86: fix stack overflow for large values of MAX_APICS

physid_mask_of_physid() causes a huge stack (12k) to be created if the
number of APICS is large. Replace physid_mask_of_physid() with a
new function that does not create large stacks. This is a problem only
on large x86_64 systems.

this paves the way to increase MAX_APICS.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Cc: linux-mm@kvack.org
Cc: mingo@elte.hu
Cc: tglx@linutronix.de
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c | 2 +-
 arch/x86/kernel/apic_64.c | 2 +-
 arch/x86/kernel/smpboot.c | 5 ++---
 include/asm-x86/mpspec.h  | 7 +++++++
 4 files changed, 11 insertions(+), 5 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index d5767cb19d5..3f13a1aa07c 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1269,7 +1269,7 @@ int __init APIC_init_uniprocessor(void)
 #ifdef CONFIG_CRASH_DUMP
 	boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
 #endif
-	phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
+	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
 
 	setup_local_APIC();
 
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 0633cfd0dc2..5e22f8d2d50 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -942,7 +942,7 @@ int __init APIC_init_uniprocessor(void)
 
 	verify_local_APIC();
 
-	phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
+	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
 	apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
 
 	setup_local_APIC();
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 3e1cecedde4..664a5db36d4 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1091,10 +1091,9 @@ static __init void disable_smp(void)
 	smpboot_clear_io_apic_irqs();
 #endif
 	if (smp_found_config)
-		phys_cpu_present_map =
-				physid_mask_of_physid(boot_cpu_physical_apicid);
+		physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
 	else
-		phys_cpu_present_map = physid_mask_of_physid(0);
+		physid_set_mask_of_physid(0, &phys_cpu_present_map);
 	map_cpu_to_logical_apicid();
 	cpu_set(0, per_cpu(cpu_sibling_map, 0));
 	cpu_set(0, per_cpu(cpu_core_map, 0));
diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h
index 57a991b9c05..b69e7ba7bf1 100644
--- a/include/asm-x86/mpspec.h
+++ b/include/asm-x86/mpspec.h
@@ -101,6 +101,7 @@ typedef struct physid_mask physid_mask_t;
 		__physid_mask;						\
 	})
 
+/* Note: will create very large stack frames if physid_mask_t is big */
 #define physid_mask_of_physid(physid)					\
 	({								\
 		physid_mask_t __physid_mask = PHYSID_MASK_NONE;		\
@@ -108,6 +109,12 @@ typedef struct physid_mask physid_mask_t;
 		__physid_mask;						\
 	})
 
+static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map)
+{
+	physids_clear(*map);
+	physid_set(physid, *map);
+}
+
 #define PHYSID_MASK_ALL		{ {[0 ... PHYSID_ARRAY_SIZE-1] = ~0UL} }
 #define PHYSID_MASK_NONE	{ {[0 ... PHYSID_ARRAY_SIZE-1] = 0UL} }
 
-- 
cgit v1.2.3-70-g09d2


From ab9c0bb8a8c1d71dd303abdaa61ec496128e2fbe Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Mon, 16 Jun 2008 12:09:10 -0500
Subject: x86: increase MAX_APICS for very large x86-64 configs

Increase the maximum number of apics when running very large
configurations. This patch has no affect on most systems.

The patch has no effect on any 32-bit kernel. It adds ~4k to the size
of 64-bit kernels but only if NR_CPUS > 255.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/mpspec_def.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/mpspec_def.h b/include/asm-x86/mpspec_def.h
index dc6ef85e362..38d1e73b49e 100644
--- a/include/asm-x86/mpspec_def.h
+++ b/include/asm-x86/mpspec_def.h
@@ -17,10 +17,11 @@
 # define MAX_MPC_ENTRY 1024
 # define MAX_APICS      256
 #else
-/*
- * A maximum of 255 APICs with the current APIC ID architecture.
- */
-# define MAX_APICS 255
+# if NR_CPUS <= 255
+#  define MAX_APICS     255
+# else
+#  define MAX_APICS   32768
+# endif
 #endif
 
 struct intel_mp_floating {
-- 
cgit v1.2.3-70-g09d2


From 1ecd27657b735128a728ebf0c31fce5e1456718a Mon Sep 17 00:00:00 2001
From: Bernhard Walle <bwalle@suse.de>
Date: Fri, 20 Jun 2008 15:38:22 +0200
Subject: x86: unify crashkernel reservation for 32 and 64 bit

This patch moves the reserve_crashkernel() to setup.c and removes the
architecture-specific version. Both versions were more or less the same.

I tested it on both x86-64 and i386, with CONFIG_KEXEC on and off (so
that it compiles).

Signed-off-by: Bernhard Walle <bwalle@suse.de>
Cc: yhlu.kernel@gmail.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup.c    | 61 ++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/setup_32.c | 53 ----------------------------------------
 arch/x86/kernel/setup_64.c | 40 ------------------------------
 include/asm-x86/setup.h    |  3 +++
 4 files changed, 64 insertions(+), 93 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ebb0a2bcdc0..c5330f601b6 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -3,6 +3,7 @@
 #include <linux/init.h>
 #include <linux/bootmem.h>
 #include <linux/percpu.h>
+#include <linux/kexec.h>
 #include <asm/smp.h>
 #include <asm/percpu.h>
 #include <asm/sections.h>
@@ -11,6 +12,7 @@
 #include <asm/topology.h>
 #include <asm/mpspec.h>
 #include <asm/apicdef.h>
+#include <asm/highmem.h>
 
 #ifdef CONFIG_X86_LOCAL_APIC
 unsigned int num_processors;
@@ -404,3 +406,62 @@ EXPORT_SYMBOL(node_to_cpumask);
 #endif /* CONFIG_DEBUG_PER_CPU_MAPS */
 
 #endif /* X86_64_NUMA */
+
+
+/*
+ * --------- Crashkernel reservation ------------------------------
+ */
+
+static inline unsigned long long get_total_mem(void)
+{
+	unsigned long long total;
+
+	total = max_low_pfn - min_low_pfn;
+#ifdef CONFIG_HIGHMEM
+	total += highend_pfn - highstart_pfn;
+#endif
+
+	return total << PAGE_SHIFT;
+}
+
+#ifdef CONFIG_KEXEC
+void __init reserve_crashkernel(void)
+{
+	unsigned long long total_mem;
+	unsigned long long crash_size, crash_base;
+	int ret;
+
+	total_mem = get_total_mem();
+
+	ret = parse_crashkernel(boot_command_line, total_mem,
+			&crash_size, &crash_base);
+	if (ret == 0 && crash_size > 0) {
+		if (crash_base <= 0) {
+			printk(KERN_INFO "crashkernel reservation failed - "
+					"you have to specify a base address\n");
+			return;
+		}
+
+		if (reserve_bootmem_generic(crash_base, crash_size,
+					BOOTMEM_EXCLUSIVE) < 0) {
+			printk(KERN_INFO "crashkernel reservation failed - "
+					"memory is in use\n");
+			return;
+		}
+
+		printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+				"for crashkernel (System RAM: %ldMB)\n",
+				(unsigned long)(crash_size >> 20),
+				(unsigned long)(crash_base >> 20),
+				(unsigned long)(total_mem >> 20));
+
+		crashk_res.start = crash_base;
+		crashk_res.end   = crash_base + crash_size - 1;
+		insert_resource(&iomem_resource, &crashk_res);
+	}
+}
+#else
+void __init reserve_crashkernel(void)
+{}
+#endif
+
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index a9b19ad24ed..369d0fe1ff9 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -429,59 +429,6 @@ extern unsigned long __init setup_memory(void);
 extern void zone_sizes_init(void);
 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
 
-static inline unsigned long long get_total_mem(void)
-{
-	unsigned long long total;
-
-	total = max_low_pfn - min_low_pfn;
-#ifdef CONFIG_HIGHMEM
-	total += highend_pfn - highstart_pfn;
-#endif
-
-	return total << PAGE_SHIFT;
-}
-
-#ifdef CONFIG_KEXEC
-static void __init reserve_crashkernel(void)
-{
-	unsigned long long total_mem;
-	unsigned long long crash_size, crash_base;
-	int ret;
-
-	total_mem = get_total_mem();
-
-	ret = parse_crashkernel(boot_command_line, total_mem,
-			&crash_size, &crash_base);
-	if (ret == 0 && crash_size > 0) {
-		if (crash_base <= 0) {
-			printk(KERN_INFO "crashkernel reservation failed - "
-					"you have to specify a base address\n");
-			return;
-		}
-
-		if (reserve_bootmem_generic(crash_base, crash_size,
-					BOOTMEM_EXCLUSIVE) < 0) {
-			printk(KERN_INFO "crashkernel reservation failed - "
-					"memory is in use\n");
-			return;
-		}
-
-		printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
-				"for crashkernel (System RAM: %ldMB)\n",
-				(unsigned long)(crash_size >> 20),
-				(unsigned long)(crash_base >> 20),
-				(unsigned long)(total_mem >> 20));
-
-		crashk_res.start = crash_base;
-		crashk_res.end   = crash_base + crash_size - 1;
-		insert_resource(&iomem_resource, &crashk_res);
-	}
-}
-#else
-static inline void __init reserve_crashkernel(void)
-{}
-#endif
-
 #ifdef CONFIG_BLK_DEV_INITRD
 
 static bool do_relocate_initrd = false;
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 16ef53ab538..504caeaffd5 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -228,46 +228,6 @@ static inline void copy_edd(void)
 }
 #endif
 
-#ifdef CONFIG_KEXEC
-static void __init reserve_crashkernel(void)
-{
-	unsigned long long total_mem;
-	unsigned long long crash_size, crash_base;
-	int ret;
-
-	total_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT;
-
-	ret = parse_crashkernel(boot_command_line, total_mem,
-			&crash_size, &crash_base);
-	if (ret == 0 && crash_size) {
-		if (crash_base <= 0) {
-			printk(KERN_INFO "crashkernel reservation failed - "
-					"you have to specify a base address\n");
-			return;
-		}
-
-		if (reserve_bootmem_generic(crash_base, crash_size,
-					BOOTMEM_EXCLUSIVE) < 0) {
-			printk(KERN_INFO "crashkernel reservation failed - "
-					"memory is in use\n");
-			return;
-		}
-
-		printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
-				"for crashkernel (System RAM: %ldMB)\n",
-				(unsigned long)(crash_size >> 20),
-				(unsigned long)(crash_base >> 20),
-				(unsigned long)(total_mem >> 20));
-		crashk_res.start = crash_base;
-		crashk_res.end   = crash_base + crash_size - 1;
-		insert_resource(&iomem_resource, &crashk_res);
-	}
-}
-#else
-static inline void __init reserve_crashkernel(void)
-{}
-#endif
-
 /*
  * setup_arch - architecture-specific boot-time initializations
  *
diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index e14b6e73d26..b434bdd82ba 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -8,6 +8,9 @@
 /* Interrupt control for vSMPowered x86_64 systems */
 void vsmp_init(void);
 
+/* Crashkernel reservation */
+void reserve_crashkernel(void);
+
 #ifndef CONFIG_PARAVIRT
 #define paravirt_post_allocator_init()	do {} while (0)
 #endif
-- 
cgit v1.2.3-70-g09d2


From a939098afcfa5f81d3474782ec15c6d114e57763 Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Wed, 28 May 2008 16:19:53 -0700
Subject: x86: move x86_64 gdt closer to i386

i386 and x86_64 used two different schemes for maintaining the gdt.
With this patch, x86_64 initial gdt table is defined in a .c file,
same way as i386 is now. Also, we call it "gdt_page", and the descriptor,
"early_gdt_descr". This way we achieve common naming, which can allow for
more code integration.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/head_64.S        | 48 +++++-----------------------------------
 arch/x86/kernel/setup64.c        |  5 +----
 arch/x86/kernel/setup_64.c       | 19 ++++++++++++++--
 arch/x86/kernel/smpboot.c        | 12 +++-------
 arch/x86/kernel/x8664_ksyms_64.c |  5 -----
 include/asm-x86/desc.h           | 24 +++++++++-----------
 include/asm-x86/segment.h        | 23 ++++++++++---------
 7 files changed, 48 insertions(+), 88 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 918a2711aff..32f5a114d1a 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -203,7 +203,7 @@ ENTRY(secondary_startup_64)
 	 * addresses where we're currently running on. We have to do that here
 	 * because in 32bit we couldn't load a 64bit linear address.
 	 */
-	lgdt	cpu_gdt_descr(%rip)
+	lgdt	early_gdt_descr(%rip)
 
 	/* set up data segments. actually 0 would do too */
 	movl $__KERNEL_DS,%eax
@@ -391,54 +391,16 @@ NEXT_PAGE(level2_spare_pgt)
 
 	.data
 	.align 16
-	.globl cpu_gdt_descr
-cpu_gdt_descr:
-	.word	gdt_end-cpu_gdt_table-1
-gdt:
-	.quad	cpu_gdt_table
-#ifdef CONFIG_SMP
-	.rept	NR_CPUS-1
-	.word	0
-	.quad	0
-	.endr
-#endif
+	.globl early_gdt_descr
+early_gdt_descr:
+	.word	GDT_ENTRIES*8-1
+	.quad   per_cpu__gdt_page
 
 ENTRY(phys_base)
 	/* This must match the first entry in level2_kernel_pgt */
 	.quad   0x0000000000000000
 
-/* We need valid kernel segments for data and code in long mode too
- * IRET will check the segment types  kkeil 2000/10/28
- * Also sysret mandates a special GDT layout 
- */
-		 		
-	.section .data.page_aligned, "aw"
-	.align PAGE_SIZE
-
-/* The TLS descriptors are currently at a different place compared to i386.
-   Hopefully nobody expects them at a fixed place (Wine?) */
 	
-ENTRY(cpu_gdt_table)
-	.quad	0x0000000000000000	/* NULL descriptor */
-	.quad	0x00cf9b000000ffff	/* __KERNEL32_CS */
-	.quad	0x00af9b000000ffff	/* __KERNEL_CS */
-	.quad	0x00cf93000000ffff	/* __KERNEL_DS */
-	.quad	0x00cffb000000ffff	/* __USER32_CS */
-	.quad	0x00cff3000000ffff	/* __USER_DS, __USER32_DS  */
-	.quad	0x00affb000000ffff	/* __USER_CS */
-	.quad	0x0			/* unused */
-	.quad	0,0			/* TSS */
-	.quad	0,0			/* LDT */
-	.quad   0,0,0			/* three TLS descriptors */ 
-	.quad	0x0000f40000000000	/* node/CPU stored in limit */
-gdt_end:	
-	/* asm/segment.h:GDT_ENTRIES must match this */	
-	/* This should be a multiple of the cache line size */
-	/* GDTs of other CPUs are now dynamically allocated */
-
-	/* zero the remaining page */
-	.fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0
-
 	.section .bss, "aw", @nobits
 	.align L1_CACHE_BYTES
 ENTRY(idt_table)
diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c
index fc1a56da824..70ff0718677 100644
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -202,11 +202,8 @@ void __cpuinit cpu_init (void)
 	 * Initialize the per-CPU GDT with the boot GDT,
 	 * and set up the GDT descriptor:
 	 */
-	if (cpu)
-		memcpy(get_cpu_gdt_table(cpu), cpu_gdt_table, GDT_SIZE);
 
-	cpu_gdt_descr[cpu].size = GDT_SIZE;
-	load_gdt((const struct desc_ptr *)&cpu_gdt_descr[cpu]);
+	switch_to_new_gdt();
 	load_idt((const struct desc_ptr *)&idt_descr);
 
 	memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 504caeaffd5..a93300de4da 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -81,8 +81,6 @@
 #define ARCH_SETUP
 #endif
 
-#include "cpu/cpu.h"
-
 /*
  * Machine setup..
  */
@@ -228,6 +226,23 @@ static inline void copy_edd(void)
 }
 #endif
 
+/* Overridden in paravirt.c if CONFIG_PARAVIRT */
+void __attribute__((weak)) __init memory_setup(void)
+{
+       machine_specific_memory_setup();
+}
+
+/* Current gdt points %fs at the "master" per-cpu area: after this,
+ * it's on the real one. */
+void switch_to_new_gdt(void)
+{
+	struct desc_ptr gdt_descr;
+
+	gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
+	gdt_descr.size = GDT_SIZE - 1;
+	load_gdt(&gdt_descr);
+}
+
 /*
  * setup_arch - architecture-specific boot-time initializations
  *
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index a71e3cad547..fe2bd515d6c 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -849,14 +849,8 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
 		.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
 	};
 	INIT_WORK(&c_idle.work, do_fork_idle);
-#ifdef CONFIG_X86_64
-	/* allocate memory for gdts of secondary cpus. Hotplug is considered */
-	if (!cpu_gdt_descr[cpu].address &&
-		!(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) {
-		printk(KERN_ERR "Failed to allocate GDT for CPU %d\n", cpu);
-		return -1;
-	}
 
+#ifdef CONFIG_X86_64
 	/* Allocate node local memory for AP pdas */
 	if (cpu > 0) {
 		boot_error = get_local_pda(cpu);
@@ -898,7 +892,6 @@ do_rest:
 #ifdef CONFIG_X86_32
 	per_cpu(current_task, cpu) = c_idle.idle;
 	init_gdt(cpu);
-	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
 	c_idle.idle->thread.ip = (unsigned long) start_secondary;
 	/* Stack for startup_32 can be just as for start_secondary onwards */
 	irq_ctx_init(cpu);
@@ -908,6 +901,7 @@ do_rest:
 	initial_code = (unsigned long)start_secondary;
 	clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
 #endif
+	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
 	stack_start.sp = (void *) c_idle.idle->thread.sp;
 
 	/* start_ip had better be page-aligned! */
@@ -1252,8 +1246,8 @@ void __init native_smp_prepare_boot_cpu(void)
 	int me = smp_processor_id();
 #ifdef CONFIG_X86_32
 	init_gdt(me);
-	switch_to_new_gdt();
 #endif
+	switch_to_new_gdt();
 	/* already set me in cpu_online_map in boot_cpu_init() */
 	cpu_set(me, cpu_callout_map);
 	per_cpu(cpu_state, me) = CPU_ONLINE;
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index f6c05d0410f..2f306a82689 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -53,8 +53,3 @@ EXPORT_SYMBOL(init_level4_pgt);
 EXPORT_SYMBOL(load_gs_index);
 
 EXPORT_SYMBOL(_proxy_pda);
-
-#ifdef CONFIG_PARAVIRT
-/* Virtualized guests may want to use it */
-EXPORT_SYMBOL_GPL(cpu_gdt_descr);
-#endif
diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h
index b3875d4b4fa..07f9f2b17be 100644
--- a/include/asm-x86/desc.h
+++ b/include/asm-x86/desc.h
@@ -29,11 +29,17 @@ static inline void fill_ldt(struct desc_struct *desc,
 extern struct desc_ptr idt_descr;
 extern gate_desc idt_table[];
 
+struct gdt_page {
+	struct desc_struct gdt[GDT_ENTRIES];
+} __attribute__((aligned(PAGE_SIZE)));
+DECLARE_PER_CPU(struct gdt_page, gdt_page);
+
+static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
+{
+	return per_cpu(gdt_page, cpu).gdt;
+}
+
 #ifdef CONFIG_X86_64
-extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
-extern struct desc_ptr cpu_gdt_descr[];
-/* the cpu gdt accessor */
-#define get_cpu_gdt_table(x) ((struct desc_struct *)cpu_gdt_descr[x].address)
 
 static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
 			     unsigned dpl, unsigned ist, unsigned seg)
@@ -51,16 +57,6 @@ static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
 }
 
 #else
-struct gdt_page {
-	struct desc_struct gdt[GDT_ENTRIES];
-} __attribute__((aligned(PAGE_SIZE)));
-DECLARE_PER_CPU(struct gdt_page, gdt_page);
-
-static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
-{
-	return per_cpu(gdt_page, cpu).gdt;
-}
-
 static inline void pack_gate(gate_desc *gate, unsigned char type,
 			     unsigned long base, unsigned dpl, unsigned flags,
 			     unsigned short seg)
diff --git a/include/asm-x86/segment.h b/include/asm-x86/segment.h
index ed5131dd7d9..dfc8601c089 100644
--- a/include/asm-x86/segment.h
+++ b/include/asm-x86/segment.h
@@ -61,18 +61,14 @@
 #define GDT_ENTRY_TLS_MAX 	(GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
 
 #define GDT_ENTRY_DEFAULT_USER_CS	14
-#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3)
 
 #define GDT_ENTRY_DEFAULT_USER_DS	15
-#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3)
 
 #define GDT_ENTRY_KERNEL_BASE	12
 
 #define GDT_ENTRY_KERNEL_CS		(GDT_ENTRY_KERNEL_BASE + 0)
-#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
 
 #define GDT_ENTRY_KERNEL_DS		(GDT_ENTRY_KERNEL_BASE + 1)
-#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
 
 #define GDT_ENTRY_TSS			(GDT_ENTRY_KERNEL_BASE + 4)
 #define GDT_ENTRY_LDT			(GDT_ENTRY_KERNEL_BASE + 5)
@@ -139,10 +135,11 @@
 #else
 #include <asm/cache.h>
 
-#define __KERNEL_CS	0x10
-#define __KERNEL_DS	0x18
+#define GDT_ENTRY_KERNEL32_CS 1
+#define GDT_ENTRY_KERNEL_CS 2
+#define GDT_ENTRY_KERNEL_DS 3
 
-#define __KERNEL32_CS   0x08
+#define __KERNEL32_CS   (GDT_ENTRY_KERNEL32_CS * 8)
 
 /*
  * we cannot use the same code segment descriptor for user and kernel
@@ -150,10 +147,10 @@
  * The segment offset needs to contain a RPL. Grr. -AK
  * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets)
  */
-
-#define __USER32_CS   0x23   /* 4*8+3 */
-#define __USER_DS     0x2b   /* 5*8+3 */
-#define __USER_CS     0x33   /* 6*8+3 */
+#define GDT_ENTRY_DEFAULT_USER32_CS 4
+#define GDT_ENTRY_DEFAULT_USER_DS 5
+#define GDT_ENTRY_DEFAULT_USER_CS 6
+#define __USER32_CS   (GDT_ENTRY_DEFAULT_USER32_CS * 8 + 3)
 #define __USER32_DS	__USER_DS
 
 #define GDT_ENTRY_TSS 8	/* needs two entries */
@@ -175,6 +172,10 @@
 
 #endif
 
+#define __KERNEL_CS	(GDT_ENTRY_KERNEL_CS * 8)
+#define __KERNEL_DS	(GDT_ENTRY_KERNEL_DS * 8)
+#define __USER_DS     (GDT_ENTRY_DEFAULT_USER_DS* 8 + 3)
+#define __USER_CS     (GDT_ENTRY_DEFAULT_USER_CS* 8 + 3)
 #ifndef CONFIG_PARAVIRT
 #define get_kernel_rpl()  0
 #endif
-- 
cgit v1.2.3-70-g09d2


From 3fde690011a84e19f98f77bfaa349b2119ddd2d2 Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Wed, 28 May 2008 20:34:19 -0700
Subject: x86: change __setup_vector_irq with setup_vector_irq

We create a version of it for i386, and then take the CONFIG_X86_64
ifdef out of the game. We could create a __setup_vector_irq for i386,
but it would incur in an unnecessary lock taking. Moreover, it is better
practice to only export setup_vector_irq anyway.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic_32.c |  5 +++++
 arch/x86/kernel/io_apic_64.c |  9 ++++++++-
 arch/x86/kernel/smpboot.c    | 11 ++---------
 include/asm-x86/hw_irq.h     |  2 +-
 4 files changed, 16 insertions(+), 11 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index fedb3b113ac..d6af301c822 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -1207,6 +1207,11 @@ static int assign_irq_vector(int irq)
 
 	return vector;
 }
+
+void setup_vector_irq(int cpu)
+{
+}
+
 static struct irq_chip ioapic_chip;
 
 #define IOAPIC_AUTO	-1
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 2eba4f4c14b..08c48750888 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -801,7 +801,7 @@ static void __clear_irq_vector(int irq)
 	cpus_clear(cfg->domain);
 }
 
-void __setup_vector_irq(int cpu)
+static void __setup_vector_irq(int cpu)
 {
 	/* Initialize vector_irq on a new cpu */
 	/* This function must be called with vector_lock held */
@@ -824,6 +824,13 @@ void __setup_vector_irq(int cpu)
 	}
 }
 
+void setup_vector_irq(int cpu)
+{
+	spin_lock(&vector_lock);
+	__setup_vector_irq(smp_processor_id());
+	spin_unlock(&vector_lock);
+}
+
 
 static struct irq_chip ioapic_chip;
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6f9a31a1881..e09f3124738 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -329,15 +329,8 @@ static void __cpuinit start_secondary(void *unused)
 	 * smp_call_function().
 	 */
 	lock_ipi_call_lock();
-#ifdef CONFIG_X86_64
-	spin_lock(&vector_lock);
-
-	/* Setup the per cpu irq handling data structures */
-	__setup_vector_irq(smp_processor_id());
-	/*
-	 * Allow the master to continue.
-	 */
-	spin_unlock(&vector_lock);
+#ifdef CONFIG_X86_IO_APIC
+	setup_vector_irq(smp_processor_id());
 #endif
 	cpu_set(smp_processor_id(), cpu_online_map);
 	unlock_ipi_call_lock();
diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h
index 1428b41dcbb..18f067c310f 100644
--- a/include/asm-x86/hw_irq.h
+++ b/include/asm-x86/hw_irq.h
@@ -97,9 +97,9 @@ extern void (*const interrupt[NR_IRQS])(void);
 #else
 typedef int vector_irq_t[NR_VECTORS];
 DECLARE_PER_CPU(vector_irq_t, vector_irq);
-extern void __setup_vector_irq(int cpu);
 extern spinlock_t vector_lock;
 #endif
+extern void setup_vector_irq(int cpu);
 
 #endif /* !ASSEMBLY_ */
 
-- 
cgit v1.2.3-70-g09d2


From 1481a3dd42c21ac4a8b9497cb9f5df816d6b064f Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Wed, 4 Jun 2008 15:35:03 -0300
Subject: x86: move cpu_exit_clear to process_32.c

Take it out of smpboot.c, and move it to process_32.c, closer
to its only user.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/process_32.c | 16 ++++++++++++++++
 arch/x86/kernel/smpboot.c    | 19 +------------------
 include/asm-x86/numa_32.h    |  1 +
 include/asm-x86/smp.h        |  1 -
 4 files changed, 18 insertions(+), 19 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index c2a11d77b1b..9a139f6c9df 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -74,6 +74,22 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
 
 #ifdef CONFIG_HOTPLUG_CPU
 #include <asm/nmi.h>
+
+static void cpu_exit_clear(void)
+{
+	int cpu = raw_smp_processor_id();
+
+	idle_task_exit();
+
+	cpu_uninit();
+	irq_ctx_exit(cpu);
+
+	cpu_clear(cpu, cpu_callout_map);
+	cpu_clear(cpu, cpu_callin_map);
+
+	numa_remove_cpu(cpu);
+}
+
 /* We don't actually take CPU down, just spin without interrupts. */
 static inline void play_dead(void)
 {
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 820c23dbe76..67af727f733 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -181,7 +181,7 @@ static void map_cpu_to_logical_apicid(void)
 	map_cpu_to_node(cpu, node);
 }
 
-static void numa_remove_cpu(int cpu)
+void numa_remove_cpu(int cpu)
 {
 	cpu_2_logical_apicid[cpu] = BAD_APICID;
 	unmap_cpu_to_node(cpu);
@@ -1227,23 +1227,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-#  ifdef CONFIG_X86_32
-void cpu_exit_clear(void)
-{
-	int cpu = raw_smp_processor_id();
-
-	idle_task_exit();
-
-	cpu_uninit();
-	irq_ctx_exit(cpu);
-
-	cpu_clear(cpu, cpu_callout_map);
-	cpu_clear(cpu, cpu_callin_map);
-
-	numa_remove_cpu(cpu);
-}
-#  endif /* CONFIG_X86_32 */
-
 static void remove_siblinginfo(int cpu)
 {
 	int sibling;
diff --git a/include/asm-x86/numa_32.h b/include/asm-x86/numa_32.h
index a02674f6486..ed6c88eac84 100644
--- a/include/asm-x86/numa_32.h
+++ b/include/asm-x86/numa_32.h
@@ -2,6 +2,7 @@
 #define _ASM_X86_32_NUMA_H 1
 
 extern int pxm_to_nid(int pxm);
+extern void numa_remove_cpu(int cpu);
 
 #ifdef CONFIG_NUMA
 extern void __init remap_numa_kva(void);
diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
index fc1007321ef..fad45f6a193 100644
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@ -188,7 +188,6 @@ static inline int hard_smp_processor_id(void)
 #endif /* CONFIG_X86_LOCAL_APIC */
 
 #ifdef CONFIG_HOTPLUG_CPU
-extern void cpu_exit_clear(void);
 extern void cpu_uninit(void);
 #endif
 
-- 
cgit v1.2.3-70-g09d2


From 3c999f142665265afd0fe9190204dd051f17e505 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Fri, 20 Jun 2008 16:11:20 -0700
Subject: x86: check command line when CONFIG_X86_MPPARSE is not set, v2

if acpi=off, acpi=noirq and pci=noacpi, we need to disable apic.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: "Maciej W. Rozycki" <macro@linux-mips.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/acpi/boot.c | 14 ++++++++++++++
 arch/x86/kernel/apic_32.c   |  2 +-
 arch/x86/kernel/setup.c     |  4 ++++
 arch/x86/kernel/setup_32.c  |  5 +++++
 arch/x86/kernel/setup_64.c  |  9 +++++++++
 include/asm-x86/apic.h      |  6 +++++-
 include/linux/acpi.h        |  6 ++++++
 7 files changed, 44 insertions(+), 2 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 6516359922b..5c0107602b6 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1787,6 +1787,20 @@ static int __init parse_pci(char *arg)
 }
 early_param("pci", parse_pci);
 
+int __init acpi_mps_check(void)
+{
+#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_X86_MPPARSE)
+/* mptable code is not built-in*/
+	if (acpi_disabled || acpi_noirq) {
+		printk(KERN_WARNING "MPS support code is not built-in.\n"
+		       "Using acpi=off or acpi=noirq or pci=noacpi "
+		       "may have problem\n");
+		return 1;
+	}
+#endif
+	return 0;
+}
+
 #ifdef CONFIG_X86_IO_APIC
 static int __init parse_acpi_skip_timer_override(char *arg)
 {
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index dd8de26b278..4932d7813bc 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -57,7 +57,7 @@ unsigned long mp_lapic_addr;
  *
  * -1=force-disable, +1=force-enable
  */
-static int enable_local_apic __initdata;
+int enable_local_apic;
 
 /* Local APIC timer verification ok */
 static int local_apic_timer_verify_ok;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index c5330f601b6..56aee55cf8d 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -161,6 +161,10 @@ void __init setup_per_cpu_areas(void)
 	char *ptr;
 	int cpu;
 
+	/* no processor from mptable or madt */
+	if (!num_processors)
+		num_processors = 1;
+
 #ifdef CONFIG_HOTPLUG_CPU
 	prefill_possible_map();
 #else
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 369d0fe1ff9..cad4e893df0 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -677,6 +677,11 @@ void __init setup_arch(char **cmdline_p)
 
 	parse_early_param();
 
+	if (acpi_mps_check()){
+		enable_local_apic = -1;
+		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
+	}
+
 	finish_e820_parsing();
 
 	probe_roms();
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index a93300de4da..175c696ec53 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -302,6 +302,11 @@ void __init setup_arch(char **cmdline_p)
 
 	parse_early_param();
 
+	if (acpi_mps_check()) {
+		disable_apic = 1;
+		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
+	}
+
 #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
 	if (init_ohci1394_dma_early)
 		init_ohci1394_dma_on_all_controllers();
@@ -723,6 +728,10 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
 		cpu_devs[c->x86_vendor]->c_early_init(c);
 
 	validate_pat_support(c);
+
+	/* early_param could clear that, but recall get it set again */
+	if (disable_apic)
+		clear_cpu_cap(c, X86_FEATURE_APIC);
 }
 
 /*
diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index 313bcaf4b6c..9fe941cd843 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -39,8 +39,12 @@ extern int apic_verbosity;
 extern int local_apic_timer_c2_ok;
 
 extern int ioapic_force;
-extern int disable_apic;
 
+#ifdef CONFIG_X86_64
+extern int disable_apic;
+#else
+extern int enable_local_apic;
+#endif
 /*
  * Basic functions accessing APICs.
  */
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 41f7ce7edd7..0601075d09a 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -82,6 +82,7 @@ char * __acpi_map_table (unsigned long phys_addr, unsigned long size);
 int early_acpi_boot_init(void);
 int acpi_boot_init (void);
 int acpi_boot_table_init (void);
+int acpi_mps_check (void);
 int acpi_numa_init (void);
 
 int acpi_table_init (void);
@@ -250,6 +251,11 @@ static inline int acpi_boot_table_init(void)
 	return 0;
 }
 
+static inline int acpi_mps_check(void)
+{
+	return 0;
+}
+
 static inline int acpi_check_resource_conflict(struct resource *res)
 {
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From f580366f77cc4e035a68369105fbeae5bf436b4c Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sat, 21 Jun 2008 03:24:19 -0700
Subject: x86: seperate funcs from setup_64 to cpu common_64.c

Signed-off-by: Yinghai Lu <yhlu.kernel@mail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/Makefile    |   2 +-
 arch/x86/kernel/cpu/common_64.c | 406 ++++++++++++++++++++
 arch/x86/kernel/setup_64.c      | 823 ----------------------------------------
 include/asm-x86/processor.h     |   1 +
 4 files changed, 408 insertions(+), 824 deletions(-)
 create mode 100644 arch/x86/kernel/cpu/common_64.c
 delete mode 100644 arch/x86/kernel/setup_64.c

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 65b1be5fe9c..ee76eaad300 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -6,7 +6,7 @@ obj-y			:= intel_cacheinfo.o addon_cpuid_features.o
 obj-y			+= proc.o feature_names.o
 
 obj-$(CONFIG_X86_32)	+= common.o bugs.o
-obj-$(CONFIG_X86_64)	+= bugs_64.o
+obj-$(CONFIG_X86_64)	+= common_64.o bugs_64.o
 obj-$(CONFIG_X86_32)	+= amd.o
 obj-$(CONFIG_X86_64)	+= amd_64.o
 obj-$(CONFIG_X86_32)	+= cyrix.o
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
new file mode 100644
index 00000000000..b6f205063f7
--- /dev/null
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -0,0 +1,406 @@
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/bootmem.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/msr.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/mtrr.h>
+#include <asm/mce.h>
+#include <asm/pat.h>
+#include <asm/numa.h>
+#ifdef CONFIG_X86_LOCAL_APIC
+#include <asm/mpspec.h>
+#include <asm/apic.h>
+#include <mach_apic.h>
+#endif
+
+#include "cpu.h"
+
+/* We need valid kernel segments for data and code in long mode too
+ * IRET will check the segment types  kkeil 2000/10/28
+ * Also sysret mandates a special GDT layout
+ */
+/* The TLS descriptors are currently at a different place compared to i386.
+   Hopefully nobody expects them at a fixed place (Wine?) */
+DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
+	[GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
+	[GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
+	[GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
+	[GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
+	[GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
+	[GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
+} };
+EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
+
+__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
+
+/* Current gdt points %fs at the "master" per-cpu area: after this,
+ * it's on the real one. */
+void switch_to_new_gdt(void)
+{
+	struct desc_ptr gdt_descr;
+
+	gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
+	gdt_descr.size = GDT_SIZE - 1;
+	load_gdt(&gdt_descr);
+}
+
+struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
+
+static void __cpuinit default_init(struct cpuinfo_x86 *c)
+{
+	display_cacheinfo(c);
+}
+
+static struct cpu_dev __cpuinitdata default_cpu = {
+	.c_init	= default_init,
+	.c_vendor = "Unknown",
+};
+static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
+
+int __cpuinit get_model_name(struct cpuinfo_x86 *c)
+{
+	unsigned int *v;
+
+	if (c->extended_cpuid_level < 0x80000004)
+		return 0;
+
+	v = (unsigned int *) c->x86_model_id;
+	cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
+	cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
+	cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
+	c->x86_model_id[48] = 0;
+	return 1;
+}
+
+
+void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
+{
+	unsigned int n, dummy, eax, ebx, ecx, edx;
+
+	n = c->extended_cpuid_level;
+
+	if (n >= 0x80000005) {
+		cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
+		printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), "
+		       "D cache %dK (%d bytes/line)\n",
+		       edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
+		c->x86_cache_size = (ecx>>24) + (edx>>24);
+		/* On K8 L1 TLB is inclusive, so don't count it */
+		c->x86_tlbsize = 0;
+	}
+
+	if (n >= 0x80000006) {
+		cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
+		ecx = cpuid_ecx(0x80000006);
+		c->x86_cache_size = ecx >> 16;
+		c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
+
+		printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
+		c->x86_cache_size, ecx & 0xFF);
+	}
+	if (n >= 0x80000008) {
+		cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
+		c->x86_virt_bits = (eax >> 8) & 0xff;
+		c->x86_phys_bits = eax & 0xff;
+	}
+}
+
+void __cpuinit detect_ht(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_SMP
+	u32 eax, ebx, ecx, edx;
+	int index_msb, core_bits;
+
+	cpuid(1, &eax, &ebx, &ecx, &edx);
+
+
+	if (!cpu_has(c, X86_FEATURE_HT))
+		return;
+	if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
+		goto out;
+
+	smp_num_siblings = (ebx & 0xff0000) >> 16;
+
+	if (smp_num_siblings == 1) {
+		printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
+	} else if (smp_num_siblings > 1) {
+
+		if (smp_num_siblings > NR_CPUS) {
+			printk(KERN_WARNING "CPU: Unsupported number of "
+			       "siblings %d", smp_num_siblings);
+			smp_num_siblings = 1;
+			return;
+		}
+
+		index_msb = get_count_order(smp_num_siblings);
+		c->phys_proc_id = phys_pkg_id(index_msb);
+
+		smp_num_siblings = smp_num_siblings / c->x86_max_cores;
+
+		index_msb = get_count_order(smp_num_siblings);
+
+		core_bits = get_count_order(c->x86_max_cores);
+
+		c->cpu_core_id = phys_pkg_id(index_msb) &
+					       ((1 << core_bits) - 1);
+	}
+out:
+	if ((c->x86_max_cores * smp_num_siblings) > 1) {
+		printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
+		       c->phys_proc_id);
+		printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
+		       c->cpu_core_id);
+	}
+
+#endif
+}
+
+static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
+{
+	char *v = c->x86_vendor_id;
+	int i;
+	static int printed;
+
+	for (i = 0; i < X86_VENDOR_NUM; i++) {
+		if (cpu_devs[i]) {
+			if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
+			    (cpu_devs[i]->c_ident[1] &&
+			    !strcmp(v, cpu_devs[i]->c_ident[1]))) {
+				c->x86_vendor = i;
+				this_cpu = cpu_devs[i];
+				return;
+			}
+		}
+	}
+	if (!printed) {
+		printed++;
+		printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n");
+		printk(KERN_ERR "CPU: Your system may be unstable.\n");
+	}
+	c->x86_vendor = X86_VENDOR_UNKNOWN;
+}
+
+static void __init early_cpu_support_print(void)
+{
+	int i,j;
+	struct cpu_dev *cpu_devx;
+
+	printk("KERNEL supported cpus:\n");
+	for (i = 0; i < X86_VENDOR_NUM; i++) {
+		cpu_devx = cpu_devs[i];
+		if (!cpu_devx)
+			continue;
+		for (j = 0; j < 2; j++) {
+			if (!cpu_devx->c_ident[j])
+				continue;
+			printk("  %s %s\n", cpu_devx->c_vendor,
+				cpu_devx->c_ident[j]);
+		}
+	}
+}
+
+static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c);
+
+void __init early_cpu_init(void)
+{
+        struct cpu_vendor_dev *cvdev;
+
+        for (cvdev = __x86cpuvendor_start ;
+             cvdev < __x86cpuvendor_end   ;
+             cvdev++)
+                cpu_devs[cvdev->vendor] = cvdev->cpu_dev;
+	early_cpu_support_print();
+	early_identify_cpu(&boot_cpu_data);
+}
+
+/* Do some early cpuid on the boot CPU to get some parameter that are
+   needed before check_bugs. Everything advanced is in identify_cpu
+   below. */
+static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
+{
+	u32 tfms, xlvl;
+
+	c->loops_per_jiffy = loops_per_jiffy;
+	c->x86_cache_size = -1;
+	c->x86_vendor = X86_VENDOR_UNKNOWN;
+	c->x86_model = c->x86_mask = 0;	/* So far unknown... */
+	c->x86_vendor_id[0] = '\0'; /* Unset */
+	c->x86_model_id[0] = '\0';  /* Unset */
+	c->x86_clflush_size = 64;
+	c->x86_cache_alignment = c->x86_clflush_size;
+	c->x86_max_cores = 1;
+	c->x86_coreid_bits = 0;
+	c->extended_cpuid_level = 0;
+	memset(&c->x86_capability, 0, sizeof c->x86_capability);
+
+	/* Get vendor name */
+	cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
+	      (unsigned int *)&c->x86_vendor_id[0],
+	      (unsigned int *)&c->x86_vendor_id[8],
+	      (unsigned int *)&c->x86_vendor_id[4]);
+
+	get_cpu_vendor(c);
+
+	/* Initialize the standard set of capabilities */
+	/* Note that the vendor-specific code below might override */
+
+	/* Intel-defined flags: level 0x00000001 */
+	if (c->cpuid_level >= 0x00000001) {
+		__u32 misc;
+		cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4],
+		      &c->x86_capability[0]);
+		c->x86 = (tfms >> 8) & 0xf;
+		c->x86_model = (tfms >> 4) & 0xf;
+		c->x86_mask = tfms & 0xf;
+		if (c->x86 == 0xf)
+			c->x86 += (tfms >> 20) & 0xff;
+		if (c->x86 >= 0x6)
+			c->x86_model += ((tfms >> 16) & 0xF) << 4;
+		if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
+			c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
+	} else {
+		/* Have CPUID level 0 only - unheard of */
+		c->x86 = 4;
+	}
+
+	c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff;
+#ifdef CONFIG_SMP
+	c->phys_proc_id = c->initial_apicid;
+#endif
+	/* AMD-defined flags: level 0x80000001 */
+	xlvl = cpuid_eax(0x80000000);
+	c->extended_cpuid_level = xlvl;
+	if ((xlvl & 0xffff0000) == 0x80000000) {
+		if (xlvl >= 0x80000001) {
+			c->x86_capability[1] = cpuid_edx(0x80000001);
+			c->x86_capability[6] = cpuid_ecx(0x80000001);
+		}
+		if (xlvl >= 0x80000004)
+			get_model_name(c); /* Default name */
+	}
+
+	/* Transmeta-defined flags: level 0x80860001 */
+	xlvl = cpuid_eax(0x80860000);
+	if ((xlvl & 0xffff0000) == 0x80860000) {
+		/* Don't set x86_cpuid_level here for now to not confuse. */
+		if (xlvl >= 0x80860001)
+			c->x86_capability[2] = cpuid_edx(0x80860001);
+	}
+
+	c->extended_cpuid_level = cpuid_eax(0x80000000);
+	if (c->extended_cpuid_level >= 0x80000007)
+		c->x86_power = cpuid_edx(0x80000007);
+
+	if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
+	    cpu_devs[c->x86_vendor]->c_early_init)
+		cpu_devs[c->x86_vendor]->c_early_init(c);
+
+	validate_pat_support(c);
+
+	/* early_param could clear that, but recall get it set again */
+	if (disable_apic)
+		clear_cpu_cap(c, X86_FEATURE_APIC);
+}
+
+/*
+ * This does the hard work of actually picking apart the CPU stuff...
+ */
+void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
+{
+	int i;
+
+	early_identify_cpu(c);
+
+	init_scattered_cpuid_features(c);
+
+	c->apicid = phys_pkg_id(0);
+
+	/*
+	 * Vendor-specific initialization.  In this section we
+	 * canonicalize the feature flags, meaning if there are
+	 * features a certain CPU supports which CPUID doesn't
+	 * tell us, CPUID claiming incorrect flags, or other bugs,
+	 * we handle them here.
+	 *
+	 * At the end of this section, c->x86_capability better
+	 * indicate the features this CPU genuinely supports!
+	 */
+	if (this_cpu->c_init)
+		this_cpu->c_init(c);
+
+	detect_ht(c);
+
+	/*
+	 * On SMP, boot_cpu_data holds the common feature set between
+	 * all CPUs; so make sure that we indicate which features are
+	 * common between the CPUs.  The first time this routine gets
+	 * executed, c == &boot_cpu_data.
+	 */
+	if (c != &boot_cpu_data) {
+		/* AND the already accumulated flags with these */
+		for (i = 0; i < NCAPINTS; i++)
+			boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
+	}
+
+	/* Clear all flags overriden by options */
+	for (i = 0; i < NCAPINTS; i++)
+		c->x86_capability[i] &= ~cleared_cpu_caps[i];
+
+#ifdef CONFIG_X86_MCE
+	mcheck_init(c);
+#endif
+	select_idle_routine(c);
+
+#ifdef CONFIG_NUMA
+	numa_add_cpu(smp_processor_id());
+#endif
+
+}
+
+void __cpuinit identify_boot_cpu(void)
+{
+	identify_cpu(&boot_cpu_data);
+}
+
+void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
+{
+	BUG_ON(c == &boot_cpu_data);
+	identify_cpu(c);
+	mtrr_ap_init();
+}
+
+static __init int setup_noclflush(char *arg)
+{
+	setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
+	return 1;
+}
+__setup("noclflush", setup_noclflush);
+
+void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
+{
+	if (c->x86_model_id[0])
+		printk(KERN_CONT "%s", c->x86_model_id);
+
+	if (c->x86_mask || c->cpuid_level >= 0)
+		printk(KERN_CONT " stepping %02x\n", c->x86_mask);
+	else
+		printk(KERN_CONT "\n");
+}
+
+static __init int setup_disablecpuid(char *arg)
+{
+	int bit;
+	if (get_option(&arg, &bit) && bit < NCAPINTS*32)
+		setup_clear_cpu_cap(bit);
+	else
+		return 0;
+	return 1;
+}
+__setup("clearcpuid=", setup_disablecpuid);
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
deleted file mode 100644
index 9b516eecada..00000000000
--- a/arch/x86/kernel/setup_64.c
+++ /dev/null
@@ -1,823 +0,0 @@
-/*
- *  Copyright (C) 1995  Linus Torvalds
- */
-
-/*
- * This file handles the architecture-dependent parts of initialization
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/stddef.h>
-#include <linux/unistd.h>
-#include <linux/ptrace.h>
-#include <linux/slab.h>
-#include <linux/user.h>
-#include <linux/screen_info.h>
-#include <linux/ioport.h>
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/initrd.h>
-#include <linux/highmem.h>
-#include <linux/bootmem.h>
-#include <linux/module.h>
-#include <asm/processor.h>
-#include <linux/console.h>
-#include <linux/seq_file.h>
-#include <linux/crash_dump.h>
-#include <linux/root_dev.h>
-#include <linux/pci.h>
-#include <asm/pci-direct.h>
-#include <linux/efi.h>
-#include <linux/acpi.h>
-#include <linux/kallsyms.h>
-#include <linux/edd.h>
-#include <linux/iscsi_ibft.h>
-#include <linux/mmzone.h>
-#include <linux/kexec.h>
-#include <linux/cpufreq.h>
-#include <linux/dmi.h>
-#include <linux/dma-mapping.h>
-#include <linux/ctype.h>
-#include <linux/sort.h>
-#include <linux/uaccess.h>
-#include <linux/init_ohci1394_dma.h>
-#include <linux/kvm_para.h>
-
-#include <asm/mtrr.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <asm/vsyscall.h>
-#include <asm/io.h>
-#include <asm/smp.h>
-#include <asm/msr.h>
-#include <asm/desc.h>
-#include <video/edid.h>
-#include <asm/e820.h>
-#include <asm/mpspec.h>
-#include <asm/dma.h>
-#include <asm/gart.h>
-#include <asm/mpspec.h>
-#include <asm/mmu_context.h>
-#include <asm/proto.h>
-#include <asm/setup.h>
-#include <asm/numa.h>
-#include <asm/sections.h>
-#include <asm/dmi.h>
-#include <asm/cacheflush.h>
-#include <asm/mce.h>
-#include <asm/ds.h>
-#include <asm/topology.h>
-#include <asm/trampoline.h>
-#include <asm/pat.h>
-
-#include <mach_apic.h>
-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else
-#define ARCH_SETUP
-#endif
-
-/*
- * Machine setup..
- */
-
-struct cpuinfo_x86 boot_cpu_data __read_mostly;
-EXPORT_SYMBOL(boot_cpu_data);
-
-__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
-
-unsigned long mmu_cr4_features;
-
-/* Boot loader ID as an integer, for the benefit of proc_dointvec */
-int bootloader_type;
-
-unsigned long saved_video_mode;
-
-/*
- * Early DMI memory
- */
-int dmi_alloc_index;
-char dmi_alloc_data[DMI_MAX_DATA];
-
-/*
- * Setup options
- */
-struct screen_info screen_info;
-EXPORT_SYMBOL(screen_info);
-struct sys_desc_table_struct {
-	unsigned short length;
-	unsigned char table[0];
-};
-
-struct edid_info edid_info;
-EXPORT_SYMBOL_GPL(edid_info);
-
-extern int root_mountflags;
-
-static char __initdata command_line[COMMAND_LINE_SIZE];
-
-static struct resource standard_io_resources[] = {
-	{ .name = "dma1", .start = 0x00, .end = 0x1f,
-		.flags = IORESOURCE_BUSY | IORESOURCE_IO },
-	{ .name = "pic1", .start = 0x20, .end = 0x21,
-		.flags = IORESOURCE_BUSY | IORESOURCE_IO },
-	{ .name = "timer0", .start = 0x40, .end = 0x43,
-		.flags = IORESOURCE_BUSY | IORESOURCE_IO },
-	{ .name = "timer1", .start = 0x50, .end = 0x53,
-		.flags = IORESOURCE_BUSY | IORESOURCE_IO },
-	{ .name = "keyboard", .start = 0x60, .end = 0x60,
-		.flags = IORESOURCE_BUSY | IORESOURCE_IO },
-	{ .name = "keyboard", .start = 0x64, .end = 0x64,
-		.flags = IORESOURCE_BUSY | IORESOURCE_IO },
-	{ .name = "dma page reg", .start = 0x80, .end = 0x8f,
-		.flags = IORESOURCE_BUSY | IORESOURCE_IO },
-	{ .name = "pic2", .start = 0xa0, .end = 0xa1,
-		.flags = IORESOURCE_BUSY | IORESOURCE_IO },
-	{ .name = "dma2", .start = 0xc0, .end = 0xdf,
-		.flags = IORESOURCE_BUSY | IORESOURCE_IO },
-	{ .name = "fpu", .start = 0xf0, .end = 0xff,
-		.flags = IORESOURCE_BUSY | IORESOURCE_IO }
-};
-
-#define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM)
-
-static struct resource data_resource = {
-	.name = "Kernel data",
-	.start = 0,
-	.end = 0,
-	.flags = IORESOURCE_RAM,
-};
-static struct resource code_resource = {
-	.name = "Kernel code",
-	.start = 0,
-	.end = 0,
-	.flags = IORESOURCE_RAM,
-};
-static struct resource bss_resource = {
-	.name = "Kernel bss",
-	.start = 0,
-	.end = 0,
-	.flags = IORESOURCE_RAM,
-};
-
-static void __init early_cpu_init(void);
-static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c);
-
-#ifdef CONFIG_PROC_VMCORE
-/* elfcorehdr= specifies the location of elf core header
- * stored by the crashed kernel. This option will be passed
- * by kexec loader to the capture kernel.
- */
-static int __init setup_elfcorehdr(char *arg)
-{
-	char *end;
-	if (!arg)
-		return -EINVAL;
-	elfcorehdr_addr = memparse(arg, &end);
-	return end > arg ? 0 : -EINVAL;
-}
-early_param("elfcorehdr", setup_elfcorehdr);
-#endif
-
-#ifndef CONFIG_NUMA
-static void __init
-contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
-{
-	unsigned long bootmap_size, bootmap;
-
-	bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
-	bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size,
-				 PAGE_SIZE);
-	if (bootmap == -1L)
-		panic("Cannot find bootmem map of size %ld\n", bootmap_size);
-	bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
-	e820_register_active_regions(0, start_pfn, end_pfn);
-	free_bootmem_with_active_regions(0, end_pfn);
-	early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
-	reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
-}
-#endif
-
-#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
-struct edd edd;
-#ifdef CONFIG_EDD_MODULE
-EXPORT_SYMBOL(edd);
-#endif
-/**
- * copy_edd() - Copy the BIOS EDD information
- *              from boot_params into a safe place.
- *
- */
-static inline void copy_edd(void)
-{
-     memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer,
-	    sizeof(edd.mbr_signature));
-     memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info));
-     edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries;
-     edd.edd_info_nr = boot_params.eddbuf_entries;
-}
-#else
-static inline void copy_edd(void)
-{
-}
-#endif
-
-/* Overridden in paravirt.c if CONFIG_PARAVIRT */
-void __attribute__((weak)) __init memory_setup(void)
-{
-       machine_specific_memory_setup();
-}
-
-/* Current gdt points %fs at the "master" per-cpu area: after this,
- * it's on the real one. */
-void switch_to_new_gdt(void)
-{
-	struct desc_ptr gdt_descr;
-
-	gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
-	gdt_descr.size = GDT_SIZE - 1;
-	load_gdt(&gdt_descr);
-}
-
-/*
- * setup_arch - architecture-specific boot-time initializations
- *
- * Note: On x86_64, fixmaps are ready for use even before this is called.
- */
-void __init setup_arch(char **cmdline_p)
-{
-	unsigned i;
-
-	printk(KERN_INFO "Command line: %s\n", boot_command_line);
-
-	ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
-	screen_info = boot_params.screen_info;
-	edid_info = boot_params.edid_info;
-	saved_video_mode = boot_params.hdr.vid_mode;
-	bootloader_type = boot_params.hdr.type_of_loader;
-
-#ifdef CONFIG_BLK_DEV_RAM
-	rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
-	rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
-	rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
-#endif
-#ifdef CONFIG_EFI
-	if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
-		     "EL64", 4)) {
-		efi_enabled = 1;
-		efi_reserve_early();
-	}
-#endif
-
-	ARCH_SETUP
-
-	setup_memory_map();
-	copy_edd();
-
-	if (!boot_params.hdr.root_flags)
-		root_mountflags &= ~MS_RDONLY;
-	init_mm.start_code = (unsigned long) &_text;
-	init_mm.end_code = (unsigned long) &_etext;
-	init_mm.end_data = (unsigned long) &_edata;
-	init_mm.brk = (unsigned long) &_end;
-
-	code_resource.start = virt_to_phys(&_text);
-	code_resource.end = virt_to_phys(&_etext)-1;
-	data_resource.start = virt_to_phys(&_etext);
-	data_resource.end = virt_to_phys(&_edata)-1;
-	bss_resource.start = virt_to_phys(&__bss_start);
-	bss_resource.end = virt_to_phys(&__bss_stop)-1;
-
-	early_cpu_init();
-	early_identify_cpu(&boot_cpu_data);
-
-	strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
-	*cmdline_p = command_line;
-
-	parse_setup_data();
-
-	parse_early_param();
-
-	if (acpi_mps_check()) {
-		disable_apic = 1;
-		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
-	}
-
-#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
-	if (init_ohci1394_dma_early)
-		init_ohci1394_dma_on_all_controllers();
-#endif
-
-	finish_e820_parsing();
-
-	/* after parse_early_param, so could debug it */
-	insert_resource(&iomem_resource, &code_resource);
-	insert_resource(&iomem_resource, &data_resource);
-	insert_resource(&iomem_resource, &bss_resource);
-
-	early_gart_iommu_check();
-
-	e820_register_active_regions(0, 0, -1UL);
-	/*
-	 * partially used pages are not usable - thus
-	 * we are rounding upwards:
-	 */
-	end_pfn = e820_end_of_ram();
-
-	/* pre allocte 4k for mptable mpc */
-	early_reserve_e820_mpc_new();
-	/* update e820 for memory not covered by WB MTRRs */
-	mtrr_bp_init();
-	if (mtrr_trim_uncached_memory(end_pfn)) {
-		remove_all_active_ranges();
-		e820_register_active_regions(0, 0, -1UL);
-		end_pfn = e820_end_of_ram();
-	}
-
-	num_physpages = end_pfn;
-
-	check_efer();
-
-	max_pfn_mapped = init_memory_mapping(0, (end_pfn << PAGE_SHIFT));
-	if (efi_enabled)
-		efi_init();
-
-	vsmp_init();
-
-	dmi_scan_machine();
-
-	io_delay_init();
-
-#ifdef CONFIG_KVM_CLOCK
-	kvmclock_init();
-#endif
-
-	/*
-	 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
-	 * Call this early for SRAT node setup.
-	 */
-	acpi_boot_table_init();
-
-	/* How many end-of-memory variables you have, grandma! */
-	max_low_pfn = end_pfn;
-	max_pfn = end_pfn;
-	high_memory = (void *)__va(end_pfn * PAGE_SIZE - 1) + 1;
-
-	/* Remove active ranges so rediscovery with NUMA-awareness happens */
-	remove_all_active_ranges();
-
-#ifdef CONFIG_ACPI_NUMA
-	/*
-	 * Parse SRAT to discover nodes.
-	 */
-	acpi_numa_init();
-#endif
-
-#ifdef CONFIG_NUMA
-	numa_initmem_init(0, end_pfn);
-#else
-	contig_initmem_init(0, end_pfn);
-#endif
-
-	dma32_reserve_bootmem();
-
-#ifdef CONFIG_ACPI_SLEEP
-	/*
-	 * Reserve low memory region for sleep support.
-	 */
-       acpi_reserve_bootmem();
-#endif
-
-#ifdef CONFIG_X86_MPPARSE
-       /*
-	* Find and reserve possible boot-time SMP configuration:
-	*/
-	find_smp_config();
-#endif
-#ifdef CONFIG_BLK_DEV_INITRD
-	if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
-		unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
-		unsigned long ramdisk_size  = boot_params.hdr.ramdisk_size;
-		unsigned long ramdisk_end   = ramdisk_image + ramdisk_size;
-		unsigned long end_of_mem    = end_pfn << PAGE_SHIFT;
-
-		if (ramdisk_end <= end_of_mem) {
-			/*
-			 * don't need to reserve again, already reserved early
-			 * in x86_64_start_kernel, and early_res_to_bootmem
-			 * convert that to reserved in bootmem
-			 */
-			initrd_start = ramdisk_image + PAGE_OFFSET;
-			initrd_end = initrd_start+ramdisk_size;
-		} else {
-			free_bootmem(ramdisk_image, ramdisk_size);
-			printk(KERN_ERR "initrd extends beyond end of memory "
-			       "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
-			       ramdisk_end, end_of_mem);
-			initrd_start = 0;
-		}
-	}
-#endif
-	reserve_crashkernel();
-
-	reserve_ibft_region();
-
-	paging_init();
-	map_vsyscall();
-
-	early_quirks();
-
-	/*
-	 * Read APIC and some other early information from ACPI tables.
-	 */
-	acpi_boot_init();
-
-	init_cpu_to_node();
-
-#ifdef CONFIG_X86_MPPARSE
-	/*
-	 * get boot-time SMP configuration:
-	 */
-	if (smp_found_config)
-		get_smp_config();
-#endif
-	init_apic_mappings();
-	ioapic_init_mappings();
-
-	kvm_guest_init();
-
-	/*
-	 * We trust e820 completely. No explicit ROM probing in memory.
-	 */
-	e820_reserve_resources();
-	e820_mark_nosave_regions(end_pfn);
-
-	/* request I/O space for devices used on all i[345]86 PCs */
-	for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
-		request_resource(&ioport_resource, &standard_io_resources[i]);
-
-	e820_setup_gap();
-
-#ifdef CONFIG_VT
-#if defined(CONFIG_VGA_CONSOLE)
-	if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
-		conswitchp = &vga_con;
-#elif defined(CONFIG_DUMMY_CONSOLE)
-	conswitchp = &dummy_con;
-#endif
-#endif
-}
-
-struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
-
-static void __cpuinit default_init(struct cpuinfo_x86 *c)
-{
-	display_cacheinfo(c);
-}
-
-static struct cpu_dev __cpuinitdata default_cpu = {
-	.c_init	= default_init,
-	.c_vendor = "Unknown",
-};
-static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
-
-int __cpuinit get_model_name(struct cpuinfo_x86 *c)
-{
-	unsigned int *v;
-
-	if (c->extended_cpuid_level < 0x80000004)
-		return 0;
-
-	v = (unsigned int *) c->x86_model_id;
-	cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
-	cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
-	cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
-	c->x86_model_id[48] = 0;
-	return 1;
-}
-
-
-void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
-{
-	unsigned int n, dummy, eax, ebx, ecx, edx;
-
-	n = c->extended_cpuid_level;
-
-	if (n >= 0x80000005) {
-		cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
-		printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), "
-		       "D cache %dK (%d bytes/line)\n",
-		       edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
-		c->x86_cache_size = (ecx>>24) + (edx>>24);
-		/* On K8 L1 TLB is inclusive, so don't count it */
-		c->x86_tlbsize = 0;
-	}
-
-	if (n >= 0x80000006) {
-		cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
-		ecx = cpuid_ecx(0x80000006);
-		c->x86_cache_size = ecx >> 16;
-		c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
-
-		printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
-		c->x86_cache_size, ecx & 0xFF);
-	}
-	if (n >= 0x80000008) {
-		cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
-		c->x86_virt_bits = (eax >> 8) & 0xff;
-		c->x86_phys_bits = eax & 0xff;
-	}
-}
-
-void __cpuinit detect_ht(struct cpuinfo_x86 *c)
-{
-#ifdef CONFIG_SMP
-	u32 eax, ebx, ecx, edx;
-	int index_msb, core_bits;
-
-	cpuid(1, &eax, &ebx, &ecx, &edx);
-
-
-	if (!cpu_has(c, X86_FEATURE_HT))
-		return;
-	if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
-		goto out;
-
-	smp_num_siblings = (ebx & 0xff0000) >> 16;
-
-	if (smp_num_siblings == 1) {
-		printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
-	} else if (smp_num_siblings > 1) {
-
-		if (smp_num_siblings > NR_CPUS) {
-			printk(KERN_WARNING "CPU: Unsupported number of "
-			       "siblings %d", smp_num_siblings);
-			smp_num_siblings = 1;
-			return;
-		}
-
-		index_msb = get_count_order(smp_num_siblings);
-		c->phys_proc_id = phys_pkg_id(index_msb);
-
-		smp_num_siblings = smp_num_siblings / c->x86_max_cores;
-
-		index_msb = get_count_order(smp_num_siblings);
-
-		core_bits = get_count_order(c->x86_max_cores);
-
-		c->cpu_core_id = phys_pkg_id(index_msb) &
-					       ((1 << core_bits) - 1);
-	}
-out:
-	if ((c->x86_max_cores * smp_num_siblings) > 1) {
-		printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
-		       c->phys_proc_id);
-		printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
-		       c->cpu_core_id);
-	}
-
-#endif
-}
-
-static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
-{
-	char *v = c->x86_vendor_id;
-	int i;
-	static int printed;
-
-	for (i = 0; i < X86_VENDOR_NUM; i++) {
-		if (cpu_devs[i]) {
-			if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
-			    (cpu_devs[i]->c_ident[1] &&
-			    !strcmp(v, cpu_devs[i]->c_ident[1]))) {
-				c->x86_vendor = i;
-				this_cpu = cpu_devs[i];
-				return;
-			}
-		}
-	}
-	if (!printed) {
-		printed++;
-		printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n");
-		printk(KERN_ERR "CPU: Your system may be unstable.\n");
-	}
-	c->x86_vendor = X86_VENDOR_UNKNOWN;
-}
-
-static void __init early_cpu_support_print(void)
-{
-	int i,j;
-	struct cpu_dev *cpu_devx;
-
-	printk("KERNEL supported cpus:\n");
-	for (i = 0; i < X86_VENDOR_NUM; i++) {
-		cpu_devx = cpu_devs[i];
-		if (!cpu_devx)
-			continue;
-		for (j = 0; j < 2; j++) {
-			if (!cpu_devx->c_ident[j])
-				continue;
-			printk("  %s %s\n", cpu_devx->c_vendor,
-				cpu_devx->c_ident[j]);
-		}
-	}
-}
-
-static void __init early_cpu_init(void)
-{
-        struct cpu_vendor_dev *cvdev;
-
-        for (cvdev = __x86cpuvendor_start ;
-             cvdev < __x86cpuvendor_end   ;
-             cvdev++)
-                cpu_devs[cvdev->vendor] = cvdev->cpu_dev;
-	early_cpu_support_print();
-}
-
-/* Do some early cpuid on the boot CPU to get some parameter that are
-   needed before check_bugs. Everything advanced is in identify_cpu
-   below. */
-static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
-{
-	u32 tfms, xlvl;
-
-	c->loops_per_jiffy = loops_per_jiffy;
-	c->x86_cache_size = -1;
-	c->x86_vendor = X86_VENDOR_UNKNOWN;
-	c->x86_model = c->x86_mask = 0;	/* So far unknown... */
-	c->x86_vendor_id[0] = '\0'; /* Unset */
-	c->x86_model_id[0] = '\0';  /* Unset */
-	c->x86_clflush_size = 64;
-	c->x86_cache_alignment = c->x86_clflush_size;
-	c->x86_max_cores = 1;
-	c->x86_coreid_bits = 0;
-	c->extended_cpuid_level = 0;
-	memset(&c->x86_capability, 0, sizeof c->x86_capability);
-
-	/* Get vendor name */
-	cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
-	      (unsigned int *)&c->x86_vendor_id[0],
-	      (unsigned int *)&c->x86_vendor_id[8],
-	      (unsigned int *)&c->x86_vendor_id[4]);
-
-	get_cpu_vendor(c);
-
-	/* Initialize the standard set of capabilities */
-	/* Note that the vendor-specific code below might override */
-
-	/* Intel-defined flags: level 0x00000001 */
-	if (c->cpuid_level >= 0x00000001) {
-		__u32 misc;
-		cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4],
-		      &c->x86_capability[0]);
-		c->x86 = (tfms >> 8) & 0xf;
-		c->x86_model = (tfms >> 4) & 0xf;
-		c->x86_mask = tfms & 0xf;
-		if (c->x86 == 0xf)
-			c->x86 += (tfms >> 20) & 0xff;
-		if (c->x86 >= 0x6)
-			c->x86_model += ((tfms >> 16) & 0xF) << 4;
-		if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
-			c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
-	} else {
-		/* Have CPUID level 0 only - unheard of */
-		c->x86 = 4;
-	}
-
-	c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff;
-#ifdef CONFIG_SMP
-	c->phys_proc_id = c->initial_apicid;
-#endif
-	/* AMD-defined flags: level 0x80000001 */
-	xlvl = cpuid_eax(0x80000000);
-	c->extended_cpuid_level = xlvl;
-	if ((xlvl & 0xffff0000) == 0x80000000) {
-		if (xlvl >= 0x80000001) {
-			c->x86_capability[1] = cpuid_edx(0x80000001);
-			c->x86_capability[6] = cpuid_ecx(0x80000001);
-		}
-		if (xlvl >= 0x80000004)
-			get_model_name(c); /* Default name */
-	}
-
-	/* Transmeta-defined flags: level 0x80860001 */
-	xlvl = cpuid_eax(0x80860000);
-	if ((xlvl & 0xffff0000) == 0x80860000) {
-		/* Don't set x86_cpuid_level here for now to not confuse. */
-		if (xlvl >= 0x80860001)
-			c->x86_capability[2] = cpuid_edx(0x80860001);
-	}
-
-	c->extended_cpuid_level = cpuid_eax(0x80000000);
-	if (c->extended_cpuid_level >= 0x80000007)
-		c->x86_power = cpuid_edx(0x80000007);
-
-	if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
-	    cpu_devs[c->x86_vendor]->c_early_init)
-		cpu_devs[c->x86_vendor]->c_early_init(c);
-
-	validate_pat_support(c);
-
-	/* early_param could clear that, but recall get it set again */
-	if (disable_apic)
-		clear_cpu_cap(c, X86_FEATURE_APIC);
-}
-
-/*
- * This does the hard work of actually picking apart the CPU stuff...
- */
-void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
-{
-	int i;
-
-	early_identify_cpu(c);
-
-	init_scattered_cpuid_features(c);
-
-	c->apicid = phys_pkg_id(0);
-
-	/*
-	 * Vendor-specific initialization.  In this section we
-	 * canonicalize the feature flags, meaning if there are
-	 * features a certain CPU supports which CPUID doesn't
-	 * tell us, CPUID claiming incorrect flags, or other bugs,
-	 * we handle them here.
-	 *
-	 * At the end of this section, c->x86_capability better
-	 * indicate the features this CPU genuinely supports!
-	 */
-	if (this_cpu->c_init)
-		this_cpu->c_init(c);
-
-	detect_ht(c);
-
-	/*
-	 * On SMP, boot_cpu_data holds the common feature set between
-	 * all CPUs; so make sure that we indicate which features are
-	 * common between the CPUs.  The first time this routine gets
-	 * executed, c == &boot_cpu_data.
-	 */
-	if (c != &boot_cpu_data) {
-		/* AND the already accumulated flags with these */
-		for (i = 0; i < NCAPINTS; i++)
-			boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
-	}
-
-	/* Clear all flags overriden by options */
-	for (i = 0; i < NCAPINTS; i++)
-		c->x86_capability[i] &= ~cleared_cpu_caps[i];
-
-#ifdef CONFIG_X86_MCE
-	mcheck_init(c);
-#endif
-	select_idle_routine(c);
-
-#ifdef CONFIG_NUMA
-	numa_add_cpu(smp_processor_id());
-#endif
-
-}
-
-void __cpuinit identify_boot_cpu(void)
-{
-	identify_cpu(&boot_cpu_data);
-}
-
-void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
-{
-	BUG_ON(c == &boot_cpu_data);
-	identify_cpu(c);
-	mtrr_ap_init();
-}
-
-static __init int setup_noclflush(char *arg)
-{
-	setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
-	return 1;
-}
-__setup("noclflush", setup_noclflush);
-
-void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
-{
-	if (c->x86_model_id[0])
-		printk(KERN_CONT "%s", c->x86_model_id);
-
-	if (c->x86_mask || c->cpuid_level >= 0)
-		printk(KERN_CONT " stepping %02x\n", c->x86_mask);
-	else
-		printk(KERN_CONT "\n");
-}
-
-static __init int setup_disablecpuid(char *arg)
-{
-	int bit;
-	if (get_option(&arg, &bit) && bit < NCAPINTS*32)
-		setup_clear_cpu_cap(bit);
-	else
-		return 0;
-	return 1;
-}
-__setup("clearcpuid=", setup_disablecpuid);
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 4ab2ede6f4b..a5df34fb523 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -153,6 +153,7 @@ static inline int hlt_works(int cpu)
 
 extern void cpu_detect(struct cpuinfo_x86 *c);
 
+extern void early_cpu_init(void);
 extern void identify_cpu(struct cpuinfo_x86 *);
 extern void identify_boot_cpu(void);
 extern void identify_secondary_cpu(struct cpuinfo_x86 *);
-- 
cgit v1.2.3-70-g09d2


From 9a250347591da3e60b5ee53dd1d341732f081117 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sat, 21 Jun 2008 03:24:00 -0700
Subject: x86: change identify_cpu to static

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/common.c    | 2 +-
 arch/x86/kernel/cpu/common_64.c | 2 +-
 include/asm-x86/processor.h     | 1 -
 3 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index d0463a94624..80ab20d4fa3 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -427,7 +427,7 @@ __setup("serialnumber", x86_serial_nr_setup);
 /*
  * This does the hard work of actually picking apart the CPU stuff...
  */
-void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
+static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 {
 	int i;
 
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index b6f205063f7..48ba7996158 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -312,7 +312,7 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
 /*
  * This does the hard work of actually picking apart the CPU stuff...
  */
-void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
+static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 {
 	int i;
 
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index a5df34fb523..12b5ede14c7 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -154,7 +154,6 @@ static inline int hlt_works(int cpu)
 extern void cpu_detect(struct cpuinfo_x86 *c);
 
 extern void early_cpu_init(void);
-extern void identify_cpu(struct cpuinfo_x86 *);
 extern void identify_boot_cpu(void);
 extern void identify_secondary_cpu(struct cpuinfo_x86 *);
 extern void print_cpu_info(struct cpuinfo_x86 *);
-- 
cgit v1.2.3-70-g09d2


From 7a1fd9866cbb59a00006f1e0fd5726951b167c97 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sat, 21 Jun 2008 14:48:05 -0700
Subject: x86: add e820_remove_range

... so could add real hole in e820

agp check is using request_mem_region, and could fail if e820 is reserved...

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/e820.c | 35 +++++++++++++++++++++++++++++++++++
 include/asm-x86/e820.h |  2 ++
 2 files changed, 37 insertions(+)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 7b613d2efb0..e285ea38c8e 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -429,6 +429,41 @@ u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
 	return real_updated_size;
 }
 
+/* make e820 not cover the range */
+u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
+			     int checktype)
+{
+	int i;
+	u64 real_removed_size = 0;
+
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+		u64 final_start, final_end;
+
+		if (checktype && ei->type != old_type)
+			continue;
+		/* totally covered? */
+		if (ei->addr >= start &&
+		    (ei->addr + ei->size) <= (start + size)) {
+			real_removed_size += ei->size;
+			memset(ei, 0, sizeof(struct e820entry));
+			continue;
+		}
+		/* partially covered */
+		final_start = max(start, ei->addr);
+		final_end = min(start + size, ei->addr + ei->size);
+		if (final_start >= final_end)
+			continue;
+		real_removed_size += final_end - final_start;
+
+		ei->size -= final_end - final_start;
+		if (ei->addr < final_start)
+			continue;
+		ei->addr = final_end;
+	}
+	return real_removed_size;
+}
+
 void __init update_e820(void)
 {
 	int nr_map;
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index 0e92b6a2ea0..7c32df07bae 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -67,6 +67,8 @@ sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, int *pnr_map);
 extern int copy_e820_map(struct e820entry *biosmap, int nr_map);
 extern u64 e820_update_range(u64 start, u64 size, unsigned old_type,
 			       unsigned new_type);
+extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type,
+			     int checktype);
 extern void update_e820(void);
 extern void e820_setup_gap(void);
 struct setup_data;
-- 
cgit v1.2.3-70-g09d2


From a9c1182fbd349882fe912245d6e03cd30943be2d Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sat, 21 Jun 2008 15:39:41 -0700
Subject: x86: seperate probe_roms into another file

it is only needed for 32bit

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/Makefile        |   1 +
 arch/x86/kernel/probe_roms_32.c | 166 ++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/setup_32.c      | 146 -----------------------------------
 include/asm-x86/setup.h         |   1 +
 4 files changed, 168 insertions(+), 146 deletions(-)
 create mode 100644 arch/x86/kernel/probe_roms_32.c

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 9a71be82792..9c9ce75e3ae 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -19,6 +19,7 @@ obj-y			:= process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
 obj-y			+= traps_$(BITS).o irq_$(BITS).o
 obj-y			+= time_$(BITS).o ioport.o ldt.o
 obj-y			+= setup_$(BITS).o i8259.o irqinit_$(BITS).o setup.o
+obj-$(CONFIG_X86_32)	+= probe_roms_32.o
 obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)	+= syscall_64.o vsyscall_64.o setup64.o
diff --git a/arch/x86/kernel/probe_roms_32.c b/arch/x86/kernel/probe_roms_32.c
new file mode 100644
index 00000000000..675a48c404a
--- /dev/null
+++ b/arch/x86/kernel/probe_roms_32.c
@@ -0,0 +1,166 @@
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+#include <linux/mmzone.h>
+#include <linux/ioport.h>
+#include <linux/seq_file.h>
+#include <linux/console.h>
+#include <linux/init.h>
+#include <linux/edd.h>
+#include <linux/dmi.h>
+#include <linux/pfn.h>
+#include <linux/pci.h>
+#include <asm/pci-direct.h>
+
+
+#include <asm/e820.h>
+#include <asm/mmzone.h>
+#include <asm/setup.h>
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <setup_arch.h>
+
+static struct resource system_rom_resource = {
+	.name	= "System ROM",
+	.start	= 0xf0000,
+	.end	= 0xfffff,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+};
+
+static struct resource extension_rom_resource = {
+	.name	= "Extension ROM",
+	.start	= 0xe0000,
+	.end	= 0xeffff,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+};
+
+static struct resource adapter_rom_resources[] = { {
+	.name 	= "Adapter ROM",
+	.start	= 0xc8000,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+	.name 	= "Adapter ROM",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+	.name 	= "Adapter ROM",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+	.name 	= "Adapter ROM",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+	.name 	= "Adapter ROM",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+	.name 	= "Adapter ROM",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+} };
+
+static struct resource video_rom_resource = {
+	.name 	= "Video ROM",
+	.start	= 0xc0000,
+	.end	= 0xc7fff,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+};
+
+#define ROMSIGNATURE 0xaa55
+
+static int __init romsignature(const unsigned char *rom)
+{
+	const unsigned short * const ptr = (const unsigned short *)rom;
+	unsigned short sig;
+
+	return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE;
+}
+
+static int __init romchecksum(const unsigned char *rom, unsigned long length)
+{
+	unsigned char sum, c;
+
+	for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--)
+		sum += c;
+	return !length && !sum;
+}
+
+void __init probe_roms(void)
+{
+	const unsigned char *rom;
+	unsigned long start, length, upper;
+	unsigned char c;
+	int i;
+
+	/* video rom */
+	upper = adapter_rom_resources[0].start;
+	for (start = video_rom_resource.start; start < upper; start += 2048) {
+		rom = isa_bus_to_virt(start);
+		if (!romsignature(rom))
+			continue;
+
+		video_rom_resource.start = start;
+
+		if (probe_kernel_address(rom + 2, c) != 0)
+			continue;
+
+		/* 0 < length <= 0x7f * 512, historically */
+		length = c * 512;
+
+		/* if checksum okay, trust length byte */
+		if (length && romchecksum(rom, length))
+			video_rom_resource.end = start + length - 1;
+
+		request_resource(&iomem_resource, &video_rom_resource);
+		break;
+	}
+
+	start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
+	if (start < upper)
+		start = upper;
+
+	/* system rom */
+	request_resource(&iomem_resource, &system_rom_resource);
+	upper = system_rom_resource.start;
+
+	/* check for extension rom (ignore length byte!) */
+	rom = isa_bus_to_virt(extension_rom_resource.start);
+	if (romsignature(rom)) {
+		length = extension_rom_resource.end - extension_rom_resource.start + 1;
+		if (romchecksum(rom, length)) {
+			request_resource(&iomem_resource, &extension_rom_resource);
+			upper = extension_rom_resource.start;
+		}
+	}
+
+	/* check for adapter roms on 2k boundaries */
+	for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
+		rom = isa_bus_to_virt(start);
+		if (!romsignature(rom))
+			continue;
+
+		if (probe_kernel_address(rom + 2, c) != 0)
+			continue;
+
+		/* 0 < length <= 0x7f * 512, historically */
+		length = c * 512;
+
+		/* but accept any length that fits if checksum okay */
+		if (!length || start + length > upper || !romchecksum(rom, length))
+			continue;
+
+		adapter_rom_resources[i].start = start;
+		adapter_rom_resources[i].end = start + length - 1;
+		request_resource(&iomem_resource, &adapter_rom_resources[i]);
+
+		start = adapter_rom_resources[i++].end & ~2047UL;
+	}
+}
+
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index e274ee6ff58..865838b1179 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -606,8 +606,6 @@ static void set_mca_bus(int x)
 static void set_mca_bus(int x) { }
 #endif
 
-static void probe_roms(void);
-
 /*
  * Determine if we were loaded by an EFI loader.  If so, then we have also been
  * passed the efi memmap, systab, etc., so we should use these data structures
@@ -843,147 +841,3 @@ void __init setup_arch(char **cmdline_p)
 #endif
 }
 
-static struct resource system_rom_resource = {
-	.name	= "System ROM",
-	.start	= 0xf0000,
-	.end	= 0xfffff,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-};
-
-static struct resource extension_rom_resource = {
-	.name	= "Extension ROM",
-	.start	= 0xe0000,
-	.end	= 0xeffff,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-};
-
-static struct resource adapter_rom_resources[] = { {
-	.name 	= "Adapter ROM",
-	.start	= 0xc8000,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-}, {
-	.name 	= "Adapter ROM",
-	.start	= 0,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-}, {
-	.name 	= "Adapter ROM",
-	.start	= 0,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-}, {
-	.name 	= "Adapter ROM",
-	.start	= 0,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-}, {
-	.name 	= "Adapter ROM",
-	.start	= 0,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-}, {
-	.name 	= "Adapter ROM",
-	.start	= 0,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-} };
-
-static struct resource video_rom_resource = {
-	.name 	= "Video ROM",
-	.start	= 0xc0000,
-	.end	= 0xc7fff,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-};
-
-#define ROMSIGNATURE 0xaa55
-
-static int __init romsignature(const unsigned char *rom)
-{
-	const unsigned short * const ptr = (const unsigned short *)rom;
-	unsigned short sig;
-
-	return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE;
-}
-
-static int __init romchecksum(const unsigned char *rom, unsigned long length)
-{
-	unsigned char sum, c;
-
-	for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--)
-		sum += c;
-	return !length && !sum;
-}
-
-static void __init probe_roms(void)
-{
-	const unsigned char *rom;
-	unsigned long start, length, upper;
-	unsigned char c;
-	int i;
-
-	/* video rom */
-	upper = adapter_rom_resources[0].start;
-	for (start = video_rom_resource.start; start < upper; start += 2048) {
-		rom = isa_bus_to_virt(start);
-		if (!romsignature(rom))
-			continue;
-
-		video_rom_resource.start = start;
-
-		if (probe_kernel_address(rom + 2, c) != 0)
-			continue;
-
-		/* 0 < length <= 0x7f * 512, historically */
-		length = c * 512;
-
-		/* if checksum okay, trust length byte */
-		if (length && romchecksum(rom, length))
-			video_rom_resource.end = start + length - 1;
-
-		request_resource(&iomem_resource, &video_rom_resource);
-		break;
-	}
-
-	start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
-	if (start < upper)
-		start = upper;
-
-	/* system rom */
-	request_resource(&iomem_resource, &system_rom_resource);
-	upper = system_rom_resource.start;
-
-	/* check for extension rom (ignore length byte!) */
-	rom = isa_bus_to_virt(extension_rom_resource.start);
-	if (romsignature(rom)) {
-		length = extension_rom_resource.end - extension_rom_resource.start + 1;
-		if (romchecksum(rom, length)) {
-			request_resource(&iomem_resource, &extension_rom_resource);
-			upper = extension_rom_resource.start;
-		}
-	}
-
-	/* check for adapter roms on 2k boundaries */
-	for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
-		rom = isa_bus_to_virt(start);
-		if (!romsignature(rom))
-			continue;
-
-		if (probe_kernel_address(rom + 2, c) != 0)
-			continue;
-
-		/* 0 < length <= 0x7f * 512, historically */
-		length = c * 512;
-
-		/* but accept any length that fits if checksum okay */
-		if (!length || start + length > upper || !romchecksum(rom, length))
-			continue;
-
-		adapter_rom_resources[i].start = start;
-		adapter_rom_resources[i].end = start + length - 1;
-		request_resource(&iomem_resource, &adapter_rom_resources[i]);
-
-		start = adapter_rom_resources[i++].end & ~2047UL;
-	}
-}
-
diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index b434bdd82ba..cf87d6d3675 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -54,6 +54,7 @@ extern struct boot_params boot_params;
 #ifdef __i386__
 
 void __init i386_start_kernel(void);
+extern void probe_roms(void);
 
 extern unsigned long init_pg_tables_start;
 extern unsigned long init_pg_tables_end;
-- 
cgit v1.2.3-70-g09d2


From ce97c40e28223c148e142bda7af48fd0f27c81f9 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sat, 21 Jun 2008 20:22:09 -0700
Subject: x86: move reserve_standard_io_resource to setup.c

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup.c    | 32 ++++++++++++++++++++++++++
 arch/x86/kernel/setup_32.c | 57 +---------------------------------------------
 include/asm-x86/setup.h    |  2 ++
 3 files changed, 35 insertions(+), 56 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 56aee55cf8d..3b9ec81ba4f 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -468,4 +468,36 @@ void __init reserve_crashkernel(void)
 void __init reserve_crashkernel(void)
 {}
 #endif
+static struct resource standard_io_resources[] = {
+	{ .name = "dma1", .start = 0x00, .end = 0x1f,
+		.flags = IORESOURCE_BUSY | IORESOURCE_IO },
+	{ .name = "pic1", .start = 0x20, .end = 0x21,
+		.flags = IORESOURCE_BUSY | IORESOURCE_IO },
+	{ .name = "timer0", .start = 0x40, .end = 0x43,
+		.flags = IORESOURCE_BUSY | IORESOURCE_IO },
+	{ .name = "timer1", .start = 0x50, .end = 0x53,
+		.flags = IORESOURCE_BUSY | IORESOURCE_IO },
+	{ .name = "keyboard", .start = 0x60, .end = 0x60,
+		.flags = IORESOURCE_BUSY | IORESOURCE_IO },
+	{ .name = "keyboard", .start = 0x64, .end = 0x64,
+		.flags = IORESOURCE_BUSY | IORESOURCE_IO },
+	{ .name = "dma page reg", .start = 0x80, .end = 0x8f,
+		.flags = IORESOURCE_BUSY | IORESOURCE_IO },
+	{ .name = "pic2", .start = 0xa0, .end = 0xa1,
+		.flags = IORESOURCE_BUSY | IORESOURCE_IO },
+	{ .name = "dma2", .start = 0xc0, .end = 0xdf,
+		.flags = IORESOURCE_BUSY | IORESOURCE_IO },
+	{ .name = "fpu", .start = 0xf0, .end = 0xff,
+		.flags = IORESOURCE_BUSY | IORESOURCE_IO }
+};
+
+void __init reserve_standard_io_resources(void)
+{
+	int i;
+
+	/* request I/O space for devices used on all i[345]86 PCs */
+	for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
+		request_resource(&ioport_resource, &standard_io_resources[i]);
+
+}
 
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 602a45c59ff..2574ec8234c 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -108,58 +108,6 @@ static struct resource video_ram_resource = {
 	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
 };
 
-static struct resource standard_io_resources[] = { {
-	.name	= "dma1",
-	.start	= 0x0000,
-	.end	= 0x001f,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
-}, {
-	.name	= "pic1",
-	.start	= 0x0020,
-	.end	= 0x0021,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
-}, {
-	.name   = "timer0",
-	.start	= 0x0040,
-	.end    = 0x0043,
-	.flags  = IORESOURCE_BUSY | IORESOURCE_IO
-}, {
-	.name   = "timer1",
-	.start  = 0x0050,
-	.end    = 0x0053,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
-}, {
-	.name	= "keyboard",
-	.start	= 0x0060,
-	.end	= 0x0060,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
-}, {
-	.name	= "keyboard",
-	.start	= 0x0064,
-	.end	= 0x0064,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
-}, {
-	.name	= "dma page reg",
-	.start	= 0x0080,
-	.end	= 0x008f,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
-}, {
-	.name	= "pic2",
-	.start	= 0x00a0,
-	.end	= 0x00a1,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
-}, {
-	.name	= "dma2",
-	.start	= 0x00c0,
-	.end	= 0x00df,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
-}, {
-	.name	= "fpu",
-	.start	= 0x00f0,
-	.end	= 0x00ff,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
-} };
-
 /* cpu data as detected by the assembly code in head.S */
 struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
 /* common cpu data for all cpus */
@@ -614,7 +562,6 @@ static void set_mca_bus(int x) { }
  */
 void __init setup_arch(char **cmdline_p)
 {
-	int i;
 	unsigned long max_low_pfn;
 
 	memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
@@ -824,9 +771,7 @@ void __init setup_arch(char **cmdline_p)
 	e820_mark_nosave_regions(max_low_pfn);
 
 	request_resource(&iomem_resource, &video_ram_resource);
-	/* request I/O space for devices used on all i[345]86 PCs */
-	for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
-		request_resource(&ioport_resource, &standard_io_resources[i]);
+	reserve_standard_io_resources();
 
 	e820_setup_gap();
 
diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index cf87d6d3675..bb12a1619c1 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -38,6 +38,8 @@ void reserve_crashkernel(void);
 #ifndef __ASSEMBLY__
 #include <asm/bootparam.h>
 
+void reserve_standard_io_resources(void);
+
 #ifndef _SETUP
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 1f75d7e32ed47b2ab8570771a2ce8c707a7225a2 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sun, 22 Jun 2008 02:44:49 -0700
Subject: x86: introduce initmem_init for 64 bit

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/init_64.c     | 16 ++++++++++++++++
 arch/x86/mm/numa_64.c     |  2 +-
 include/asm-x86/numa_64.h |  1 -
 include/asm-x86/page_64.h |  1 +
 4 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 97c2bc741e9..99a091ee5a6 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -611,6 +611,22 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned lon
 }
 
 #ifndef CONFIG_NUMA
+void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn)
+{
+	unsigned long bootmap_size, bootmap;
+
+	bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
+	bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size,
+				 PAGE_SIZE);
+	if (bootmap == -1L)
+		panic("Cannot find bootmem map of size %ld\n", bootmap_size);
+	bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
+	e820_register_active_regions(0, start_pfn, end_pfn);
+	free_bootmem_with_active_regions(0, end_pfn);
+	early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
+	reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
+}
+
 void __init paging_init(void)
 {
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index c4557e25f60..316e5f961ef 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -514,7 +514,7 @@ out:
 }
 #endif /* CONFIG_NUMA_EMU */
 
-void __init numa_initmem_init(unsigned long start_pfn, unsigned long last_pfn)
+void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn)
 {
 	int i;
 
diff --git a/include/asm-x86/numa_64.h b/include/asm-x86/numa_64.h
index b510daf4f4d..3830094434a 100644
--- a/include/asm-x86/numa_64.h
+++ b/include/asm-x86/numa_64.h
@@ -22,7 +22,6 @@ extern int hotadd_percent;
 
 extern s16 apicid_to_node[MAX_LOCAL_APIC];
 
-extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
 extern unsigned long numa_free_all_bootmem(void);
 extern void setup_node_bootmem(int nodeid, unsigned long start,
 			       unsigned long end);
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index 6ea72859c49..ac37643078e 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -83,6 +83,7 @@ typedef struct { pteval_t pte; } pte_t;
 extern unsigned long init_memory_mapping(unsigned long start,
 					 unsigned long end);
 
+extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn);
 #endif	/* !__ASSEMBLY__ */
 
 #ifdef CONFIG_FLATMEM
-- 
cgit v1.2.3-70-g09d2


From b2ac82a0909aea0d2620ba4c189f37c567c21fe5 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sun, 22 Jun 2008 02:45:39 -0700
Subject: x86: introduce initmem_init for 32 bit

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup_32.c | 94 +---------------------------------------------
 arch/x86/mm/discontig_32.c |  4 +-
 arch/x86/mm/init_32.c      | 90 ++++++++++++++++++++++++++++++++++++++++++++
 include/asm-x86/page_32.h  |  5 +++
 4 files changed, 99 insertions(+), 94 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index d24ac268523..190546bd3bd 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -300,71 +300,11 @@ unsigned long __init find_max_low_pfn(void)
 	return max_low_pfn;
 }
 
-#ifndef CONFIG_NEED_MULTIPLE_NODES
-static void __init setup_bootmem_allocator(void);
-static unsigned long __init setup_memory(void)
-{
-	/*
-	 * partially used pages are not usable - thus
-	 * we are rounding upwards:
-	 */
-	min_low_pfn = PFN_UP(init_pg_tables_end);
-
-	max_low_pfn = find_max_low_pfn();
-
-#ifdef CONFIG_HIGHMEM
-	highstart_pfn = highend_pfn = max_pfn;
-	if (max_pfn > max_low_pfn) {
-		highstart_pfn = max_low_pfn;
-	}
-	memory_present(0, 0, highend_pfn);
-	printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
-		pages_to_mb(highend_pfn - highstart_pfn));
-	num_physpages = highend_pfn;
-	high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
-#else
-	memory_present(0, 0, max_low_pfn);
-	num_physpages = max_low_pfn;
-	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
-#endif
-#ifdef CONFIG_FLATMEM
-	max_mapnr = num_physpages;
-#endif
-	printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
-			pages_to_mb(max_low_pfn));
-
-	setup_bootmem_allocator();
-
-	return max_low_pfn;
-}
-
-static void __init zone_sizes_init(void)
-{
-	unsigned long max_zone_pfns[MAX_NR_ZONES];
-	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
-	max_zone_pfns[ZONE_DMA] =
-		virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
-	remove_all_active_ranges();
-#ifdef CONFIG_HIGHMEM
-	max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
-	e820_register_active_regions(0, 0, highend_pfn);
-#else
-	e820_register_active_regions(0, 0, max_low_pfn);
-#endif
-
-	free_area_init_nodes(max_zone_pfns);
-}
-#else
-extern unsigned long __init setup_memory(void);
-extern void zone_sizes_init(void);
-#endif /* !CONFIG_NEED_MULTIPLE_NODES */
-
 #ifdef CONFIG_BLK_DEV_INITRD
 
 static bool do_relocate_initrd = false;
 
-static void __init reserve_initrd(void)
+void __init reserve_initrd(void)
 {
 	u64 ramdisk_image = boot_params.hdr.ramdisk_image;
 	u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
@@ -480,36 +420,6 @@ static void __init relocate_initrd(void)
 
 #endif /* CONFIG_BLK_DEV_INITRD */
 
-void __init setup_bootmem_allocator(void)
-{
-	int i;
-	unsigned long bootmap_size, bootmap;
-	/*
-	 * Initialize the boot-time allocator (with low memory only):
-	 */
-	bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT;
-	bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT,
-				 max_pfn_mapped<<PAGE_SHIFT, bootmap_size,
-				 PAGE_SIZE);
-	if (bootmap == -1L)
-		panic("Cannot find bootmem map of size %ld\n", bootmap_size);
-	reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
-#ifdef CONFIG_BLK_DEV_INITRD
-	reserve_initrd();
-#endif
-	bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, max_low_pfn);
-	printk(KERN_INFO "  mapped low ram: 0 - %08lx\n",
-		 max_pfn_mapped<<PAGE_SHIFT);
-	printk(KERN_INFO "  low ram: %08lx - %08lx\n",
-		 min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT);
-	printk(KERN_INFO "  bootmap %08lx - %08lx\n",
-		 bootmap, bootmap + bootmap_size);
-	for_each_online_node(i)
-		free_bootmem_with_active_regions(i, max_low_pfn);
-	early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
-
-}
-
 /*
  * The node 0 pgdat is initialized before all of these because
  * it's needed for bootmem.  node>0 pgdats have their virtual
@@ -666,7 +576,7 @@ void __init setup_arch(char **cmdline_p)
         acpi_numa_init();
 #endif
 
-	max_low_pfn = setup_memory();
+	max_low_pfn = initmem_init(0, max_pfn);
 
 #ifdef CONFIG_ACPI_SLEEP
 	/*
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index a2f73ba42b8..3e75be46c4f 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -309,8 +309,8 @@ static void init_remap_allocator(int nid)
 		(ulong) node_remap_end_vaddr[nid]);
 }
 
-extern void setup_bootmem_allocator(void);
-unsigned long __init setup_memory(void)
+unsigned long __init initmem_init(unsigned long start_pfn,
+				  unsigned long end_pfn)
 {
 	int nid;
 	unsigned long system_start_pfn, system_max_low_pfn;
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index a0484adbf59..9bc8607d798 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -540,6 +540,96 @@ static void __init set_nx(void)
 }
 #endif
 
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+extern unsigned long find_max_low_pfn(void);
+unsigned long __init initmem_init(unsigned long start_pfn,
+				  unsigned long end_pfn)
+{
+	/*
+	 * partially used pages are not usable - thus
+	 * we are rounding upwards:
+	 */
+	min_low_pfn = PFN_UP(init_pg_tables_end);
+
+	max_low_pfn = find_max_low_pfn();
+
+#ifdef CONFIG_HIGHMEM
+	highstart_pfn = highend_pfn = max_pfn;
+	if (max_pfn > max_low_pfn)
+		highstart_pfn = max_low_pfn;
+	memory_present(0, 0, highend_pfn);
+	printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
+		pages_to_mb(highend_pfn - highstart_pfn));
+	num_physpages = highend_pfn;
+	high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
+#else
+	memory_present(0, 0, max_low_pfn);
+	num_physpages = max_low_pfn;
+	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
+#endif
+#ifdef CONFIG_FLATMEM
+	max_mapnr = num_physpages;
+#endif
+	printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
+			pages_to_mb(max_low_pfn));
+
+	setup_bootmem_allocator();
+
+	return max_low_pfn;
+}
+
+void __init zone_sizes_init(void)
+{
+	unsigned long max_zone_pfns[MAX_NR_ZONES];
+	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+	max_zone_pfns[ZONE_DMA] =
+		virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
+	remove_all_active_ranges();
+#ifdef CONFIG_HIGHMEM
+	max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
+	e820_register_active_regions(0, 0, highend_pfn);
+#else
+	e820_register_active_regions(0, 0, max_low_pfn);
+#endif
+
+	free_area_init_nodes(max_zone_pfns);
+}
+#endif /* !CONFIG_NEED_MULTIPLE_NODES */
+
+extern void reserve_initrd(void);
+
+void __init setup_bootmem_allocator(void)
+{
+	int i;
+	unsigned long bootmap_size, bootmap;
+	/*
+	 * Initialize the boot-time allocator (with low memory only):
+	 */
+	bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT;
+	bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT,
+				 max_pfn_mapped<<PAGE_SHIFT, bootmap_size,
+				 PAGE_SIZE);
+	if (bootmap == -1L)
+		panic("Cannot find bootmem map of size %ld\n", bootmap_size);
+	reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
+#ifdef CONFIG_BLK_DEV_INITRD
+	reserve_initrd();
+#endif
+	bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, max_low_pfn);
+	printk(KERN_INFO "  mapped low ram: 0 - %08lx\n",
+		 max_pfn_mapped<<PAGE_SHIFT);
+	printk(KERN_INFO "  low ram: %08lx - %08lx\n",
+		 min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT);
+	printk(KERN_INFO "  bootmap %08lx - %08lx\n",
+		 bootmap, bootmap + bootmap_size);
+	for_each_online_node(i)
+		free_bootmem_with_active_regions(i, max_low_pfn);
+	early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
+
+}
+
+
 /*
  * paging_init() sets up the page tables - note that the first 8MB are
  * already mapped by head.S.
diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index 73ed2e4ebf9..a0713d175f4 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h
@@ -92,6 +92,11 @@ extern int sysctl_legacy_va_layout;
 #define VMALLOC_RESERVE		((unsigned long)__VMALLOC_RESERVE)
 #define MAXMEM			(-__PAGE_OFFSET - __VMALLOC_RESERVE)
 
+extern unsigned long initmem_init(unsigned long, unsigned long);
+extern void zone_sizes_init(void);
+extern void setup_bootmem_allocator(void);
+
+
 #ifdef CONFIG_X86_USE_3DNOW
 #include <asm/mmx.h>
 
-- 
cgit v1.2.3-70-g09d2


From 225c37d71bc8b97eb2063e8eda153b383328b20b Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sun, 22 Jun 2008 02:46:58 -0700
Subject: x86: introduce reserve_initrd

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup_32.c | 16 ++++++++++------
 arch/x86/mm/init_32.c      |  6 ++----
 include/asm-x86/setup.h    |  2 ++
 3 files changed, 14 insertions(+), 10 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 190546bd3bd..90b51047ce6 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -336,7 +336,7 @@ void __init reserve_initrd(void)
 		 * in i386_start_kernel
 		 */
 		initrd_start = ramdisk_image + PAGE_OFFSET;
-		initrd_end = initrd_start+ramdisk_size;
+		initrd_end = initrd_start + ramdisk_size;
 		return;
 	}
 
@@ -363,7 +363,7 @@ void __init reserve_initrd(void)
 
 #define MAX_MAP_CHUNK	(NR_FIX_BTMAPS << PAGE_SHIFT)
 
-static void __init relocate_initrd(void)
+static void __init post_reserve_initrd(void)
 {
 	u64 ramdisk_image = boot_params.hdr.ramdisk_image;
 	u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
@@ -417,7 +417,13 @@ static void __init relocate_initrd(void)
 	/* need to free that, otherwise init highmem will reserve it again */
 	free_early(ramdisk_image, ramdisk_image+ramdisk_size);
 }
-
+#else
+void __init reserve_initrd(void)
+{
+}
+static void __init post_reserve_initrd(void)
+{
+}
 #endif /* CONFIG_BLK_DEV_INITRD */
 
 /*
@@ -632,9 +638,7 @@ void __init setup_arch(char **cmdline_p)
 	 * NOTE: at this point the bootmem allocator is fully available.
 	 */
 
-#ifdef CONFIG_BLK_DEV_INITRD
-	relocate_initrd();
-#endif
+	post_reserve_initrd();
 
 	remapped_pgdat_init();
 	sparse_init();
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 9bc8607d798..98080782ee4 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -597,8 +597,6 @@ void __init zone_sizes_init(void)
 }
 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
 
-extern void reserve_initrd(void);
-
 void __init setup_bootmem_allocator(void)
 {
 	int i;
@@ -613,9 +611,9 @@ void __init setup_bootmem_allocator(void)
 	if (bootmap == -1L)
 		panic("Cannot find bootmem map of size %ld\n", bootmap_size);
 	reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
-#ifdef CONFIG_BLK_DEV_INITRD
+
 	reserve_initrd();
-#endif
+
 	bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, max_low_pfn);
 	printk(KERN_INFO "  mapped low ram: 0 - %08lx\n",
 		 max_pfn_mapped<<PAGE_SHIFT);
diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index bb12a1619c1..8f85b245056 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -39,6 +39,8 @@ void reserve_crashkernel(void);
 #include <asm/bootparam.h>
 
 void reserve_standard_io_resources(void);
+void reserve_initrd(void);
+
 
 #ifndef _SETUP
 
-- 
cgit v1.2.3-70-g09d2


From 2ec65f8b89ea003c27ff7723525a2ee335a2b393 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Mon, 23 Jun 2008 03:05:30 -0700
Subject: x86: clean up using max_low_pfn on 32-bit

so that max_low_pfn is not changed after it is set.
so we can move that early and out of initmem_init.

could call find_low_pfn_range just after max_pfn is set.

also could move reserve_initrd out of setup_bootmem_allocator

so 32bit is more like 64bit.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup_32.c | 16 ++++++++++------
 arch/x86/mm/discontig_32.c | 22 +++++++---------------
 arch/x86/mm/init_32.c      | 25 +++++++++----------------
 include/asm-x86/page_32.h  |  3 ++-
 include/asm-x86/setup.h    |  2 --
 5 files changed, 28 insertions(+), 40 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 9a08490a388..b42f570a5a5 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -188,13 +188,14 @@ static inline void copy_edd(void)
 
 static bool do_relocate_initrd = false;
 
-void __init reserve_initrd(void)
+static void __init reserve_initrd(void)
 {
 	u64 ramdisk_image = boot_params.hdr.ramdisk_image;
 	u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
 	u64 ramdisk_end   = ramdisk_image + ramdisk_size;
 	u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
 	u64 ramdisk_here;
+	u64 ramdisk_target;
 
 	if (!boot_params.hdr.type_of_loader ||
 	    !ramdisk_image || !ramdisk_size)
@@ -202,7 +203,7 @@ void __init reserve_initrd(void)
 
 	initrd_start = 0;
 
-	if (ramdisk_size >= end_of_lowmem/2) {
+	if (ramdisk_size >= (end_of_lowmem>>1)) {
 		free_early(ramdisk_image, ramdisk_end);
 		printk(KERN_ERR "initrd too large to handle, "
 		       "disabling initrd\n");
@@ -225,7 +226,8 @@ void __init reserve_initrd(void)
 	}
 
 	/* We need to move the initrd down into lowmem */
-	ramdisk_here = find_e820_area(min_low_pfn<<PAGE_SHIFT,
+	ramdisk_target = max_pfn_mapped<<PAGE_SHIFT;
+	ramdisk_here = find_e820_area(min(ramdisk_target, end_of_lowmem>>1),
 				 end_of_lowmem, ramdisk_size,
 				 PAGE_SIZE);
 
@@ -346,8 +348,6 @@ static void set_mca_bus(int x) { }
  */
 void __init setup_arch(char **cmdline_p)
 {
-	unsigned long max_low_pfn;
-
 	memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
 	pre_setup_arch_hook();
 	early_cpu_init();
@@ -450,6 +450,10 @@ void __init setup_arch(char **cmdline_p)
 		max_pfn = e820_end_of_ram();
 	}
 
+	find_low_pfn_range();
+
+	reserve_initrd();
+
 	dmi_scan_machine();
 
 	io_delay_init();
@@ -466,7 +470,7 @@ void __init setup_arch(char **cmdline_p)
         acpi_numa_init();
 #endif
 
-	max_low_pfn = initmem_init(0, max_pfn);
+	initmem_init(0, max_pfn);
 
 #ifdef CONFIG_ACPI_SLEEP
 	/*
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 3e75be46c4f..1dfff700264 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -309,11 +309,10 @@ static void init_remap_allocator(int nid)
 		(ulong) node_remap_end_vaddr[nid]);
 }
 
-unsigned long __init initmem_init(unsigned long start_pfn,
+void __init initmem_init(unsigned long start_pfn,
 				  unsigned long end_pfn)
 {
 	int nid;
-	unsigned long system_start_pfn, system_max_low_pfn;
 	long kva_target_pfn;
 
 	/*
@@ -324,17 +323,11 @@ unsigned long __init initmem_init(unsigned long start_pfn,
 	 * and ZONE_HIGHMEM.
 	 */
 
-	/* call find_max_low_pfn at first, it could update max_pfn */
-	system_max_low_pfn = max_low_pfn = find_max_low_pfn();
-
 	remove_all_active_ranges();
 	get_memcfg_numa();
 
 	kva_pages = round_up(calculate_numa_remap_pages(), PTRS_PER_PTE);
 
-	/* partially used pages are not usable - thus round upwards */
-	system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end);
-
 	kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE);
 	do {
 		kva_start_pfn = find_e820_area(kva_target_pfn<<PAGE_SHIFT,
@@ -357,19 +350,19 @@ unsigned long __init initmem_init(unsigned long start_pfn,
 		     "KVA PG");
 #ifdef CONFIG_HIGHMEM
 	highstart_pfn = highend_pfn = max_pfn;
-	if (max_pfn > system_max_low_pfn)
-		highstart_pfn = system_max_low_pfn;
+	if (max_pfn > max_low_pfn)
+		highstart_pfn = max_low_pfn;
 	printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
 	       pages_to_mb(highend_pfn - highstart_pfn));
 	num_physpages = highend_pfn;
 	high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
 #else
-	num_physpages = system_max_low_pfn;
-	high_memory = (void *) __va(system_max_low_pfn * PAGE_SIZE - 1) + 1;
+	num_physpages = max_low_pfn;
+	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
 #endif
 	printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
-			pages_to_mb(system_max_low_pfn));
-	printk("min_low_pfn = %ld, max_low_pfn = %ld, highstart_pfn = %ld\n", 
+			pages_to_mb(max_low_pfn));
+	printk("min_low_pfn = %ld, max_low_pfn = %ld, highstart_pfn = %ld\n",
 			min_low_pfn, max_low_pfn, highstart_pfn);
 
 	printk("Low memory ends at vaddr %08lx\n",
@@ -387,7 +380,6 @@ unsigned long __init initmem_init(unsigned long start_pfn,
 	memset(NODE_DATA(0), 0, sizeof(struct pglist_data));
 	NODE_DATA(0)->bdata = &node0_bdata;
 	setup_bootmem_allocator();
-	return max_low_pfn;
 }
 
 void __init zone_sizes_init(void)
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index d1017336f1b..27b82931294 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -561,9 +561,15 @@ early_param("highmem", parse_highmem);
 /*
  * Determine low and high memory ranges:
  */
-unsigned long __init find_max_low_pfn(void)
+void __init find_low_pfn_range(void)
 {
-	unsigned long max_low_pfn;
+	/* it could update max_pfn */
+
+	/*
+	 * partially used pages are not usable - thus
+	 * we are rounding upwards:
+	 */
+	min_low_pfn = PFN_UP(init_pg_tables_end);
 
 	max_low_pfn = max_pfn;
 	if (max_low_pfn > MAXMEM_PFN) {
@@ -625,21 +631,12 @@ unsigned long __init find_max_low_pfn(void)
 					" kernel!\n");
 #endif
 	}
-	return max_low_pfn;
 }
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
-unsigned long __init initmem_init(unsigned long start_pfn,
+void __init initmem_init(unsigned long start_pfn,
 				  unsigned long end_pfn)
 {
-	/*
-	 * partially used pages are not usable - thus
-	 * we are rounding upwards:
-	 */
-	min_low_pfn = PFN_UP(init_pg_tables_end);
-
-	max_low_pfn = find_max_low_pfn();
-
 #ifdef CONFIG_HIGHMEM
 	highstart_pfn = highend_pfn = max_pfn;
 	if (max_pfn > max_low_pfn)
@@ -661,8 +658,6 @@ unsigned long __init initmem_init(unsigned long start_pfn,
 			pages_to_mb(max_low_pfn));
 
 	setup_bootmem_allocator();
-
-	return max_low_pfn;
 }
 
 void __init zone_sizes_init(void)
@@ -699,8 +694,6 @@ void __init setup_bootmem_allocator(void)
 		panic("Cannot find bootmem map of size %ld\n", bootmap_size);
 	reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
 
-	reserve_initrd();
-
 	bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, max_low_pfn);
 	printk(KERN_INFO "  mapped low ram: 0 - %08lx\n",
 		 max_pfn_mapped<<PAGE_SHIFT);
diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index a0713d175f4..3810d14051e 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h
@@ -92,7 +92,8 @@ extern int sysctl_legacy_va_layout;
 #define VMALLOC_RESERVE		((unsigned long)__VMALLOC_RESERVE)
 #define MAXMEM			(-__PAGE_OFFSET - __VMALLOC_RESERVE)
 
-extern unsigned long initmem_init(unsigned long, unsigned long);
+extern void find_low_pfn_range(void);
+extern void initmem_init(unsigned long, unsigned long);
 extern void zone_sizes_init(void);
 extern void setup_bootmem_allocator(void);
 
diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index 8f85b245056..bb12a1619c1 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -39,8 +39,6 @@ void reserve_crashkernel(void);
 #include <asm/bootparam.h>
 
 void reserve_standard_io_resources(void);
-void reserve_initrd(void);
-
 
 #ifndef _SETUP
 
-- 
cgit v1.2.3-70-g09d2


From 11cd0bc140b5d66566c9eb49c1058737888cd75c Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Mon, 23 Jun 2008 19:51:10 -0700
Subject: x86: move some func calling from setup_arch to paging_init

those function depend on paging setup pgtable, so they could access
the ram in bootmem region but just get mapped.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup_32.c | 34 ++--------------------------------
 arch/x86/mm/init_32.c      | 29 +++++++++++++++++++++++++++++
 include/asm-x86/setup.h    |  1 +
 3 files changed, 32 insertions(+), 32 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 1e670372c19..220d92faf0a 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -249,7 +249,7 @@ static void __init reserve_initrd(void)
 
 #define MAX_MAP_CHUNK	(NR_FIX_BTMAPS << PAGE_SHIFT)
 
-static void __init post_reserve_initrd(void)
+void __init post_reserve_initrd(void)
 {
 	u64 ramdisk_image = boot_params.hdr.ramdisk_image;
 	u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
@@ -307,29 +307,11 @@ static void __init post_reserve_initrd(void)
 void __init reserve_initrd(void)
 {
 }
-static void __init post_reserve_initrd(void)
+void __init post_reserve_initrd(void)
 {
 }
 #endif /* CONFIG_BLK_DEV_INITRD */
 
-/*
- * The node 0 pgdat is initialized before all of these because
- * it's needed for bootmem.  node>0 pgdats have their virtual
- * space allocated before the pagetables are in place to access
- * them, so they can't be cleared then.
- *
- * This should all compile down to nothing when NUMA is off.
- */
-static void __init remapped_pgdat_init(void)
-{
-	int nid;
-
-	for_each_online_node(nid) {
-		if (nid != 0)
-			memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
-	}
-}
-
 #ifdef CONFIG_MCA
 static void set_mca_bus(int x)
 {
@@ -524,18 +506,6 @@ void __init setup_arch(char **cmdline_p)
 		init_ohci1394_dma_on_all_controllers();
 #endif
 
-	/*
-	 * NOTE: at this point the bootmem allocator is fully available.
-	 */
-
-	post_reserve_initrd();
-
-	remapped_pgdat_init();
-	sparse_init();
-	zone_sizes_init();
-
-	paravirt_post_allocator_init();
-
 #ifdef CONFIG_X86_GENERICARCH
 	generic_apic_probe();
 #endif
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 9bb35cf8bc4..20ca29591ab 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -705,6 +705,23 @@ void __init setup_bootmem_allocator(void)
 
 }
 
+/*
+ * The node 0 pgdat is initialized before all of these because
+ * it's needed for bootmem.  node>0 pgdats have their virtual
+ * space allocated before the pagetables are in place to access
+ * them, so they can't be cleared then.
+ *
+ * This should all compile down to nothing when NUMA is off.
+ */
+static void __init remapped_pgdat_init(void)
+{
+	int nid;
+
+	for_each_online_node(nid) {
+		if (nid != 0)
+			memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+	}
+}
 
 /*
  * paging_init() sets up the page tables - note that the first 8MB are
@@ -727,6 +744,18 @@ void __init paging_init(void)
 	__flush_tlb_all();
 
 	kmap_init();
+
+	/*
+	 * NOTE: at this point the bootmem allocator is fully available.
+	 */
+
+	post_reserve_initrd();
+
+	remapped_pgdat_init();
+	sparse_init();
+	zone_sizes_init();
+
+	paravirt_post_allocator_init();
 }
 
 /*
diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index bb12a1619c1..4ebb4ef14c0 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -39,6 +39,7 @@ void reserve_crashkernel(void);
 #include <asm/bootparam.h>
 
 void reserve_standard_io_resources(void);
+extern void post_reserve_initrd(void);
 
 #ifndef _SETUP
 
-- 
cgit v1.2.3-70-g09d2


From c4ba1320b7075e9ce33ad0afaef43ba13260b4c2 Mon Sep 17 00:00:00 2001
From: Paul Jackson <pj@sgi.com>
Date: Sun, 22 Jun 2008 07:22:07 -0700
Subject: x86 boot: allow overlapping early reserve memory ranges

Add support for overlapping early memory reservations.

In general, they still can't overlap, and will panic
with "Overlapping early reservations" if they do overlap.

But if a memory range is reserved with the new call:
    reserve_early_overlap_ok()
rather than with the usual call:
    reserve_early()
then subsequent early reservations are allowed to overlap.

This new reserve_early_overlap_ok() call is only used in one
place so far, which is the "BIOS reserved" reservation for the
the EBDA region, which out of Paranoia reserves more than what
the BIOS might have specified, and which thus might overlap with
another legitimate early memory reservation (such as, perhaps,
the EFI memmap.)

Signed-off-by: Paul Jackson <pj@sgi.com>
Cc: "Yinghai Lu" <yhlu.kernel@gmail.com>
Cc: "Jack Steiner" <steiner@sgi.com>
Cc: "Mike Travis" <travis@sgi.com>
Cc: "Huang
Cc: Ying" <ying.huang@intel.com>
Cc: "Andi Kleen" <andi@firstfloor.org>
Cc: "Andrew Morton" <akpm@linux-foundation.org>
Cc: Paul Jackson <pj@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/e820.c | 140 +++++++++++++++++++++++++++++++++++++++++++++----
 arch/x86/kernel/head.c |   2 +-
 include/asm-x86/e820.h |   1 +
 3 files changed, 133 insertions(+), 10 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 8a8afbdeb34..600c9de237a 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -604,6 +604,7 @@ void __init e820_mark_nosave_regions(unsigned long limit_pfn)
 struct early_res {
 	u64 start, end;
 	char name[16];
+	char overlap_ok;
 };
 static struct early_res early_res[MAX_EARLY_RES] __initdata = {
 	{ 0, PAGE_SIZE, "BIOS data page" },	/* BIOS data page */
@@ -640,7 +641,93 @@ static int __init find_overlapped_early(u64 start, u64 end)
 	return i;
 }
 
-void __init reserve_early(u64 start, u64 end, char *name)
+/*
+ * Drop the i-th range from the early reservation map,
+ * by copying any higher ranges down one over it, and
+ * clearing what had been the last slot.
+ */
+static void __init drop_range(int i)
+{
+	int j;
+
+	for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
+		;
+
+	memmove(&early_res[i], &early_res[i + 1],
+	       (j - 1 - i) * sizeof(struct early_res));
+
+	early_res[j - 1].end = 0;
+}
+
+/*
+ * Split any existing ranges that:
+ *  1) are marked 'overlap_ok', and
+ *  2) overlap with the stated range [start, end)
+ * into whatever portion (if any) of the existing range is entirely
+ * below or entirely above the stated range.  Drop the portion
+ * of the existing range that overlaps with the stated range,
+ * which will allow the caller of this routine to then add that
+ * stated range without conflicting with any existing range.
+ */
+static void __init drop_overlaps_that_are_ok(u64 start, u64 end)
+{
+	int i;
+	struct early_res *r;
+	u64 lower_start, lower_end;
+	u64 upper_start, upper_end;
+	char name[16];
+
+	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
+		r = &early_res[i];
+
+		/* Continue past non-overlapping ranges */
+		if (end <= r->start || start >= r->end)
+			continue;
+
+		/*
+		 * Leave non-ok overlaps as is; let caller
+		 * panic "Overlapping early reservations"
+		 * when it hits this overlap.
+		 */
+		if (!r->overlap_ok)
+			return;
+
+		/*
+		 * We have an ok overlap.  We will drop it from the early
+		 * reservation map, and add back in any non-overlapping
+		 * portions (lower or upper) as separate, overlap_ok,
+		 * non-overlapping ranges.
+		 */
+
+		/* 1. Note any non-overlapping (lower or upper) ranges. */
+		strncpy(name, r->name, sizeof(name) - 1);
+
+		lower_start = lower_end = 0;
+		upper_start = upper_end = 0;
+		if (r->start < start) {
+		 	lower_start = r->start;
+			lower_end = start;
+		}
+		if (r->end > end) {
+			upper_start = end;
+			upper_end = r->end;
+		}
+
+		/* 2. Drop the original ok overlapping range */
+		drop_range(i);
+
+		i--;		/* resume for-loop on copied down entry */
+
+		/* 3. Add back in any non-overlapping ranges. */
+		if (lower_end)
+			reserve_early_overlap_ok(lower_start, lower_end, name);
+		if (upper_end)
+			reserve_early_overlap_ok(upper_start, upper_end, name);
+	}
+}
+
+static void __init __reserve_early(u64 start, u64 end, char *name,
+						int overlap_ok)
 {
 	int i;
 	struct early_res *r;
@@ -656,14 +743,55 @@ void __init reserve_early(u64 start, u64 end, char *name)
 		      r->end - 1, r->name);
 	r->start = start;
 	r->end = end;
+	r->overlap_ok = overlap_ok;
 	if (name)
 		strncpy(r->name, name, sizeof(r->name) - 1);
 }
 
+/*
+ * A few early reservtations come here.
+ *
+ * The 'overlap_ok' in the name of this routine does -not- mean it
+ * is ok for these reservations to overlap an earlier reservation.
+ * Rather it means that it is ok for subsequent reservations to
+ * overlap this one.
+ *
+ * Use this entry point to reserve early ranges when you are doing
+ * so out of "Paranoia", reserving perhaps more memory than you need,
+ * just in case, and don't mind a subsequent overlapping reservation
+ * that is known to be needed.
+ *
+ * The drop_overlaps_that_are_ok() call here isn't really needed.
+ * It would be needed if we had two colliding 'overlap_ok'
+ * reservations, so that the second such would not panic on the
+ * overlap with the first.  We don't have any such as of this
+ * writing, but might as well tolerate such if it happens in
+ * the future.
+ */
+void __init reserve_early_overlap_ok(u64 start, u64 end, char *name)
+{
+	drop_overlaps_that_are_ok(start, end);
+	__reserve_early(start, end, name, 1);
+}
+
+/*
+ * Most early reservations come here.
+ *
+ * We first have drop_overlaps_that_are_ok() drop any pre-existing
+ * 'overlap_ok' ranges, so that we can then reserve this memory
+ * range without risk of panic'ing on an overlapping overlap_ok
+ * early reservation.
+ */
+void __init reserve_early(u64 start, u64 end, char *name)
+{
+	drop_overlaps_that_are_ok(start, end);
+	__reserve_early(start, end, name, 0);
+}
+
 void __init free_early(u64 start, u64 end)
 {
 	struct early_res *r;
-	int i, j;
+	int i;
 
 	i = find_overlapped_early(start, end);
 	r = &early_res[i];
@@ -671,13 +799,7 @@ void __init free_early(u64 start, u64 end)
 		panic("free_early on not reserved area: %llx-%llx!",
 			 start, end - 1);
 
-	for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
-		;
-
-	memmove(&early_res[i], &early_res[i + 1],
-	       (j - 1 - i) * sizeof(struct early_res));
-
-	early_res[j - 1].end = 0;
+	drop_range(i);
 }
 
 void __init early_res_to_bootmem(u64 start, u64 end)
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
index a727c0b9819..a6816be01cd 100644
--- a/arch/x86/kernel/head.c
+++ b/arch/x86/kernel/head.c
@@ -51,7 +51,7 @@ void __init reserve_ebda_region(void)
 		lowmem = 0x9f000;
 
 	/* reserve all memory between lowmem and the 1MB mark */
-	reserve_early(lowmem, 0x100000, "BIOS reserved");
+	reserve_early_overlap_ok(lowmem, 0x100000, "BIOS reserved");
 }
 
 void __init reserve_setup_data(void)
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index 7c32df07bae..13fa5a076aa 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -88,6 +88,7 @@ extern unsigned long end_user_pfn;
 extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align);
 extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align);
 extern void reserve_early(u64 start, u64 end, char *name);
+extern void reserve_early_overlap_ok(u64 start, u64 end, char *name);
 extern void free_early(u64 start, u64 end);
 extern void early_res_to_bootmem(u64 start, u64 end);
 extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
-- 
cgit v1.2.3-70-g09d2


From c376d45432d935e6f1e0ff2d6be3734bcd3ba455 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Tue, 24 Jun 2008 22:52:05 +0200
Subject: x86: nmi_watchdog - use NMI_NONE by default

There is no need to keep NMI_DISABLED definition and use it
for nmi_watchdog by default. Here is the point why:

- IO-APIC and APIC chips are programmed for nmi_watchdog support at very
  early stage of kernel booting and not having nmi_watchdog specified as
  boot option lead only to nmi_watchdog becomes to NMI_NONE anyway
- enable nmi_watchdog thru /proc/sys/kernel/nmi if it was not specified at
  boot is not possible too (even having this sysfs entry)

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: macro@linux-mips.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_64.c    |  1 -
 arch/x86/kernel/io_apic_64.c |  2 --
 arch/x86/kernel/nmi.c        | 26 +++-----------------------
 arch/x86/kernel/smpboot.c    |  1 -
 include/asm-x86/nmi.h        |  3 ---
 5 files changed, 3 insertions(+), 30 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 8c751f731bc..1e3d32e27c1 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -826,7 +826,6 @@ static void __cpuinit lapic_setup_esr(void)
 void __cpuinit end_local_APIC_setup(void)
 {
 	lapic_setup_esr();
-	nmi_watchdog_default();
 	setup_apic_nmi_watchdog(NULL);
 	apic_pm_activate();
 }
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 08c48750888..2b4c40bc12c 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -1729,7 +1729,6 @@ static inline void __init check_timer(void)
 		}
 		unmask_IO_APIC_irq(0);
 		if (!no_timer_check && timer_irq_works()) {
-			nmi_watchdog_default();
 			if (nmi_watchdog == NMI_IO_APIC) {
 				setup_nmi();
 				enable_8259A_irq(0);
@@ -1758,7 +1757,6 @@ static inline void __init check_timer(void)
 		if (timer_irq_works()) {
 			apic_printk(APIC_VERBOSE," works.\n");
 			timer_through_8259 = 1;
-			nmi_watchdog_default();
 			if (nmi_watchdog == NMI_IO_APIC) {
 				disable_8259A_irq(0);
 				setup_nmi();
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 427c511e7ad..32acda25e3c 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -52,7 +52,7 @@ static cpumask_t backtrace_mask = CPU_MASK_NONE;
 atomic_t nmi_active = ATOMIC_INIT(0);		/* oprofile uses this */
 EXPORT_SYMBOL(nmi_active);
 
-unsigned int nmi_watchdog = NMI_DEFAULT;
+unsigned int nmi_watchdog = NMI_NONE;
 EXPORT_SYMBOL(nmi_watchdog);
 
 static int panic_on_timeout;
@@ -92,14 +92,6 @@ static inline unsigned int get_timer_irqs(int cpu)
 #endif
 }
 
-/* Run after command line and cpu_init init, but before all other checks */
-void nmi_watchdog_default(void)
-{
-	if (nmi_watchdog != NMI_DEFAULT)
-		return;
-	nmi_watchdog = NMI_NONE;
-}
-
 #ifdef CONFIG_SMP
 /*
  * The performance counters used by NMI_LOCAL_APIC don't trigger when
@@ -127,7 +119,7 @@ int __init check_nmi_watchdog(void)
 	unsigned int *prev_nmi_count;
 	int cpu;
 
-	if (nmi_watchdog == NMI_NONE || nmi_watchdog == NMI_DISABLED)
+	if (nmi_watchdog == NMI_NONE)
 		return 0;
 
 	if (!atomic_read(&nmi_active))
@@ -482,24 +474,12 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
 	if (!!old_state == !!nmi_watchdog_enabled)
 		return 0;
 
-	if (atomic_read(&nmi_active) < 0 || nmi_watchdog == NMI_DISABLED) {
+	if (atomic_read(&nmi_active) < 0 || nmi_watchdog == NMI_NONE) {
 		printk(KERN_WARNING
 			"NMI watchdog is permanently disabled\n");
 		return -EIO;
 	}
 
-	/* if nmi_watchdog is not set yet, then set it */
-	nmi_watchdog_default();
-
-#ifdef CONFIG_X86_32
-	if (nmi_watchdog == NMI_NONE) {
-		if (lapic_watchdog_ok())
-			nmi_watchdog = NMI_LOCAL_APIC;
-		else
-			nmi_watchdog = NMI_IO_APIC;
-	}
-#endif
-
 	if (nmi_watchdog == NMI_LOCAL_APIC) {
 		if (nmi_watchdog_enabled)
 			enable_lapic_nmi_watchdog();
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 3b48d1f4c7c..3b19441d78b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1139,7 +1139,6 @@ static void __init smp_cpu_index_default(void)
 void __init native_smp_prepare_cpus(unsigned int max_cpus)
 {
 	preempt_disable();
-	nmi_watchdog_default();
 	smp_cpu_index_default();
 	current_cpu_data = boot_cpu_data;
 	cpu_callin_map = cpumask_of_cpu(0);
diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h
index f0e435dd38f..1348e542360 100644
--- a/include/asm-x86/nmi.h
+++ b/include/asm-x86/nmi.h
@@ -20,7 +20,6 @@ extern void default_do_nmi(struct pt_regs *);
 #endif
 
 extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
-extern void nmi_watchdog_default(void);
 extern int check_nmi_watchdog(void);
 extern int nmi_watchdog_enabled;
 extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
@@ -38,12 +37,10 @@ extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason);
 
 extern atomic_t nmi_active;
 extern unsigned int nmi_watchdog;
-#define NMI_DISABLED    -1
 #define NMI_NONE	0
 #define NMI_IO_APIC	1
 #define NMI_LOCAL_APIC	2
 #define NMI_INVALID	3
-#define NMI_DEFAULT	NMI_DISABLED
 
 struct ctl_table;
 struct file;
-- 
cgit v1.2.3-70-g09d2


From 4de0043617f949fdac538fd59335e2150cd1b863 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Tue, 24 Jun 2008 22:52:06 +0200
Subject: x86: nmi_watchdog - introduce nmi_watchdog_active() helper

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: macro@linux-mips.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/nmi.c | 13 ++++---------
 include/asm-x86/nmi.h | 13 +++++++++++++
 2 files changed, 17 insertions(+), 9 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 32acda25e3c..8dfe9db87a9 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -119,10 +119,7 @@ int __init check_nmi_watchdog(void)
 	unsigned int *prev_nmi_count;
 	int cpu;
 
-	if (nmi_watchdog == NMI_NONE)
-		return 0;
-
-	if (!atomic_read(&nmi_active))
+	if (!nmi_watchdog_active() || !atomic_read(&nmi_active))
 		return 0;
 
 	prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
@@ -317,8 +314,7 @@ void setup_apic_nmi_watchdog(void *unused)
 void stop_apic_nmi_watchdog(void *unused)
 {
 	/* only support LOCAL and IO APICs for now */
-	if (nmi_watchdog != NMI_LOCAL_APIC &&
-	    nmi_watchdog != NMI_IO_APIC)
+	if (!nmi_watchdog_active())
 		return;
 	if (__get_cpu_var(wd_enabled) == 0)
 		return;
@@ -348,8 +344,7 @@ static DEFINE_PER_CPU(int, nmi_touch);
 
 void touch_nmi_watchdog(void)
 {
-	if (nmi_watchdog == NMI_LOCAL_APIC ||
-		nmi_watchdog == NMI_IO_APIC) {
+	if (nmi_watchdog_active()) {
 		unsigned cpu;
 
 		/*
@@ -474,7 +469,7 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
 	if (!!old_state == !!nmi_watchdog_enabled)
 		return 0;
 
-	if (atomic_read(&nmi_active) < 0 || nmi_watchdog == NMI_NONE) {
+	if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) {
 		printk(KERN_WARNING
 			"NMI watchdog is permanently disabled\n");
 		return -EIO;
diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h
index 1348e542360..21f8d0202a8 100644
--- a/include/asm-x86/nmi.h
+++ b/include/asm-x86/nmi.h
@@ -56,6 +56,19 @@ static inline void localise_nmi_watchdog(void)
 	if (nmi_watchdog == NMI_IO_APIC)
 		nmi_watchdog = NMI_LOCAL_APIC;
 }
+
+/* check if nmi_watchdog is active (ie was specified at boot) */
+static inline int nmi_watchdog_active(void)
+{
+	/*
+	 * actually it should be:
+	 * 	return (nmi_watchdog == NMI_LOCAL_APIC ||
+	 * 		nmi_watchdog == NMI_IO_APIC)
+	 * but since they are power of two we could use a
+	 * cheaper way --cvg
+	 */
+	return nmi_watchdog & 0x3;
+}
 #endif
 
 void lapic_watchdog_stop(void);
-- 
cgit v1.2.3-70-g09d2


From bea41808efdd8815435376209f23f406f8bf435f Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 25 Jun 2008 00:18:57 -0400
Subject: x86: asm-x86/pgtable.h: fix compiler warning

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/pgtable.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index f048975b31a..1330e7487da 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -425,6 +425,8 @@ static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
  * race with other CPU's that might be updating the dirty
  * bit at the same time.
  */
+struct vm_area_struct;
+
 #define  __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
 extern int ptep_set_access_flags(struct vm_area_struct *vma,
 				 unsigned long address, pte_t *ptep,
-- 
cgit v1.2.3-70-g09d2


From d338c73c39a6ed0d07fe3bb07c7f12fff0dd237d Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 25 Jun 2008 00:18:58 -0400
Subject: x86: add memory clobber to save/loadsegment

Add "memory" clobbers to savesegment and loadsegment, since they can
affect memory accesses and we never want the compiler to reorder them
with respect to memory references.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/system.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h
index bacfceedf1d..f686aa6abfe 100644
--- a/include/asm-x86/system.h
+++ b/include/asm-x86/system.h
@@ -153,14 +153,14 @@ extern void load_gs_index(unsigned);
 		     "jmp 2b\n"			\
 		     ".previous\n"		\
 		     _ASM_EXTABLE(1b,3b)	\
-		     : :"r" (value), "r" (0))
+		     : :"r" (value), "r" (0) : "memory")
 
 
 /*
  * Save a segment register away
  */
 #define savesegment(seg, value)				\
-	asm volatile("mov %%" #seg ",%0":"=rm" (value))
+	asm("mov %%" #seg ",%0":"=rm" (value) : : "memory")
 
 static inline unsigned long get_limit(unsigned long segment)
 {
-- 
cgit v1.2.3-70-g09d2


From af2b1c609ff52b6469d8e67696db98c93c348b0e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 25 Jun 2008 00:18:59 -0400
Subject: x86: add memory barriers to wrmsr

wrmsr is a special instruction which can have arbitrary system-wide
effects.  We don't want the compiler to reorder it with respect to
memory operations, so make it a memory barrier.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/msr.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h
index 2b5f2c91db2..ca110ee73f0 100644
--- a/include/asm-x86/msr.h
+++ b/include/asm-x86/msr.h
@@ -66,7 +66,7 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr,
 static inline void native_write_msr(unsigned int msr,
 				    unsigned low, unsigned high)
 {
-	asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high));
+	asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory");
 }
 
 static inline int native_write_msr_safe(unsigned int msr,
@@ -81,7 +81,8 @@ static inline int native_write_msr_safe(unsigned int msr,
 		     _ASM_EXTABLE(2b, 3b)
 		     : "=a" (err)
 		     : "c" (msr), "0" (low), "d" (high),
-		     "i" (-EFAULT));
+		       "i" (-EFAULT)
+		     : "memory");
 	return err;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 4583ed514ea9ac844a6eb02d33120beaedf6837f Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 25 Jun 2008 00:19:03 -0400
Subject: x86, 64-bit: unify early_ioremap

The 32-bit early_ioremap will work equally well for 64-bit, so just use it.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/init_64.c       | 52 ---------------------------------------------
 arch/x86/mm/ioremap.c       |  5 +----
 include/asm-x86/fixmap_64.h | 13 ++++++++++++
 include/asm-x86/io.h        | 13 ++++++++++++
 include/asm-x86/io_32.h     | 12 -----------
 5 files changed, 27 insertions(+), 68 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index bfea7605eaa..9b6049133a8 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -257,58 +257,6 @@ static __meminit void unmap_low_page(void *adr)
 	early_iounmap(adr, PAGE_SIZE);
 }
 
-/* Must run before zap_low_mappings */
-__meminit void *early_ioremap(unsigned long addr, unsigned long size)
-{
-	pmd_t *pmd, *last_pmd;
-	unsigned long vaddr;
-	int i, pmds;
-
-	pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
-	vaddr = __START_KERNEL_map;
-	pmd = level2_kernel_pgt;
-	last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
-
-	for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
-		for (i = 0; i < pmds; i++) {
-			if (pmd_present(pmd[i]))
-				goto continue_outer_loop;
-		}
-		vaddr += addr & ~PMD_MASK;
-		addr &= PMD_MASK;
-
-		for (i = 0; i < pmds; i++, addr += PMD_SIZE)
-			set_pmd(pmd+i, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
-		__flush_tlb_all();
-
-		return (void *)vaddr;
-continue_outer_loop:
-		;
-	}
-	printk(KERN_ERR "early_ioremap(0x%lx, %lu) failed\n", addr, size);
-
-	return NULL;
-}
-
-/*
- * To avoid virtual aliases later:
- */
-__meminit void early_iounmap(void *addr, unsigned long size)
-{
-	unsigned long vaddr;
-	pmd_t *pmd;
-	int i, pmds;
-
-	vaddr = (unsigned long)addr;
-	pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
-	pmd = level2_kernel_pgt + pmd_index(vaddr);
-
-	for (i = 0; i < pmds; i++)
-		pmd_clear(pmd + i);
-
-	__flush_tlb_all();
-}
-
 static unsigned long __meminit
 phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
 {
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 0561fde56a5..092b3d72498 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -381,8 +381,6 @@ void unxlate_dev_mem_ptr(unsigned long phys, void *addr)
 	return;
 }
 
-#ifdef CONFIG_X86_32
-
 int __initdata early_ioremap_debug;
 
 static int __init early_ioremap_debug_setup(char *str)
@@ -483,6 +481,7 @@ static void __init __early_set_fixmap(enum fixed_addresses idx,
 		return;
 	}
 	pte = early_ioremap_pte(addr);
+
 	if (pgprot_val(flags))
 		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
 	else
@@ -624,5 +623,3 @@ void __this_fixmap_does_not_exist(void)
 {
 	WARN_ON(1);
 }
-
-#endif /* CONFIG_X86_32 */
diff --git a/include/asm-x86/fixmap_64.h b/include/asm-x86/fixmap_64.h
index 626098823a0..c7dcc365f47 100644
--- a/include/asm-x86/fixmap_64.h
+++ b/include/asm-x86/fixmap_64.h
@@ -49,6 +49,19 @@ enum fixed_addresses {
 #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
 	FIX_OHCI1394_BASE,
 #endif
+	__end_of_permanent_fixed_addresses,
+	/*
+	 * 256 temporary boot-time mappings, used by early_ioremap(),
+	 * before ioremap() is functional.
+	 *
+	 * We round it up to the next 512 pages boundary so that we
+	 * can have a single pgd entry and a single pte table:
+	 */
+#define NR_FIX_BTMAPS		64
+#define FIX_BTMAPS_NESTING	4
+	FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 512 -
+			(__end_of_permanent_fixed_addresses & 511),
+	FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1,
 	__end_of_fixed_addresses
 };
 
diff --git a/include/asm-x86/io.h b/include/asm-x86/io.h
index 8e9eca93f9b..c63563df4ac 100644
--- a/include/asm-x86/io.h
+++ b/include/asm-x86/io.h
@@ -72,4 +72,17 @@ extern int ioremap_change_attr(unsigned long vaddr, unsigned long size,
 				unsigned long prot_val);
 extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size);
 
+/*
+ * early_ioremap() and early_iounmap() are for temporary early boot-time
+ * mappings, before the real ioremap() is functional.
+ * A boot-time mapping is currently limited to at most 16 pages.
+ */
+extern void early_ioremap_init(void);
+extern void early_ioremap_clear(void);
+extern void early_ioremap_reset(void);
+extern void *early_ioremap(unsigned long offset, unsigned long size);
+extern void early_iounmap(void *addr, unsigned long size);
+extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
+
+
 #endif /* _ASM_X86_IO_H */
diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h
index d71be8df979..4df44ed5407 100644
--- a/include/asm-x86/io_32.h
+++ b/include/asm-x86/io_32.h
@@ -121,18 +121,6 @@ static inline void __iomem *ioremap(resource_size_t offset, unsigned long size)
 
 extern void iounmap(volatile void __iomem *addr);
 
-/*
- * early_ioremap() and early_iounmap() are for temporary early boot-time
- * mappings, before the real ioremap() is functional.
- * A boot-time mapping is currently limited to at most 16 pages.
- */
-extern void early_ioremap_init(void);
-extern void early_ioremap_clear(void);
-extern void early_ioremap_reset(void);
-extern void *early_ioremap(unsigned long offset, unsigned long size);
-extern void early_iounmap(void *addr, unsigned long size);
-extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
-
 /*
  * ISA I/O bus memory addresses are 1:1 with the physical address.
  */
-- 
cgit v1.2.3-70-g09d2


From 43adfc26dea171558f944adbc9adecddf2d4602f Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 25 Jun 2008 00:19:04 -0400
Subject: x86, 64-bit: add gate_offset() and gate_segment() macros

For calculating the offset from struct gate_struct fields.

[ gate_offset and gate_segment were broken for 32-bit. ]

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/desc_defs.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/desc_defs.h b/include/asm-x86/desc_defs.h
index eccb4ea1f91..f7bacf357da 100644
--- a/include/asm-x86/desc_defs.h
+++ b/include/asm-x86/desc_defs.h
@@ -75,10 +75,14 @@ struct ldttss_desc64 {
 typedef struct gate_struct64 gate_desc;
 typedef struct ldttss_desc64 ldt_desc;
 typedef struct ldttss_desc64 tss_desc;
+#define gate_offset(g) ((g).offset_low | ((unsigned long)(g).offset_middle << 16) | ((unsigned long)(g).offset_high << 32))
+#define gate_segment(g) ((g).segment)
 #else
 typedef struct desc_struct gate_desc;
 typedef struct desc_struct ldt_desc;
 typedef struct desc_struct tss_desc;
+#define gate_offset(g)		(((g).b & 0xffff0000) | ((g).a & 0x0000ffff))
+#define gate_segment(g)		((g).a >> 16)
 #endif
 
 struct desc_ptr {
-- 
cgit v1.2.3-70-g09d2


From e7a9b0b3c32aa13f4c766eb6a4e7038260718d4c Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Wed, 25 Jun 2008 00:19:05 -0400
Subject: x86, 64-bit: use __pgd() on mk_kernel_pgd()

Use __pgd() on mk_kernel_pgd()

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/pgtable_64.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 1cc50d22d73..23f35fff5fc 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -195,7 +195,7 @@ static inline int pmd_bad(pmd_t pmd)
 #define pgd_offset_k(address) (init_level4_pgt + pgd_index((address)))
 #define pgd_present(pgd) (pgd_val(pgd) & _PAGE_PRESENT)
 static inline int pgd_large(pgd_t pgd) { return 0; }
-#define mk_kernel_pgd(address) ((pgd_t){ (address) | _KERNPG_TABLE })
+#define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE)
 
 /* PUD - Level3 access */
 /* to find an entry in a page-table-directory. */
-- 
cgit v1.2.3-70-g09d2


From fb15a9b3047a245a30a51696e4d8e29b1175a598 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 25 Jun 2008 00:19:06 -0400
Subject: x86: unify pgd_index

pgd_index is common for 32 and 64-bit, so move it to a common place.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/pgtable.h    | 20 ++++++++++++++++++++
 include/asm-x86/pgtable_32.h | 20 --------------------
 include/asm-x86/pgtable_64.h |  3 ---
 3 files changed, 20 insertions(+), 23 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 1330e7487da..85f851a0b99 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -357,6 +357,26 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pte);
 # include "pgtable_64.h"
 #endif
 
+/*
+ * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD]
+ *
+ * this macro returns the index of the entry in the pgd page which would
+ * control the given virtual address
+ */
+#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
+
+/*
+ * pgd_offset() returns a (pgd_t *)
+ * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
+ */
+#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
+/*
+ * a shortcut which implies the use of the kernel's pgd, instead
+ * of a process's
+ */
+#define pgd_offset_k(address) pgd_offset(&init_mm, (address))
+
+
 #define KERNEL_PGD_BOUNDARY	pgd_index(PAGE_OFFSET)
 #define KERNEL_PGD_PTRS		(PTRS_PER_PGD - KERNEL_PGD_BOUNDARY)
 
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index 32ca03109a4..ec871c420d7 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -113,26 +113,6 @@ extern unsigned long pg0[];
  */
 #define mk_pte(page, pgprot)	pfn_pte(page_to_pfn(page), (pgprot))
 
-/*
- * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD]
- *
- * this macro returns the index of the entry in the pgd page which would
- * control the given virtual address
- */
-#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
-#define pgd_index_k(addr) pgd_index((addr))
-
-/*
- * pgd_offset() returns a (pgd_t *)
- * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
- */
-#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
-
-/*
- * a shortcut which implies the use of the kernel's pgd, instead
- * of a process's
- */
-#define pgd_offset_k(address) pgd_offset(&init_mm, (address))
 
 static inline int pud_large(pud_t pud) { return 0; }
 
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 23f35fff5fc..9a9350abe4e 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -190,9 +190,6 @@ static inline int pmd_bad(pmd_t pmd)
 #define pgd_page_vaddr(pgd)						\
 	((unsigned long)__va((unsigned long)pgd_val((pgd)) & PTE_MASK))
 #define pgd_page(pgd)		(pfn_to_page(pgd_val((pgd)) >> PAGE_SHIFT))
-#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
-#define pgd_offset(mm, address)	((mm)->pgd + pgd_index((address)))
-#define pgd_offset_k(address) (init_level4_pgt + pgd_index((address)))
 #define pgd_present(pgd) (pgd_val(pgd) & _PAGE_PRESENT)
 static inline int pgd_large(pgd_t pgd) { return 0; }
 #define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE)
-- 
cgit v1.2.3-70-g09d2


From c3c2fee38462fa34b90e0a5427c7fc564bb5c96c Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 25 Jun 2008 00:19:07 -0400
Subject: x86: unify mmu_context.h

Some amount of asm-x86/mmu_context.h can be unified, including
activate_mm paravirt hook.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/mmu_context.h    | 32 ++++++++++++++++++++++++++++++++
 include/asm-x86/mmu_context_32.h | 28 ----------------------------
 include/asm-x86/mmu_context_64.h | 18 ------------------
 3 files changed, 32 insertions(+), 46 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/mmu_context.h b/include/asm-x86/mmu_context.h
index 6598450da6c..fac57014e7c 100644
--- a/include/asm-x86/mmu_context.h
+++ b/include/asm-x86/mmu_context.h
@@ -1,5 +1,37 @@
+#ifndef __ASM_X86_MMU_CONTEXT_H
+#define __ASM_X86_MMU_CONTEXT_H
+
+#include <asm/desc.h>
+#include <asm/atomic.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/paravirt.h>
+#ifndef CONFIG_PARAVIRT
+#include <asm-generic/mm_hooks.h>
+
+static inline void paravirt_activate_mm(struct mm_struct *prev,
+					struct mm_struct *next)
+{
+}
+#endif	/* !CONFIG_PARAVIRT */
+
+/*
+ * Used for LDT copy/destruction.
+ */
+int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
+void destroy_context(struct mm_struct *mm);
+
 #ifdef CONFIG_X86_32
 # include "mmu_context_32.h"
 #else
 # include "mmu_context_64.h"
 #endif
+
+#define activate_mm(prev, next)			\
+do {						\
+	paravirt_activate_mm((prev), (next));	\
+	switch_mm((prev), (next), NULL);	\
+} while (0);
+
+
+#endif /* __ASM_X86_MMU_CONTEXT_H */
diff --git a/include/asm-x86/mmu_context_32.h b/include/asm-x86/mmu_context_32.h
index 9756ae0f1dd..824fc575c6d 100644
--- a/include/asm-x86/mmu_context_32.h
+++ b/include/asm-x86/mmu_context_32.h
@@ -1,28 +1,6 @@
 #ifndef __I386_SCHED_H
 #define __I386_SCHED_H
 
-#include <asm/desc.h>
-#include <asm/atomic.h>
-#include <asm/pgalloc.h>
-#include <asm/tlbflush.h>
-#include <asm/paravirt.h>
-#ifndef CONFIG_PARAVIRT
-#include <asm-generic/mm_hooks.h>
-
-static inline void paravirt_activate_mm(struct mm_struct *prev,
-					struct mm_struct *next)
-{
-}
-#endif	/* !CONFIG_PARAVIRT */
-
-
-/*
- * Used for LDT copy/destruction.
- */
-int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
-void destroy_context(struct mm_struct *mm);
-
-
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 {
 #ifdef CONFIG_SMP
@@ -75,10 +53,4 @@ static inline void switch_mm(struct mm_struct *prev,
 #define deactivate_mm(tsk, mm)			\
 	asm("movl %0,%%gs": :"r" (0));
 
-#define activate_mm(prev, next)			\
-do {						\
-	paravirt_activate_mm((prev), (next));	\
-	switch_mm((prev), (next), NULL);	\
-} while (0);
-
 #endif
diff --git a/include/asm-x86/mmu_context_64.h b/include/asm-x86/mmu_context_64.h
index ca44c71e7fb..c7000634cca 100644
--- a/include/asm-x86/mmu_context_64.h
+++ b/include/asm-x86/mmu_context_64.h
@@ -1,21 +1,7 @@
 #ifndef __X86_64_MMU_CONTEXT_H
 #define __X86_64_MMU_CONTEXT_H
 
-#include <asm/desc.h>
-#include <asm/atomic.h>
-#include <asm/pgalloc.h>
 #include <asm/pda.h>
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
-#ifndef CONFIG_PARAVIRT
-#include <asm-generic/mm_hooks.h>
-#endif
-
-/*
- * possibly do the LDT unload here?
- */
-int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
-void destroy_context(struct mm_struct *mm);
 
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 {
@@ -65,8 +51,4 @@ do {						\
 	asm volatile("movl %0,%%fs"::"r"(0));	\
 } while (0)
 
-#define activate_mm(prev, next)			\
-	switch_mm((prev), (next), NULL)
-
-
 #endif
-- 
cgit v1.2.3-70-g09d2


From 4e29684c40f2a332ba4d05f6482d5807725d5624 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 24 Jun 2008 12:18:14 -0700
Subject: x86: introduce init_memory_mapping for 32bit #1

... so can we use mem below max_low_pfn earlier.

this allows us to move several functions more early instead of waiting
to after paging_init.

That includes moving relocate_initrd() earlier in the bootup, and kva
related early setup done in initmem_init. (in followup patches)

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup_32.c |  10 ++--
 arch/x86/mm/init_32.c      | 141 +++++++++++++++++++++++++++++++++++----------
 include/asm-x86/page_32.h  |   2 +
 3 files changed, 120 insertions(+), 33 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index bba8d57bd7d..03007cada0d 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -226,10 +226,8 @@ static void __init reserve_initrd(void)
 	}
 
 	/* We need to move the initrd down into lowmem */
-	ramdisk_target = max_pfn_mapped<<PAGE_SHIFT;
-	ramdisk_here = find_e820_area(min(ramdisk_target, end_of_lowmem>>1),
-				 end_of_lowmem, ramdisk_size,
-				 PAGE_SIZE);
+	ramdisk_here = find_e820_area(0, end_of_lowmem, ramdisk_size,
+					 PAGE_SIZE);
 
 	if (ramdisk_here == -1ULL)
 		panic("Cannot find place for new RAMDISK of size %lld\n",
@@ -433,8 +431,12 @@ void __init setup_arch(char **cmdline_p)
 		max_pfn = e820_end_of_ram();
 	}
 
+	/* max_low_pfn get updated here */
 	find_low_pfn_range();
 
+	/* max_pfn_mapped is updated here */
+	init_memory_mapping(0, (max_low_pfn << PAGE_SHIFT));
+
 	reserve_initrd();
 
 	dmi_scan_machine();
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 20ca29591ab..619058e6bff 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -57,6 +57,27 @@ unsigned long highstart_pfn, highend_pfn;
 
 static noinline int do_test_wp_bit(void);
 
+
+static unsigned long __initdata table_start;
+static unsigned long __meminitdata table_end;
+static unsigned long __meminitdata table_top;
+
+static int __initdata after_init_bootmem;
+
+static __init void *alloc_low_page(unsigned long *phys)
+{
+	unsigned long pfn = table_end++;
+	void *adr;
+
+	if (pfn >= table_top)
+		panic("alloc_low_page: ran out of memory");
+
+	adr = __va(pfn * PAGE_SIZE);
+	memset(adr, 0, PAGE_SIZE);
+	*phys  = pfn * PAGE_SIZE;
+	return adr;
+}
+
 /*
  * Creates a middle page table and puts a pointer to it in the
  * given global directory entry. This only returns the gd entry
@@ -68,9 +89,12 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
 	pmd_t *pmd_table;
 
 #ifdef CONFIG_X86_PAE
+	unsigned long phys;
 	if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
-		pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
-
+		if (after_init_bootmem)
+			pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+		else
+			pmd_table = (pmd_t *)alloc_low_page(&phys);
 		paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
 		set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
 		pud = pud_offset(pgd, 0);
@@ -92,12 +116,16 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
 	if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
 		pte_t *page_table = NULL;
 
+		if (after_init_bootmem) {
 #ifdef CONFIG_DEBUG_PAGEALLOC
-		page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
+			page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
 #endif
-		if (!page_table) {
-			page_table =
+			if (!page_table)
+				page_table =
 				(pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+		} else {
+			unsigned long phys;
+			page_table = (pte_t *)alloc_low_page(&phys);
 		}
 
 		paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
@@ -155,7 +183,9 @@ static inline int is_kernel_text(unsigned long addr)
  * of max_low_pfn pages, by creating page tables starting from address
  * PAGE_OFFSET:
  */
-static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
+static void __init kernel_physical_mapping_init(pgd_t *pgd_base,
+						unsigned long start,
+						unsigned long end)
 {
 	int pgd_idx, pmd_idx, pte_ofs;
 	unsigned long pfn;
@@ -163,18 +193,19 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
 	pmd_t *pmd;
 	pte_t *pte;
 	unsigned pages_2m = 0, pages_4k = 0;
+	unsigned limit_pfn = end >> PAGE_SHIFT;
 
 	pgd_idx = pgd_index(PAGE_OFFSET);
 	pgd = pgd_base + pgd_idx;
-	pfn = 0;
+	pfn = start >> PAGE_SHIFT;
 
 	for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
 		pmd = one_md_table_init(pgd);
-		if (pfn >= max_low_pfn)
+		if (pfn >= limit_pfn)
 			continue;
 
 		for (pmd_idx = 0;
-		     pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn;
+		     pmd_idx < PTRS_PER_PMD && pfn < limit_pfn;
 		     pmd++, pmd_idx++) {
 			unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET;
 
@@ -418,20 +449,7 @@ static void __init pagetable_init(void)
 
 	paravirt_pagetable_setup_start(pgd_base);
 
-	/* Enable PSE if available */
-	if (cpu_has_pse)
-		set_in_cr4(X86_CR4_PSE);
-
-	/* Enable PGE if available */
-	if (cpu_has_pge) {
-		set_in_cr4(X86_CR4_PGE);
-		__PAGE_KERNEL |= _PAGE_GLOBAL;
-		__PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
-	}
-
-	kernel_physical_mapping_init(pgd_base);
 	remap_numa_kva();
-
 	/*
 	 * Fixed mappings, only the page table structure has to be
 	 * created - mappings will be set by set_fixmap():
@@ -703,6 +721,7 @@ void __init setup_bootmem_allocator(void)
 		free_bootmem_with_active_regions(i, max_low_pfn);
 	early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
 
+	after_init_bootmem = 1;
 }
 
 /*
@@ -723,6 +742,77 @@ static void __init remapped_pgdat_init(void)
 	}
 }
 
+static void __init find_early_table_space(unsigned long end)
+{
+	unsigned long puds, pmds, tables, start;
+
+	puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
+	tables = PAGE_ALIGN(puds * sizeof(pud_t));
+
+	pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
+	tables += PAGE_ALIGN(pmds * sizeof(pmd_t));
+
+	/*
+	 * RED-PEN putting page tables only on node 0 could
+	 * cause a hotspot and fill up ZONE_DMA. The page tables
+	 * need roughly 0.5KB per GB.
+	 */
+	start = 0x7000;
+	table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT,
+					tables, PAGE_SIZE);
+	if (table_start == -1UL)
+		panic("Cannot find space for the kernel page tables");
+
+	table_start >>= PAGE_SHIFT;
+	table_end = table_start;
+	table_top = table_start + (tables>>PAGE_SHIFT);
+
+	printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
+		end, table_start << PAGE_SHIFT,
+		(table_start << PAGE_SHIFT) + tables);
+}
+
+unsigned long __init_refok init_memory_mapping(unsigned long start,
+						unsigned long end)
+{
+	pgd_t *pgd_base = swapper_pg_dir;
+
+	/*
+	 * Find space for the kernel direct mapping tables.
+	 */
+	if (!after_init_bootmem)
+		find_early_table_space(end);
+
+#ifdef CONFIG_X86_PAE
+	set_nx();
+	if (nx_enabled)
+		printk(KERN_INFO "NX (Execute Disable) protection: active\n");
+#endif
+
+	/* Enable PSE if available */
+	if (cpu_has_pse)
+		set_in_cr4(X86_CR4_PSE);
+
+	/* Enable PGE if available */
+	if (cpu_has_pge) {
+		set_in_cr4(X86_CR4_PGE);
+		__PAGE_KERNEL |= _PAGE_GLOBAL;
+		__PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
+	}
+
+	kernel_physical_mapping_init(pgd_base, start, end);
+
+	load_cr3(swapper_pg_dir);
+
+	__flush_tlb_all();
+
+	if (!after_init_bootmem)
+		reserve_early(table_start << PAGE_SHIFT,
+				 table_end << PAGE_SHIFT, "PGTABLE");
+
+	return end >> PAGE_SHIFT;
+}
+
 /*
  * paging_init() sets up the page tables - note that the first 8MB are
  * already mapped by head.S.
@@ -732,15 +822,8 @@ static void __init remapped_pgdat_init(void)
  */
 void __init paging_init(void)
 {
-#ifdef CONFIG_X86_PAE
-	set_nx();
-	if (nx_enabled)
-		printk(KERN_INFO "NX (Execute Disable) protection: active\n");
-#endif
 	pagetable_init();
 
-	load_cr3(swapper_pg_dir);
-
 	__flush_tlb_all();
 
 	kmap_init();
diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index 3810d14051e..4ae1daba129 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h
@@ -93,6 +93,8 @@ extern int sysctl_legacy_va_layout;
 #define MAXMEM			(-__PAGE_OFFSET - __VMALLOC_RESERVE)
 
 extern void find_low_pfn_range(void);
+extern unsigned long init_memory_mapping(unsigned long start,
+					 unsigned long end);
 extern void initmem_init(unsigned long, unsigned long);
 extern void zone_sizes_init(void);
 extern void setup_bootmem_allocator(void);
-- 
cgit v1.2.3-70-g09d2


From cfb0e53b05402f1ce65053677409a819c1798d34 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 24 Jun 2008 12:18:58 -0700
Subject: x86: introduce init_memory_mapping for 32bit #2

moving relocate_initrd early

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup_32.c | 22 +++++++---------------
 arch/x86/mm/init_32.c      |  3 ---
 include/asm-x86/setup.h    |  1 -
 3 files changed, 7 insertions(+), 19 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 03007cada0d..4b953dc9388 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -186,7 +186,7 @@ static inline void copy_edd(void)
 
 #ifdef CONFIG_BLK_DEV_INITRD
 
-static bool do_relocate_initrd = false;
+static void __init relocate_initrd(void);
 
 static void __init reserve_initrd(void)
 {
@@ -195,7 +195,6 @@ static void __init reserve_initrd(void)
 	u64 ramdisk_end   = ramdisk_image + ramdisk_size;
 	u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
 	u64 ramdisk_here;
-	u64 ramdisk_target;
 
 	if (!boot_params.hdr.type_of_loader ||
 	    !ramdisk_image || !ramdisk_size)
@@ -242,12 +241,12 @@ static void __init reserve_initrd(void)
 	printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
 			 ramdisk_here, ramdisk_here + ramdisk_size);
 
-	do_relocate_initrd = true;
+	relocate_initrd();
 }
 
 #define MAX_MAP_CHUNK	(NR_FIX_BTMAPS << PAGE_SHIFT)
 
-void __init post_reserve_initrd(void)
+static void __init relocate_initrd(void)
 {
 	u64 ramdisk_image = boot_params.hdr.ramdisk_image;
 	u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
@@ -256,9 +255,6 @@ void __init post_reserve_initrd(void)
 	unsigned long slop, clen, mapaddr;
 	char *p, *q;
 
-	if (!do_relocate_initrd)
-		return;
-
 	ramdisk_here = initrd_start - PAGE_OFFSET;
 
 	q = (char *)initrd_start;
@@ -269,10 +265,6 @@ void __init post_reserve_initrd(void)
 		p = (char *)__va(ramdisk_image);
 		memcpy(q, p, clen);
 		q += clen;
-		/* need to free these low pages...*/
-		printk(KERN_INFO "Freeing old partial RAMDISK %08llx-%08llx\n",
-			 ramdisk_image, ramdisk_image + clen - 1);
-		free_bootmem(ramdisk_image, clen);
 		ramdisk_image += clen;
 		ramdisk_size  -= clen;
 	}
@@ -298,16 +290,16 @@ void __init post_reserve_initrd(void)
 		ramdisk_image, ramdisk_image + ramdisk_size - 1,
 		ramdisk_here, ramdisk_here + ramdisk_size - 1);
 
-	/* need to free that, otherwise init highmem will reserve it again */
+	/*
+	 * need to free old one, otherwise init cross max_low_pfn could be
+	 * converted to bootmem
+	 */
 	free_early(ramdisk_image, ramdisk_image+ramdisk_size);
 }
 #else
 void __init reserve_initrd(void)
 {
 }
-void __init post_reserve_initrd(void)
-{
-}
 #endif /* CONFIG_BLK_DEV_INITRD */
 
 #ifdef CONFIG_MCA
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 619058e6bff..ecccb055b08 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -831,9 +831,6 @@ void __init paging_init(void)
 	/*
 	 * NOTE: at this point the bootmem allocator is fully available.
 	 */
-
-	post_reserve_initrd();
-
 	remapped_pgdat_init();
 	sparse_init();
 	zone_sizes_init();
diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index 4ebb4ef14c0..bb12a1619c1 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -39,7 +39,6 @@ void reserve_crashkernel(void);
 #include <asm/bootparam.h>
 
 void reserve_standard_io_resources(void);
-extern void post_reserve_initrd(void);
 
 #ifndef _SETUP
 
-- 
cgit v1.2.3-70-g09d2


From 3a58a2a6c879b2e47daafd6e641661c50ac9da5a Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 24 Jun 2008 12:19:41 -0700
Subject: x86: introduce init_memory_mapping for 32bit #3

move kva related early backto initmem_init for numa32

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/discontig_32.c |  8 ++++++--
 arch/x86/mm/init_32.c      | 20 --------------------
 include/asm-x86/numa_32.h  |  5 -----
 3 files changed, 6 insertions(+), 27 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index f5ae31935ca..42484d446d0 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -200,7 +200,7 @@ void *alloc_remap(int nid, unsigned long size)
 	return allocation;
 }
 
-void __init remap_numa_kva(void)
+static void __init remap_numa_kva(void)
 {
 	void *vaddr;
 	unsigned long pfn;
@@ -373,12 +373,16 @@ void __init initmem_init(unsigned long start_pfn,
 
 		allocate_pgdat(nid);
 	}
+	remap_numa_kva();
+
 	printk(KERN_DEBUG "High memory starts at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(highstart_pfn));
 	for_each_online_node(nid)
 		propagate_e820_map_node(nid);
 
-	memset(NODE_DATA(0), 0, sizeof(struct pglist_data));
+	for_each_online_node(nid)
+		memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+
 	NODE_DATA(0)->bdata = &node0_bdata;
 	setup_bootmem_allocator();
 }
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index ecccb055b08..c059c460e32 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -449,7 +449,6 @@ static void __init pagetable_init(void)
 
 	paravirt_pagetable_setup_start(pgd_base);
 
-	remap_numa_kva();
 	/*
 	 * Fixed mappings, only the page table structure has to be
 	 * created - mappings will be set by set_fixmap():
@@ -724,24 +723,6 @@ void __init setup_bootmem_allocator(void)
 	after_init_bootmem = 1;
 }
 
-/*
- * The node 0 pgdat is initialized before all of these because
- * it's needed for bootmem.  node>0 pgdats have their virtual
- * space allocated before the pagetables are in place to access
- * them, so they can't be cleared then.
- *
- * This should all compile down to nothing when NUMA is off.
- */
-static void __init remapped_pgdat_init(void)
-{
-	int nid;
-
-	for_each_online_node(nid) {
-		if (nid != 0)
-			memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
-	}
-}
-
 static void __init find_early_table_space(unsigned long end)
 {
 	unsigned long puds, pmds, tables, start;
@@ -831,7 +812,6 @@ void __init paging_init(void)
 	/*
 	 * NOTE: at this point the bootmem allocator is fully available.
 	 */
-	remapped_pgdat_init();
 	sparse_init();
 	zone_sizes_init();
 
diff --git a/include/asm-x86/numa_32.h b/include/asm-x86/numa_32.h
index ed6c88eac84..220d7b7707a 100644
--- a/include/asm-x86/numa_32.h
+++ b/include/asm-x86/numa_32.h
@@ -5,12 +5,7 @@ extern int pxm_to_nid(int pxm);
 extern void numa_remove_cpu(int cpu);
 
 #ifdef CONFIG_NUMA
-extern void __init remap_numa_kva(void);
 extern void set_highmem_pages_init(void);
-#else
-static inline void remap_numa_kva(void)
-{
-}
 #endif
 
 #endif /* _ASM_X86_32_NUMA_H */
-- 
cgit v1.2.3-70-g09d2


From f47f9d538ecc938bed589e9d39ad7b454f3b506c Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 24 Jun 2008 22:13:15 -0700
Subject: x86: numa 32 using apicid_2_node to get node for logical_apicid

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/mach-bigsmp/mach_apic.h  | 2 +-
 include/asm-x86/mach-default/mach_apic.h | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/mach-bigsmp/mach_apic.h b/include/asm-x86/mach-bigsmp/mach_apic.h
index 8327907c79b..017c8c19ad8 100644
--- a/include/asm-x86/mach-bigsmp/mach_apic.h
+++ b/include/asm-x86/mach-bigsmp/mach_apic.h
@@ -81,7 +81,7 @@ static inline int multi_timer_check(int apic, int irq)
 
 static inline int apicid_to_node(int logical_apicid)
 {
-	return (0);
+	return apicid_2_node[hard_smp_processor_id()];
 }
 
 static inline int cpu_present_to_apicid(int mps_cpu)
diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h
index 21003b56ae9..0b2cde5e1b7 100644
--- a/include/asm-x86/mach-default/mach_apic.h
+++ b/include/asm-x86/mach-default/mach_apic.h
@@ -77,7 +77,11 @@ static inline void setup_apic_routing(void)
 
 static inline int apicid_to_node(int logical_apicid)
 {
+#ifdef CONFIG_SMP
+	return apicid_2_node[hard_smp_processor_id()];
+#else
 	return 0;
+#endif
 }
 #endif
 
-- 
cgit v1.2.3-70-g09d2


From c987d12f8455b19b3b057d63bac3de161bd809fc Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 24 Jun 2008 22:14:09 -0700
Subject: x86: remove end_pfn in 64bit

and use max_pfn directly.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/aperture_64.c      |  6 ++++--
 arch/x86/kernel/e820.c             |  2 +-
 arch/x86/kernel/early-quirks.c     |  2 +-
 arch/x86/kernel/machine_kexec_64.c |  2 +-
 arch/x86/kernel/pci-calgary_64.c   |  4 ++--
 arch/x86/kernel/pci-dma.c          |  4 ++--
 arch/x86/kernel/pci-gart_64.c      |  4 ++--
 arch/x86/kernel/pci-swiotlb_64.c   |  2 +-
 arch/x86/mm/init_64.c              | 17 ++++++-----------
 arch/x86/mm/k8topology_64.c        |  4 ++--
 arch/x86/mm/numa_64.c              |  4 ++--
 arch/x86/mm/srat_64.c              |  2 +-
 arch/x86/power/hibernate_64.c      |  2 +-
 include/asm-x86/page_64.h          |  5 +++--
 14 files changed, 29 insertions(+), 31 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 600470d464f..9f907806c1a 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -407,7 +407,9 @@ void __init gart_iommu_hole_init(void)
 				    agp_aper_base == aper_base &&
 				    agp_aper_order == aper_order) {
 					/* the same between two setting from NB and agp */
-					if (!no_iommu && end_pfn > MAX_DMA32_PFN && !printed_gart_size_msg) {
+					if (!no_iommu &&
+					    max_pfn > MAX_DMA32_PFN &&
+					    !printed_gart_size_msg) {
 						printk(KERN_ERR "you are using iommu with agp, but GART size is less than 64M\n");
 						printk(KERN_ERR "please increase GART size in your BIOS setup\n");
 						printk(KERN_ERR "if BIOS doesn't have that option, contact your HW vendor!\n");
@@ -448,7 +450,7 @@ out:
 		/* Got the aperture from the AGP bridge */
 	} else if (swiotlb && !valid_agp) {
 		/* Do nothing */
-	} else if ((!no_iommu && end_pfn > MAX_DMA32_PFN) ||
+	} else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) ||
 		   force_iommu ||
 		   valid_agp ||
 		   fallback_aper_force) {
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 1b76b25b4d9..3900ff51bc6 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -527,7 +527,7 @@ __init void e820_setup_gap(void)
 
 #ifdef CONFIG_X86_64
 	if (!found) {
-		gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024;
+		gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024;
 		printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit "
 		       "address range\n"
 		       KERN_ERR "PCI: Unassigned devices with 32bit resource "
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 84fd9f2a28f..a4665f37cfc 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -50,7 +50,7 @@ static void __init fix_hypertransport_config(int num, int slot, int func)
 static void __init via_bugs(int  num, int slot, int func)
 {
 #ifdef CONFIG_GART_IOMMU
-	if ((end_pfn > MAX_DMA32_PFN ||  force_iommu) &&
+	if ((max_pfn > MAX_DMA32_PFN ||  force_iommu) &&
 	    !gart_iommu_aperture_allowed) {
 		printk(KERN_INFO
 		       "Looks like a VIA chipset. Disabling IOMMU."
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 576a03db451..7830dc4a838 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -110,7 +110,7 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
 {
 	pgd_t *level4p;
 	level4p = (pgd_t *)__va(start_pgtable);
- 	return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT);
+	return init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
 }
 
 static void set_idt(void *newidt, u16 limit)
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index e28ec497e14..6959b5c45df 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -1394,7 +1394,7 @@ void __init detect_calgary(void)
 		return;
 	}
 
-	specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE);
+	specified_table_size = determine_tce_table_size(max_pfn * PAGE_SIZE);
 
 	for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
 		struct calgary_bus_info *info = &bus_info[bus];
@@ -1459,7 +1459,7 @@ int __init calgary_iommu_init(void)
 	if (ret) {
 		printk(KERN_ERR "PCI-DMA: Calgary init failed %d, "
 		       "falling back to no_iommu\n", ret);
-		if (end_pfn > MAX_DMA32_PFN)
+		if (max_pfn > MAX_DMA32_PFN)
 			printk(KERN_ERR "WARNING more than 4GB of memory, "
 					"32bit PCI may malfunction.\n");
 		return ret;
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index cb0bdf44071..8467ec2320f 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -75,7 +75,7 @@ early_param("dma32_size", parse_dma32_size_opt);
 void __init dma32_reserve_bootmem(void)
 {
 	unsigned long size, align;
-	if (end_pfn <= MAX_DMA32_PFN)
+	if (max_pfn <= MAX_DMA32_PFN)
 		return;
 
 	/*
@@ -94,7 +94,7 @@ void __init dma32_reserve_bootmem(void)
 static void __init dma32_free_bootmem(void)
 {
 
-	if (end_pfn <= MAX_DMA32_PFN)
+	if (max_pfn <= MAX_DMA32_PFN)
 		return;
 
 	if (!dma32_bootmem_ptr)
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 021f3c684a6..d0d18db5d2a 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -751,10 +751,10 @@ void __init gart_iommu_init(void)
 		return;
 
 	if (no_iommu ||
-	    (!force_iommu && end_pfn <= MAX_DMA32_PFN) ||
+	    (!force_iommu && max_pfn <= MAX_DMA32_PFN) ||
 	    !gart_iommu_aperture ||
 	    (no_agp && init_k8_gatt(&info) < 0)) {
-		if (end_pfn > MAX_DMA32_PFN) {
+		if (max_pfn > MAX_DMA32_PFN) {
 			printk(KERN_WARNING "More than 4GB of memory "
 			       	          "but GART IOMMU not available.\n"
 			       KERN_WARNING "falling back to iommu=soft.\n");
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 490da7f4b8d..82299cd1d04 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -38,7 +38,7 @@ const struct dma_mapping_ops swiotlb_dma_ops = {
 void __init pci_swiotlb_init(void)
 {
 	/* don't initialize swiotlb if iommu=off (no_iommu=1) */
-	if (!iommu_detected && !no_iommu && end_pfn > MAX_DMA32_PFN)
+	if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN)
 	       swiotlb = 1;
 	if (swiotlb_force)
 		swiotlb = 1;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 6eced2f1073..d5d4b04d48a 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -48,11 +48,6 @@
 #include <asm/numa.h>
 #include <asm/cacheflush.h>
 
-/*
- * PFN of last memory page.
- */
-unsigned long end_pfn;
-
 /*
  * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
  * The direct mapping extends to max_pfn_mapped, so that we can directly access
@@ -586,9 +581,9 @@ void __init paging_init(void)
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
 	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
 	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
-	max_zone_pfns[ZONE_NORMAL] = end_pfn;
+	max_zone_pfns[ZONE_NORMAL] = max_pfn;
 
-	memory_present(0, 0, end_pfn);
+	memory_present(0, 0, max_pfn);
 	sparse_init();
 	free_area_init_nodes(max_zone_pfns);
 }
@@ -670,8 +665,8 @@ void __init mem_init(void)
 #else
 	totalram_pages = free_all_bootmem();
 #endif
-	reservedpages = end_pfn - totalram_pages -
-					absent_pages_in_range(0, end_pfn);
+	reservedpages = max_pfn - totalram_pages -
+					absent_pages_in_range(0, max_pfn);
 	after_bootmem = 1;
 
 	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
@@ -690,7 +685,7 @@ void __init mem_init(void)
 	printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
 				"%ldk reserved, %ldk data, %ldk init)\n",
 		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
-		end_pfn << (PAGE_SHIFT-10),
+		max_pfn << (PAGE_SHIFT-10),
 		codesize >> 10,
 		reservedpages << (PAGE_SHIFT-10),
 		datasize >> 10,
@@ -784,7 +779,7 @@ int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
 #endif
 	unsigned long pfn = phys >> PAGE_SHIFT;
 
-	if (pfn >= end_pfn) {
+	if (pfn >= max_pfn) {
 		/*
 		 * This can happen with kdump kernels when accessing
 		 * firmware tables:
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c
index 317573ec925..41f1b5c00a1 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/k8topology_64.c
@@ -143,8 +143,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
 		limit |= (1<<24)-1;
 		limit++;
 
-		if (limit > end_pfn << PAGE_SHIFT)
-			limit = end_pfn << PAGE_SHIFT;
+		if (limit > max_pfn << PAGE_SHIFT)
+			limit = max_pfn << PAGE_SHIFT;
 		if (limit <= base)
 			continue;
 
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 316e5f961ef..b432d578177 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -86,7 +86,7 @@ static int __init allocate_cachealigned_memnodemap(void)
 
 	addr = 0x8000;
 	nodemap_size = round_up(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES);
-	nodemap_addr = find_e820_area(addr, end_pfn<<PAGE_SHIFT,
+	nodemap_addr = find_e820_area(addr, max_pfn<<PAGE_SHIFT,
 				      nodemap_size, L1_CACHE_BYTES);
 	if (nodemap_addr == -1UL) {
 		printk(KERN_ERR
@@ -579,7 +579,7 @@ void __init paging_init(void)
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
 	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
 	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
-	max_zone_pfns[ZONE_NORMAL] = end_pfn;
+	max_zone_pfns[ZONE_NORMAL] = max_pfn;
 
 	sparse_memory_present_with_active_regions(MAX_NUMNODES);
 	sparse_init();
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index b67f5a16755..0fd67b81a8b 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -299,7 +299,7 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
 			pxmram = 0;
 	}
 
-	e820ram = end_pfn - absent_pages_in_range(0, end_pfn);
+	e820ram = max_pfn - absent_pages_in_range(0, max_pfn);
 	/* We seem to lose 3 pages somewhere. Allow a bit of slack. */
 	if ((long)(e820ram - pxmram) >= 1*1024*1024) {
 		printk(KERN_ERR
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index b542355e0e3..6dd000dd793 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -83,7 +83,7 @@ static int set_up_temporary_mappings(void)
 
 	/* Set up the direct mapping from scratch */
 	start = (unsigned long)pfn_to_kaddr(0);
-	end = (unsigned long)pfn_to_kaddr(end_pfn);
+	end = (unsigned long)pfn_to_kaddr(max_pfn);
 
 	for (; start < end; start = next) {
 		pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC);
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index ac37643078e..cac5b9e7826 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -58,7 +58,8 @@
 void clear_page(void *page);
 void copy_page(void *to, void *from);
 
-extern unsigned long end_pfn;
+/* duplicated to the one in bootmem.h */
+extern unsigned long max_pfn;
 extern unsigned long phys_base;
 
 extern unsigned long __phys_addr(unsigned long);
@@ -87,7 +88,7 @@ extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn);
 #endif	/* !__ASSEMBLY__ */
 
 #ifdef CONFIG_FLATMEM
-#define pfn_valid(pfn)          ((pfn) < end_pfn)
+#define pfn_valid(pfn)          ((pfn) < max_pfn)
 #endif
 
 
-- 
cgit v1.2.3-70-g09d2


From 3381959da5a00ae8289cfbd28b0b6d228f2d1d46 Mon Sep 17 00:00:00 2001
From: Alok Kataria <akataria@vmware.com>
Date: Tue, 24 Jun 2008 11:48:30 -0700
Subject: x86: cleanup e820_setup_gap(), add e820_search_gap(), v2

This is a preparatory patch for the next patch in series.
Moves some code from e820_setup_gap to a new function e820_search_gap.
This patch is a part of a bug fix where we walk the ACPI table to calculate
a gap for PCI optional devices.

v1->v2: Patch on top of tip/master.
	Fixes a bug introduced in the last patch about the typeof "last".
	Also the new function e820_search_gap now returns if we found a gap in
	e820_map.

Signed-off-by: Alok N Kataria <akataria@vmware.com>
Cc: lenb@kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/e820.c | 43 ++++++++++++++++++++++++++++---------------
 include/asm-x86/e820.h |  2 ++
 2 files changed, 30 insertions(+), 15 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 3900ff51bc6..22cfd665224 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -488,26 +488,22 @@ void __init update_e820(void)
 }
 
 /*
- * Search for the biggest gap in the low 32 bits of the e820
- * memory space.  We pass this space to PCI to assign MMIO resources
- * for hotplug or unconfigured devices in.
- * Hopefully the BIOS let enough space left.
+ * Search for a gap in the e820 memory space from start_addr to 2^32.
  */
-__init void e820_setup_gap(void)
+__init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
+		unsigned long start_addr)
 {
-	unsigned long gapstart, gapsize, round;
-	unsigned long long last;
-	int i;
+	unsigned long long last = 0x100000000ull;
+	int i = e820.nr_map;
 	int found = 0;
 
-	last = 0x100000000ull;
-	gapstart = 0x10000000;
-	gapsize = 0x400000;
-	i = e820.nr_map;
 	while (--i >= 0) {
 		unsigned long long start = e820.map[i].addr;
 		unsigned long long end = start + e820.map[i].size;
 
+		if (end < start_addr)
+			continue;
+
 		/*
 		 * Since "last" is at most 4GB, we know we'll
 		 * fit in 32 bits if this condition is true
@@ -515,15 +511,32 @@ __init void e820_setup_gap(void)
 		if (last > end) {
 			unsigned long gap = last - end;
 
-			if (gap > gapsize) {
-				gapsize = gap;
-				gapstart = end;
+			if (gap >= *gapsize) {
+				*gapsize = gap;
+				*gapstart = end;
 				found = 1;
 			}
 		}
 		if (start < last)
 			last = start;
 	}
+	return found;
+}
+
+/*
+ * Search for the biggest gap in the low 32 bits of the e820
+ * memory space.  We pass this space to PCI to assign MMIO resources
+ * for hotplug or unconfigured devices in.
+ * Hopefully the BIOS let enough space left.
+ */
+__init void e820_setup_gap(void)
+{
+	unsigned long gapstart, gapsize, round;
+	int found;
+
+	gapstart = 0x10000000;
+	gapsize = 0x400000;
+	found  = e820_search_gap(&gapstart, &gapsize, 0);
 
 #ifdef CONFIG_X86_64
 	if (!found) {
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index 13fa5a076aa..f622685c9af 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -71,6 +71,8 @@ extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type,
 			     int checktype);
 extern void update_e820(void);
 extern void e820_setup_gap(void);
+extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
+				unsigned long start_addr);
 struct setup_data;
 extern void parse_e820_ext(struct setup_data *data, unsigned long pa_data);
 
-- 
cgit v1.2.3-70-g09d2


From 29f784e369a914b5926e01a0b0caae0b47f6452a Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Wed, 25 Jun 2008 18:00:22 -0700
Subject: x86: change some functions in setup.c to static

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup.c     | 8 ++++----
 include/asm-x86/bootparam.h | 1 -
 include/asm-x86/setup.h     | 5 -----
 3 files changed, 4 insertions(+), 10 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 7690547ac28..35022dc1042 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -375,7 +375,7 @@ static void __init reserve_initrd(void)
 }
 #endif /* CONFIG_BLK_DEV_INITRD */
 
-void __init parse_setup_data(void)
+static void __init parse_setup_data(void)
 {
 	struct setup_data *data;
 	u64 pa_data;
@@ -417,7 +417,7 @@ static inline unsigned long long get_total_mem(void)
 	return total << PAGE_SHIFT;
 }
 
-void __init reserve_crashkernel(void)
+static void __init reserve_crashkernel(void)
 {
 	unsigned long long total_mem;
 	unsigned long long crash_size, crash_base;
@@ -453,7 +453,7 @@ void __init reserve_crashkernel(void)
 	}
 }
 #else
-void __init reserve_crashkernel(void)
+static void __init reserve_crashkernel(void)
 {
 }
 #endif
@@ -481,7 +481,7 @@ static struct resource standard_io_resources[] = {
 		.flags = IORESOURCE_BUSY | IORESOURCE_IO }
 };
 
-void __init reserve_standard_io_resources(void)
+static void __init reserve_standard_io_resources(void)
 {
 	int i;
 
diff --git a/include/asm-x86/bootparam.h b/include/asm-x86/bootparam.h
index 55ae9d0c425..6eeba3b2812 100644
--- a/include/asm-x86/bootparam.h
+++ b/include/asm-x86/bootparam.h
@@ -109,6 +109,5 @@ struct boot_params {
 } __attribute__((packed));
 
 void reserve_setup_data(void);
-void parse_setup_data(void);
 
 #endif /* _ASM_BOOTPARAM_H */
diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index bb12a1619c1..269ba7fe21d 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -8,9 +8,6 @@
 /* Interrupt control for vSMPowered x86_64 systems */
 void vsmp_init(void);
 
-/* Crashkernel reservation */
-void reserve_crashkernel(void);
-
 #ifndef CONFIG_PARAVIRT
 #define paravirt_post_allocator_init()	do {} while (0)
 #endif
@@ -38,8 +35,6 @@ void reserve_crashkernel(void);
 #ifndef __ASSEMBLY__
 #include <asm/bootparam.h>
 
-void reserve_standard_io_resources(void);
-
 #ifndef _SETUP
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 102e3b8d3f3d5556c60f9ab6d92108649b68edc8 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 25 Jun 2008 00:19:09 -0400
Subject: x86, 64-bit: add prototype for x86_64_start_kernel()

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/setup.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index 269ba7fe21d..42bc62b4b70 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -56,6 +56,8 @@ extern void probe_roms(void);
 extern unsigned long init_pg_tables_start;
 extern unsigned long init_pg_tables_end;
 
+#else
+void __init x86_64_start_kernel(char *real_mode);
 #endif /* __i386__ */
 #endif /* _SETUP */
 #endif /* __ASSEMBLY__ */
-- 
cgit v1.2.3-70-g09d2


From 15878c0b21b7b04a08108e9027ebbbd68a2502e0 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 25 Jun 2008 00:19:10 -0400
Subject: x86, 64-bit: add sync_cmpxchg

Add sync_cmpxchg to match 32-bit's sync_cmpxchg.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/cmpxchg_64.h | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/cmpxchg_64.h b/include/asm-x86/cmpxchg_64.h
index d9b26b9a28c..17463ccf816 100644
--- a/include/asm-x86/cmpxchg_64.h
+++ b/include/asm-x86/cmpxchg_64.h
@@ -93,6 +93,39 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 	return old;
 }
 
+/*
+ * Always use locked operations when touching memory shared with a
+ * hypervisor, since the system may be SMP even if the guest kernel
+ * isn't.
+ */
+static inline unsigned long __sync_cmpxchg(volatile void *ptr,
+					   unsigned long old,
+					   unsigned long new, int size)
+{
+	unsigned long prev;
+	switch (size) {
+	case 1:
+		asm volatile("lock; cmpxchgb %b1,%2"
+			     : "=a"(prev)
+			     : "q"(new), "m"(*__xg(ptr)), "0"(old)
+			     : "memory");
+		return prev;
+	case 2:
+		asm volatile("lock; cmpxchgw %w1,%2"
+			     : "=a"(prev)
+			     : "r"(new), "m"(*__xg(ptr)), "0"(old)
+			     : "memory");
+		return prev;
+	case 4:
+		asm volatile("lock; cmpxchgl %1,%2"
+			     : "=a"(prev)
+			     : "r"(new), "m"(*__xg(ptr)), "0"(old)
+			     : "memory");
+		return prev;
+	}
+	return old;
+}
+
 static inline unsigned long __cmpxchg_local(volatile void *ptr,
 					    unsigned long old,
 					    unsigned long new, int size)
@@ -139,6 +172,10 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
 	((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o),	\
 					     (unsigned long)(n),	\
 					     sizeof(*(ptr))))
+#define sync_cmpxchg(ptr, o, n)						\
+	((__typeof__(*(ptr)))__sync_cmpxchg((ptr), (unsigned long)(o),	\
+					    (unsigned long)(n),		\
+					    sizeof(*(ptr))))
 #define cmpxchg64_local(ptr, o, n)					\
 ({									\
 	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-- 
cgit v1.2.3-70-g09d2


From eba0045ff87bab465d3c80c289f3bf709c1800f5 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 25 Jun 2008 00:19:12 -0400
Subject: x86/paravirt: add a pgd_alloc/free hooks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add hooks which are called at pgd_alloc/free time.  The pgd_alloc hook
may return an error code, which if non-zero, causes the pgd allocation
to be failed.  The hooks may be used to allocate/free auxillary
per-pgd information.

also fix:

> * Ingo Molnar <mingo@elte.hu> wrote:
>
>  include/asm/pgalloc.h: In function ‘paravirt_pgd_free':
>  include/asm/pgalloc.h:14: error: parameter name omitted
>  arch/x86/kernel/entry_64.S: In file included from
>  arch/x86/kernel/traps_64.c:51:include/asm/pgalloc.h: In function ‘paravirt_pgd_free':
>  include/asm/pgalloc.h:14: error: parameter name omitted

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/paravirt.c |  4 ++++
 arch/x86/mm/pgtable.c      | 13 ++++++++-----
 arch/x86/xen/enlighten.c   |  4 ++++
 include/asm-x86/paravirt.h | 19 ++++++++++++++++++-
 include/asm-x86/pgalloc.h  |  4 ++++
 5 files changed, 38 insertions(+), 6 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e9b50453721..78c9a1b9e6b 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -30,6 +30,7 @@
 #include <asm/setup.h>
 #include <asm/arch_hooks.h>
 #include <asm/time.h>
+#include <asm/pgalloc.h>
 #include <asm/irq.h>
 #include <asm/delay.h>
 #include <asm/fixmap.h>
@@ -366,6 +367,9 @@ struct pv_mmu_ops pv_mmu_ops = {
 	.flush_tlb_single = native_flush_tlb_single,
 	.flush_tlb_others = native_flush_tlb_others,
 
+	.pgd_alloc = __paravirt_pgd_alloc,
+	.pgd_free = paravirt_nop,
+
 	.alloc_pte = paravirt_nop,
 	.alloc_pmd = paravirt_nop,
 	.alloc_pmd_clone = paravirt_nop,
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 45b99ac3948..418c4432fb3 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -215,13 +215,15 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 
 	/* so that alloc_pmd can use it */
 	mm->pgd = pgd;
-	if (pgd)
+	if (pgd) {
 		pgd_ctor(pgd);
 
-	if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
-		pgd_dtor(pgd);
-		free_page((unsigned long)pgd);
-		pgd = NULL;
+		if (paravirt_pgd_alloc(mm) != 0 ||
+		    !pgd_prepopulate_pmd(mm, pgd)) {
+			pgd_dtor(pgd);
+			free_page((unsigned long)pgd);
+			pgd = NULL;
+		}
 	}
 
 	return pgd;
@@ -231,6 +233,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
 	pgd_mop_up_pmds(mm, pgd);
 	pgd_dtor(pgd);
+	paravirt_pgd_free(mm, pgd);
 	free_page((unsigned long)pgd);
 }
 
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 76ad1efaf09..d62f14e2070 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -45,6 +45,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/reboot.h>
+#include <asm/pgalloc.h>
 
 #include "xen-ops.h"
 #include "mmu.h"
@@ -1153,6 +1154,9 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.pte_update = paravirt_nop,
 	.pte_update_defer = paravirt_nop,
 
+	.pgd_alloc = __paravirt_pgd_alloc,
+	.pgd_free = paravirt_nop,
+
 	.alloc_pte = xen_alloc_pte_init,
 	.release_pte = xen_release_pte_init,
 	.alloc_pmd = xen_alloc_pte_init,
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 41f5447efd8..5467e2cff4b 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -219,7 +219,14 @@ struct pv_mmu_ops {
 	void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm,
 				 unsigned long va);
 
-	/* Hooks for allocating/releasing pagetable pages */
+	/* Hooks for allocating and freeing a pagetable top-level */
+	int  (*pgd_alloc)(struct mm_struct *mm);
+	void (*pgd_free)(struct mm_struct *mm, pgd_t *pgd);
+
+	/*
+	 * Hooks for allocating/releasing pagetable pages when they're
+	 * attached to a pagetable
+	 */
 	void (*alloc_pte)(struct mm_struct *mm, u32 pfn);
 	void (*alloc_pmd)(struct mm_struct *mm, u32 pfn);
 	void (*alloc_pmd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count);
@@ -925,6 +932,16 @@ static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
 	PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va);
 }
 
+static inline int paravirt_pgd_alloc(struct mm_struct *mm)
+{
+	return PVOP_CALL1(int, pv_mmu_ops.pgd_alloc, mm);
+}
+
+static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+	PVOP_VCALL2(pv_mmu_ops.pgd_free, mm, pgd);
+}
+
 static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned pfn)
 {
 	PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn);
diff --git a/include/asm-x86/pgalloc.h b/include/asm-x86/pgalloc.h
index 91e4641f3f3..d63ea431cb3 100644
--- a/include/asm-x86/pgalloc.h
+++ b/include/asm-x86/pgalloc.h
@@ -5,9 +5,13 @@
 #include <linux/mm.h>		/* for struct page */
 #include <linux/pagemap.h>
 
+static inline int  __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; }
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
+#define paravirt_pgd_alloc(mm)	__paravirt_pgd_alloc(mm)
+static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd) {}
 static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn)	{}
 static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn)	{}
 static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn,
-- 
cgit v1.2.3-70-g09d2


From 97349135fea7f0ba8464534433df3bfd1dc0e9a6 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 25 Jun 2008 00:19:14 -0400
Subject: x86/paravirt: add debugging for missing operations

Rather than just jumping to 0 when there's a missing operation, raise a BUG.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig           | 7 +++++++
 include/asm-x86/paravirt.h | 8 ++++++++
 2 files changed, 15 insertions(+)

(limited to 'include/asm-x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9441e46cb49..67e5275f03c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -435,6 +435,13 @@ config PARAVIRT_CLOCK
 
 endif
 
+config PARAVIRT_DEBUG
+       bool "paravirt-ops debugging"
+       depends on PARAVIRT && DEBUG_KERNEL
+       help
+         Enable to debug paravirt_ops internals.  Specifically, BUG if
+	 a paravirt_op is missing when it is called.
+
 config MEMTEST
 	bool "Memtest"
 	depends on X86_64
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 5467e2cff4b..198293bed46 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -459,10 +459,17 @@ int paravirt_disable_iospace(void);
 #define VEXTRA_CLOBBERS	 , "rax", "r8", "r9", "r10", "r11"
 #endif
 
+#ifdef CONFIG_PARAVIRT_DEBUG
+#define PVOP_TEST_NULL(op)	BUG_ON(op == NULL)
+#else
+#define PVOP_TEST_NULL(op)	((void)op)
+#endif
+
 #define __PVOP_CALL(rettype, op, pre, post, ...)			\
 	({								\
 		rettype __ret;						\
 		PVOP_CALL_ARGS;					\
+		PVOP_TEST_NULL(op);					\
 		/* This is 32-bit specific, but is okay in 64-bit */	\
 		/* since this condition will never hold */		\
 		if (sizeof(rettype) > sizeof(unsigned long)) {		\
@@ -491,6 +498,7 @@ int paravirt_disable_iospace(void);
 #define __PVOP_VCALL(op, pre, post, ...)				\
 	({								\
 		PVOP_VCALL_ARGS;					\
+		PVOP_TEST_NULL(op);					\
 		asm volatile(pre					\
 			     paravirt_alt(PARAVIRT_CALL)		\
 			     post					\
-- 
cgit v1.2.3-70-g09d2


From 491eccb721c2ee67250273a96e4515fb5b423337 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 25 Jun 2008 00:19:15 -0400
Subject: x86/paravirt: define PARA_INDIRECT for indirect asm calls

On 32-bit it's best to use a %cs: prefix to access memory where the
other segments may not bet set up properly yet.  On 64-bit it's best
to use a rip-relative addressing mode.  Define PARA_INDIRECT() to
abstract this and generate the proper addressing mode in each case.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/paravirt.h | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 198293bed46..82cdcde4b22 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -1455,51 +1455,53 @@ static inline unsigned long __raw_local_irq_save(void)
 #define PV_RESTORE_REGS popq %rdx; popq %rcx; popq %rdi; popq %rax
 #define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 8)
 #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8)
+#define PARA_INDIRECT(addr)	*addr(%rip)
 #else
 #define PV_SAVE_REGS   pushl %eax; pushl %edi; pushl %ecx; pushl %edx
 #define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax
 #define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 4)
 #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4)
+#define PARA_INDIRECT(addr)	*%cs:addr
 #endif
 
 #define INTERRUPT_RETURN						\
 	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE,	\
-		  jmp *%cs:pv_cpu_ops+PV_CPU_iret)
+		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret))
 
 #define DISABLE_INTERRUPTS(clobbers)					\
 	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
-		  PV_SAVE_REGS;			\
-		  call *%cs:pv_irq_ops+PV_IRQ_irq_disable;		\
+		  PV_SAVE_REGS;						\
+		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable);	\
 		  PV_RESTORE_REGS;)			\
 
 #define ENABLE_INTERRUPTS(clobbers)					\
 	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers,	\
-		  PV_SAVE_REGS;			\
-		  call *%cs:pv_irq_ops+PV_IRQ_irq_enable;		\
+		  PV_SAVE_REGS;						\
+		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable);	\
 		  PV_RESTORE_REGS;)
 
 #define ENABLE_INTERRUPTS_SYSCALL_RET					\
 	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_syscall_ret),\
 		  CLBR_NONE,						\
-		  jmp *%cs:pv_cpu_ops+PV_CPU_irq_enable_syscall_ret)
+		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_syscall_ret))
 
 
 #ifdef CONFIG_X86_32
-#define GET_CR0_INTO_EAX			\
-	push %ecx; push %edx;			\
-	call *pv_cpu_ops+PV_CPU_read_cr0;	\
+#define GET_CR0_INTO_EAX				\
+	push %ecx; push %edx;				\
+	call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0);	\
 	pop %edx; pop %ecx
 #else
 #define SWAPGS								\
 	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,	\
 		  PV_SAVE_REGS;						\
-		  call *pv_cpu_ops+PV_CPU_swapgs;			\
+		  call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs);		\
 		  PV_RESTORE_REGS					\
 		 )
 
-#define GET_CR2_INTO_RCX			\
-	call *pv_mmu_ops+PV_MMU_read_cr2;	\
-	movq %rax, %rcx;			\
+#define GET_CR2_INTO_RCX				\
+	call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2);	\
+	movq %rax, %rcx;				\
 	xorq %rax, %rax;
 
 #endif
-- 
cgit v1.2.3-70-g09d2


From a6523748bddd38bcec11431f57502090b6014a96 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Wed, 25 Jun 2008 00:19:16 -0400
Subject: paravirt/x86, 64-bit: move __PAGE_OFFSET to leave a space for
 hypervisor

Set __PAGE_OFFSET to the most negative possible address +
16*PGDIR_SIZE.  The gap is to allow a space for a hypervisor to fit.
The gap is more or less arbitrary, but it's what Xen needs.

When booting native, kernel/head_64.S has a set of compile-time
generated pagetables used at boot time.  This patch removes their
absolutely hard-coded layout, and makes it parameterised on
__PAGE_OFFSET (and __START_KERNEL_map).

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/head_64.S | 17 ++++++++++++-----
 include/asm-x86/page_64.h |  8 +++++++-
 2 files changed, 19 insertions(+), 6 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 32f5a114d1a..38279fab093 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -32,6 +32,13 @@
  *
  */
 
+#define pud_index(x)	(((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
+
+L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET)
+L3_PAGE_OFFSET = pud_index(__PAGE_OFFSET)
+L4_START_KERNEL = pgd_index(__START_KERNEL_map)
+L3_START_KERNEL = pud_index(__START_KERNEL_map)
+
 	.text
 	.section .text.head
 	.code64
@@ -77,8 +84,8 @@ startup_64:
 	/* Fixup the physical addresses in the page table
 	 */
 	addq	%rbp, init_level4_pgt + 0(%rip)
-	addq	%rbp, init_level4_pgt + (258*8)(%rip)
-	addq	%rbp, init_level4_pgt + (511*8)(%rip)
+	addq	%rbp, init_level4_pgt + (L4_PAGE_OFFSET*8)(%rip)
+	addq	%rbp, init_level4_pgt + (L4_START_KERNEL*8)(%rip)
 
 	addq	%rbp, level3_ident_pgt + 0(%rip)
 
@@ -338,9 +345,9 @@ ENTRY(name)
 	 */
 NEXT_PAGE(init_level4_pgt)
 	.quad	level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
-	.fill	257,8,0
+	.org	init_level4_pgt + L4_PAGE_OFFSET*8, 0
 	.quad	level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
-	.fill	252,8,0
+	.org	init_level4_pgt + L4_START_KERNEL*8, 0
 	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
 	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
 
@@ -349,7 +356,7 @@ NEXT_PAGE(level3_ident_pgt)
 	.fill	511,8,0
 
 NEXT_PAGE(level3_kernel_pgt)
-	.fill	510,8,0
+	.fill	L3_START_KERNEL,8,0
 	/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
 	.quad	level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
 	.quad	level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index cac5b9e7826..010d12db80d 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -26,7 +26,13 @@
 #define PUD_PAGE_SIZE		(_AC(1, UL) << PUD_SHIFT)
 #define PUD_PAGE_MASK		(~(PUD_PAGE_SIZE-1))
 
-#define __PAGE_OFFSET           _AC(0xffff810000000000, UL)
+/*
+ * Set __PAGE_OFFSET to the most negative possible address +
+ * PGDIR_SIZE*16 (pgd slot 272).  The gap is to allow a space for a
+ * hypervisor to fit.  Choosing 16 slots here is arbitrary, but it's
+ * what Xen requires.
+ */
+#define __PAGE_OFFSET           _AC(0xffff880000000000, UL)
 
 #define __PHYSICAL_START	CONFIG_PHYSICAL_START
 #define __KERNEL_ALIGN		0x200000
-- 
cgit v1.2.3-70-g09d2


From 408011759cc8ff7f89505e8398cec0ccf67b5afa Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 25 Jun 2008 00:19:17 -0400
Subject: x86, 64-bit: add FIX_PARAVIRT_BOOTMAP fixmap slot

This matches 32 bit.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/fixmap_64.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/fixmap_64.h b/include/asm-x86/fixmap_64.h
index c7dcc365f47..1a0b61eb02f 100644
--- a/include/asm-x86/fixmap_64.h
+++ b/include/asm-x86/fixmap_64.h
@@ -46,6 +46,9 @@ enum fixed_addresses {
 	FIX_EFI_IO_MAP_LAST_PAGE,
 	FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE
 				  + MAX_EFI_IO_PAGES - 1,
+#ifdef CONFIG_PARAVIRT
+	FIX_PARAVIRT_BOOTMAP,
+#endif
 #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
 	FIX_OHCI1394_BASE,
 #endif
-- 
cgit v1.2.3-70-g09d2


From f97013fd8f17120182aa247f360e4d2069a9db9c Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 25 Jun 2008 00:19:18 -0400
Subject: x86, 64-bit: split x86_64_start_kernel

Split x86_64_start_kernel() into two pieces:

   The first essentially cleans up after head_64.S.  It clears the
   bss, zaps low identity mappings, sets up some early exception
   handlers.

   The second part preserves the boot data, reserves the kernel's
   text/data/bss, pagetables and ramdisk, and then starts the kernel
   proper.

This split is so that Xen can call the second part to do the set up it
needs done.  It doesn't need any of the first part setups, because it
doesn't boot via head_64.S, and its redundant or actively damaging.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/head64.c | 5 +++++
 include/asm-x86/setup.h  | 2 ++
 2 files changed, 7 insertions(+)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index c970929bb15..f684e3b3de4 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -108,6 +108,11 @@ void __init x86_64_start_kernel(char * real_mode_data)
 
 	early_printk("Kernel really alive\n");
 
+	x86_64_start_reservations(real_mode_data);
+}
+
+void __init x86_64_start_reservations(char *real_mode_data)
+{
 	copy_bootdata(__va(real_mode_data));
 
 	reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index 42bc62b4b70..1d121c632d9 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -58,6 +58,8 @@ extern unsigned long init_pg_tables_end;
 
 #else
 void __init x86_64_start_kernel(char *real_mode);
+void __init x86_64_start_reservations(char *real_mode_data);
+
 #endif /* __i386__ */
 #endif /* _SETUP */
 #endif /* __ASSEMBLY__ */
-- 
cgit v1.2.3-70-g09d2


From 4f30cb0262847392d8d006042f24bd90abd24f9d Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 25 Jun 2008 00:19:21 -0400
Subject: x86, 64-bit: PSE no longer a hard requirement

Because Xen doesn't support PSE mappings in guests, all code which
assumed the presence of PSE has been changed to fall back to smaller
mappings if necessary.  As a result, PSE is optional rather than
required (though still used whereever possible).

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/required-features.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/required-features.h b/include/asm-x86/required-features.h
index 8c387198ca8..adec887dd7c 100644
--- a/include/asm-x86/required-features.h
+++ b/include/asm-x86/required-features.h
@@ -42,7 +42,7 @@
 #endif
 
 #ifdef CONFIG_X86_64
-#define NEED_PSE	(1<<(X86_FEATURE_PSE & 31))
+#define NEED_PSE	0
 #define NEED_MSR	(1<<(X86_FEATURE_MSR & 31))
 #define NEED_PGE	(1<<(X86_FEATURE_PGE & 31))
 #define NEED_FXSR	(1<<(X86_FEATURE_FXSR & 31))
-- 
cgit v1.2.3-70-g09d2


From 0814e0bace537b7024b09187346b99401e6281be Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Wed, 25 Jun 2008 00:19:22 -0400
Subject: x86, 64-bit: split set_pte_vaddr()

We will need to set a pte on l3_user_pgt. Extract set_pte_vaddr_pud()
from set_pte_vaddr(), that will accept the l3 page table as parameter.

This change should be a no-op for existing code.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/init_64.c        | 31 ++++++++++++++++++++-----------
 include/asm-x86/pgtable_64.h |  3 +++
 2 files changed, 23 insertions(+), 11 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e62cbdd4e2d..5c3305e0507 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -144,22 +144,13 @@ static __init void *spp_getpage(void)
 }
 
 void
-set_pte_vaddr(unsigned long vaddr, pte_t new_pte)
+set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
 {
-	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 
-	pr_debug("set_pte_vaddr %lx to %lx\n", vaddr, native_pte_val(new_pte));
-
-	pgd = pgd_offset_k(vaddr);
-	if (pgd_none(*pgd)) {
-		printk(KERN_ERR
-			"PGD FIXMAP MISSING, it should be setup in head.S!\n");
-		return;
-	}
-	pud = pud_offset(pgd, vaddr);
+	pud = pud_page + pud_index(vaddr);
 	if (pud_none(*pud)) {
 		pmd = (pmd_t *) spp_getpage();
 		pud_populate(&init_mm, pud, pmd);
@@ -192,6 +183,24 @@ set_pte_vaddr(unsigned long vaddr, pte_t new_pte)
 	__flush_tlb_one(vaddr);
 }
 
+void
+set_pte_vaddr(unsigned long vaddr, pte_t pteval)
+{
+	pgd_t *pgd;
+	pud_t *pud_page;
+
+	pr_debug("set_pte_vaddr %lx to %lx\n", vaddr, native_pte_val(pteval));
+
+	pgd = pgd_offset_k(vaddr);
+	if (pgd_none(*pgd)) {
+		printk(KERN_ERR
+			"PGD FIXMAP MISSING, it should be setup in head.S!\n");
+		return;
+	}
+	pud_page = (pud_t*)pgd_page_vaddr(*pgd);
+	set_pte_vaddr_pud(pud_page, vaddr, pteval);
+}
+
 /*
  * The head.S code sets up the kernel high mapping:
  *
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 9a9350abe4e..fa7208b483c 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -70,6 +70,9 @@ extern void paging_init(void);
 
 struct mm_struct;
 
+void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte);
+
+
 static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
 				    pte_t *ptep)
 {
-- 
cgit v1.2.3-70-g09d2


From d75cd22fdd5f7d203fb60014d426942df33dd9a6 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 25 Jun 2008 00:19:26 -0400
Subject: x86/paravirt: split sysret and sysexit

Don't conflate sysret and sysexit; they're different instructions with
different semantics, and may be in use at the same time (at least
within the same kernel, depending on whether its an Intel or AMD
system).

sysexit - just return to userspace, does no register restoration of
    any kind; must explicitly atomically enable interrupts.

sysret - reloads flags from r11, so no need to explicitly enable
    interrupts on 64-bit, responsible for restoring usermode %gs

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citirx.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/asm-offsets_32.c    |  2 +-
 arch/x86/kernel/asm-offsets_64.c    |  2 +-
 arch/x86/kernel/entry_32.S          |  8 ++++----
 arch/x86/kernel/entry_64.S          |  4 ++--
 arch/x86/kernel/paravirt.c          | 12 +++++++++---
 arch/x86/kernel/paravirt_patch_32.c |  4 ++--
 arch/x86/kernel/paravirt_patch_64.c |  4 ++--
 arch/x86/kernel/vmi_32.c            |  4 ++--
 arch/x86/xen/enlighten.c            |  2 +-
 include/asm-x86/irqflags.h          |  4 ++--
 include/asm-x86/paravirt.h          | 15 ++++++++++-----
 11 files changed, 36 insertions(+), 25 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 92588083950..6649d09ad88 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -111,7 +111,7 @@ void foo(void)
 	OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
 	OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
 	OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
-	OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret);
+	OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
 	OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
 #endif
 
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index f126c05d617..27ac2deca46 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -62,7 +62,7 @@ int main(void)
 	OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
 	OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
 	OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
-	OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret);
+	OFFSET(PV_CPU_usersp_sysret, pv_cpu_ops, usersp_sysret);
 	OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
 	OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
 #endif
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 159a1c76d2b..53393c306e1 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -58,7 +58,7 @@
  * for paravirtualization.  The following will never clobber any registers:
  *   INTERRUPT_RETURN (aka. "iret")
  *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
- *   ENABLE_INTERRUPTS_SYSCALL_RET (aka "sti; sysexit").
+ *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
  *
  * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
  * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
@@ -349,7 +349,7 @@ sysenter_past_esp:
 	xorl %ebp,%ebp
 	TRACE_IRQS_ON
 1:	mov  PT_FS(%esp), %fs
-	ENABLE_INTERRUPTS_SYSCALL_RET
+	ENABLE_INTERRUPTS_SYSEXIT
 	CFI_ENDPROC
 .pushsection .fixup,"ax"
 2:	movl $0,PT_FS(%esp)
@@ -874,10 +874,10 @@ ENTRY(native_iret)
 .previous
 END(native_iret)
 
-ENTRY(native_irq_enable_syscall_ret)
+ENTRY(native_irq_enable_sysexit)
 	sti
 	sysexit
-END(native_irq_enable_syscall_ret)
+END(native_irq_enable_sysexit)
 #endif
 
 KPROBE_ENTRY(int3)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 6d1101469e9..0056bc4c61a 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -59,7 +59,7 @@
 #endif	
 
 #ifdef CONFIG_PARAVIRT
-ENTRY(native_irq_enable_syscall_ret)
+ENTRY(native_usersp_sysret)
 	movq	%gs:pda_oldrsp,%rsp
 	swapgs
 	sysretq
@@ -275,7 +275,7 @@ sysret_check:
 	CFI_REGISTER	rip,rcx
 	RESTORE_ARGS 0,-ARG_SKIP,1
 	/*CFI_REGISTER	rflags,r11*/
-	ENABLE_INTERRUPTS_SYSCALL_RET
+	USERSP_SYSRET
 
 	CFI_RESTORE_STATE
 	/* Handle reschedules */
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 78c9a1b9e6b..565ee7a990e 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -140,7 +140,8 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
 		/* If the operation is a nop, then nop the callsite */
 		ret = paravirt_patch_nop();
 	else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
-		 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret))
+		 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
+		 type == PARAVIRT_PATCH(pv_cpu_ops.usersp_sysret))
 		/* If operation requires a jmp, then jmp */
 		ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
 	else
@@ -191,7 +192,8 @@ static void native_flush_tlb_single(unsigned long addr)
 
 /* These are in entry.S */
 extern void native_iret(void);
-extern void native_irq_enable_syscall_ret(void);
+extern void native_irq_enable_sysexit(void);
+extern void native_usersp_sysret(void);
 
 static int __init print_banner(void)
 {
@@ -327,7 +329,11 @@ struct pv_cpu_ops pv_cpu_ops = {
 	.write_idt_entry = native_write_idt_entry,
 	.load_sp0 = native_load_sp0,
 
-	.irq_enable_syscall_ret = native_irq_enable_syscall_ret,
+#ifdef CONFIG_X86_32
+	.irq_enable_sysexit = native_irq_enable_sysexit,
+#else
+	.usersp_sysret = native_usersp_sysret,
+#endif
 	.iret = native_iret,
 	.swapgs = native_swapgs,
 
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index 82fc5fcab4f..58262218781 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -5,7 +5,7 @@ DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
 DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
 DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
 DEF_NATIVE(pv_cpu_ops, iret, "iret");
-DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "sti; sysexit");
+DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit");
 DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
 DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
 DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
@@ -29,7 +29,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 		PATCH_SITE(pv_irq_ops, restore_fl);
 		PATCH_SITE(pv_irq_ops, save_fl);
 		PATCH_SITE(pv_cpu_ops, iret);
-		PATCH_SITE(pv_cpu_ops, irq_enable_syscall_ret);
+		PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
 		PATCH_SITE(pv_mmu_ops, read_cr2);
 		PATCH_SITE(pv_mmu_ops, read_cr3);
 		PATCH_SITE(pv_mmu_ops, write_cr3);
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 7d904e138d7..4a170552b85 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -15,7 +15,7 @@ DEF_NATIVE(pv_cpu_ops, clts, "clts");
 DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
 
 /* the three commands give us more control to how to return from a syscall */
-DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "movq %gs:" __stringify(pda_oldrsp) ", %rsp; swapgs; sysretq;");
+DEF_NATIVE(pv_cpu_ops, usersp_sysret, "movq %gs:" __stringify(pda_oldrsp) ", %rsp; swapgs; sysretq;");
 DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
 
 unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
@@ -35,7 +35,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 		PATCH_SITE(pv_irq_ops, irq_enable);
 		PATCH_SITE(pv_irq_ops, irq_disable);
 		PATCH_SITE(pv_cpu_ops, iret);
-		PATCH_SITE(pv_cpu_ops, irq_enable_syscall_ret);
+		PATCH_SITE(pv_cpu_ops, usersp_sysret);
 		PATCH_SITE(pv_cpu_ops, swapgs);
 		PATCH_SITE(pv_mmu_ops, read_cr2);
 		PATCH_SITE(pv_mmu_ops, read_cr3);
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 956f38927aa..946bf13b44a 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -151,7 +151,7 @@ static unsigned vmi_patch(u8 type, u16 clobbers, void *insns,
 					      insns, ip);
 		case PARAVIRT_PATCH(pv_cpu_ops.iret):
 			return patch_internal(VMI_CALL_IRET, len, insns, ip);
-		case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret):
+		case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit):
 			return patch_internal(VMI_CALL_SYSEXIT, len, insns, ip);
 		default:
 			break;
@@ -896,7 +896,7 @@ static inline int __init activate_vmi(void)
 	 * the backend.  They are performance critical anyway, so requiring
 	 * a patch is not a big problem.
 	 */
-	pv_cpu_ops.irq_enable_syscall_ret = (void *)0xfeedbab0;
+	pv_cpu_ops.irq_enable_sysexit = (void *)0xfeedbab0;
 	pv_cpu_ops.iret = (void *)0xbadbab0;
 
 #ifdef CONFIG_SMP
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index d62f14e2070..119c88fa769 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1089,7 +1089,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.read_pmc = native_read_pmc,
 
 	.iret = xen_iret,
-	.irq_enable_syscall_ret = xen_sysexit,
+	.irq_enable_sysexit = xen_sysexit,
 
 	.load_tr_desc = paravirt_nop,
 	.set_ldt = xen_set_ldt,
diff --git a/include/asm-x86/irqflags.h b/include/asm-x86/irqflags.h
index c242527f970..99ee5256a7e 100644
--- a/include/asm-x86/irqflags.h
+++ b/include/asm-x86/irqflags.h
@@ -112,13 +112,13 @@ static inline unsigned long __raw_local_irq_save(void)
 
 #ifdef CONFIG_X86_64
 #define INTERRUPT_RETURN	iretq
-#define ENABLE_INTERRUPTS_SYSCALL_RET			\
+#define USERSP_SYSRET					\
 			movq	%gs:pda_oldrsp, %rsp;	\
 			swapgs;				\
 			sysretq;
 #else
 #define INTERRUPT_RETURN		iret
-#define ENABLE_INTERRUPTS_SYSCALL_RET	sti; sysexit
+#define ENABLE_INTERRUPTS_SYSEXIT	sti; sysexit
 #define GET_CR0_INTO_EAX		movl %cr0, %eax
 #endif
 
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 82cdcde4b22..2668903b70f 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -141,8 +141,9 @@ struct pv_cpu_ops {
 	u64 (*read_pmc)(int counter);
 	unsigned long long (*read_tscp)(unsigned int *aux);
 
-	/* These two are jmp to, not actually called. */
-	void (*irq_enable_syscall_ret)(void);
+	/* These three are jmp to, not actually called. */
+	void (*irq_enable_sysexit)(void);
+	void (*usersp_sysret)(void);
 	void (*iret)(void);
 
 	void (*swapgs)(void);
@@ -1480,10 +1481,10 @@ static inline unsigned long __raw_local_irq_save(void)
 		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable);	\
 		  PV_RESTORE_REGS;)
 
-#define ENABLE_INTERRUPTS_SYSCALL_RET					\
-	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_syscall_ret),\
+#define ENABLE_INTERRUPTS_SYSEXIT					\
+	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),	\
 		  CLBR_NONE,						\
-		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_syscall_ret))
+		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
 
 
 #ifdef CONFIG_X86_32
@@ -1504,6 +1505,10 @@ static inline unsigned long __raw_local_irq_save(void)
 	movq %rax, %rcx;				\
 	xorq %rax, %rax;
 
+#define USERSP_SYSRET							\
+	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usersp_sysret),		\
+		  CLBR_NONE,						\
+		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usersp_sysret))
 #endif
 
 #endif /* __ASSEMBLY__ */
-- 
cgit v1.2.3-70-g09d2


From c7245da6ae7e5208504ff027c4e0eec69b788651 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 25 Jun 2008 00:19:27 -0400
Subject: x86/paravirt, 64-bit: don't restore user rsp within sysret

There's no need to combine restoring the user rsp within the sysret
pvop, so split it out.  This makes the pvop's semantics closer to the
machine instruction.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citirx.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/asm-offsets_64.c    | 2 +-
 arch/x86/kernel/entry_64.S          | 6 +++---
 arch/x86/kernel/paravirt.c          | 6 +++---
 arch/x86/kernel/paravirt_patch_64.c | 4 ++--
 include/asm-x86/irqflags.h          | 3 +--
 include/asm-x86/paravirt.h          | 8 ++++----
 6 files changed, 14 insertions(+), 15 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 27ac2deca46..a19aba8c5bb 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -62,7 +62,7 @@ int main(void)
 	OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
 	OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
 	OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
-	OFFSET(PV_CPU_usersp_sysret, pv_cpu_ops, usersp_sysret);
+	OFFSET(PV_CPU_usergs_sysret, pv_cpu_ops, usergs_sysret);
 	OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
 	OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
 #endif
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 0056bc4c61a..18447a373fb 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -59,8 +59,7 @@
 #endif	
 
 #ifdef CONFIG_PARAVIRT
-ENTRY(native_usersp_sysret)
-	movq	%gs:pda_oldrsp,%rsp
+ENTRY(native_usergs_sysret)
 	swapgs
 	sysretq
 #endif /* CONFIG_PARAVIRT */
@@ -275,7 +274,8 @@ sysret_check:
 	CFI_REGISTER	rip,rcx
 	RESTORE_ARGS 0,-ARG_SKIP,1
 	/*CFI_REGISTER	rflags,r11*/
-	USERSP_SYSRET
+	movq	%gs:pda_oldrsp, %rsp
+	USERGS_SYSRET
 
 	CFI_RESTORE_STATE
 	/* Handle reschedules */
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 565ee7a990e..b0b17f0bc7e 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -141,7 +141,7 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
 		ret = paravirt_patch_nop();
 	else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
 		 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
-		 type == PARAVIRT_PATCH(pv_cpu_ops.usersp_sysret))
+		 type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret))
 		/* If operation requires a jmp, then jmp */
 		ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
 	else
@@ -193,7 +193,7 @@ static void native_flush_tlb_single(unsigned long addr)
 /* These are in entry.S */
 extern void native_iret(void);
 extern void native_irq_enable_sysexit(void);
-extern void native_usersp_sysret(void);
+extern void native_usergs_sysret(void);
 
 static int __init print_banner(void)
 {
@@ -332,7 +332,7 @@ struct pv_cpu_ops pv_cpu_ops = {
 #ifdef CONFIG_X86_32
 	.irq_enable_sysexit = native_irq_enable_sysexit,
 #else
-	.usersp_sysret = native_usersp_sysret,
+	.usergs_sysret = native_usergs_sysret,
 #endif
 	.iret = native_iret,
 	.swapgs = native_swapgs,
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 4a170552b85..d4c0712a3e6 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -15,7 +15,7 @@ DEF_NATIVE(pv_cpu_ops, clts, "clts");
 DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
 
 /* the three commands give us more control to how to return from a syscall */
-DEF_NATIVE(pv_cpu_ops, usersp_sysret, "movq %gs:" __stringify(pda_oldrsp) ", %rsp; swapgs; sysretq;");
+DEF_NATIVE(pv_cpu_ops, usergs_sysret, "swapgs; sysretq;");
 DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
 
 unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
@@ -35,7 +35,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 		PATCH_SITE(pv_irq_ops, irq_enable);
 		PATCH_SITE(pv_irq_ops, irq_disable);
 		PATCH_SITE(pv_cpu_ops, iret);
-		PATCH_SITE(pv_cpu_ops, usersp_sysret);
+		PATCH_SITE(pv_cpu_ops, usergs_sysret);
 		PATCH_SITE(pv_cpu_ops, swapgs);
 		PATCH_SITE(pv_mmu_ops, read_cr2);
 		PATCH_SITE(pv_mmu_ops, read_cr3);
diff --git a/include/asm-x86/irqflags.h b/include/asm-x86/irqflags.h
index 99ee5256a7e..544836c96b6 100644
--- a/include/asm-x86/irqflags.h
+++ b/include/asm-x86/irqflags.h
@@ -112,8 +112,7 @@ static inline unsigned long __raw_local_irq_save(void)
 
 #ifdef CONFIG_X86_64
 #define INTERRUPT_RETURN	iretq
-#define USERSP_SYSRET					\
-			movq	%gs:pda_oldrsp, %rsp;	\
+#define USERGS_SYSRET					\
 			swapgs;				\
 			sysretq;
 #else
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 2668903b70f..dad5b4186f5 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -143,7 +143,7 @@ struct pv_cpu_ops {
 
 	/* These three are jmp to, not actually called. */
 	void (*irq_enable_sysexit)(void);
-	void (*usersp_sysret)(void);
+	void (*usergs_sysret)(void);
 	void (*iret)(void);
 
 	void (*swapgs)(void);
@@ -1505,10 +1505,10 @@ static inline unsigned long __raw_local_irq_save(void)
 	movq %rax, %rcx;				\
 	xorq %rax, %rax;
 
-#define USERSP_SYSRET							\
-	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usersp_sysret),		\
+#define USERGS_SYSRET							\
+	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret),		\
 		  CLBR_NONE,						\
-		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usersp_sysret))
+		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret))
 #endif
 
 #endif /* __ASSEMBLY__ */
-- 
cgit v1.2.3-70-g09d2


From 2be29982a08009c731307f4a39053b70ac4700da Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 25 Jun 2008 00:19:28 -0400
Subject: x86/paravirt: add sysret/sysexit pvops for returning to 32-bit
 compatibility userspace

In a 64-bit system, we need separate sysret/sysexit operations to
return to a 32-bit userspace.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citirx.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/ia32entry.S           | 21 ++++++++++----
 arch/x86/kernel/asm-offsets_64.c    |  4 ++-
 arch/x86/kernel/entry_64.S          |  4 +--
 arch/x86/kernel/paravirt.c          | 12 ++++----
 arch/x86/kernel/paravirt_patch_64.c |  9 ++++--
 include/asm-x86/irqflags.h          | 14 ++++++++--
 include/asm-x86/paravirt.h          | 56 +++++++++++++++++++++++++++++--------
 7 files changed, 89 insertions(+), 31 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 3aefbce2de4..2a4c42427d9 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -61,6 +61,19 @@
 	CFI_UNDEFINED	r15
 	.endm
 
+#ifdef CONFIG_PARAVIRT
+ENTRY(native_usergs_sysret32)
+	swapgs
+	sysretl
+ENDPROC(native_usergs_sysret32)
+
+ENTRY(native_irq_enable_sysexit)
+	swapgs
+	sti
+	sysexit
+ENDPROC(native_irq_enable_sysexit)
+#endif
+
 /*
  * 32bit SYSENTER instruction entry.
  *
@@ -151,10 +164,7 @@ sysenter_do_call:
 	CFI_ADJUST_CFA_OFFSET -8
 	CFI_REGISTER rsp,rcx
 	TRACE_IRQS_ON
-	swapgs
-	sti		/* sti only takes effect after the next instruction */
-	/* sysexit */
-	.byte	0xf, 0x35
+	ENABLE_INTERRUPTS_SYSEXIT32
 
 sysenter_tracesys:
 	CFI_RESTORE_STATE
@@ -254,8 +264,7 @@ cstar_do_call:
 	TRACE_IRQS_ON
 	movl RSP-ARGOFFSET(%rsp),%esp
 	CFI_RESTORE rsp
-	swapgs
-	sysretl
+	USERGS_SYSRET32
 	
 cstar_tracesys:	
 	CFI_RESTORE_STATE
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index a19aba8c5bb..06c451af979 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -62,7 +62,9 @@ int main(void)
 	OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
 	OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
 	OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
-	OFFSET(PV_CPU_usergs_sysret, pv_cpu_ops, usergs_sysret);
+	OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32);
+	OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
+	OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
 	OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
 	OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
 #endif
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 18447a373fb..880ffe510a1 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -59,7 +59,7 @@
 #endif	
 
 #ifdef CONFIG_PARAVIRT
-ENTRY(native_usergs_sysret)
+ENTRY(native_usergs_sysret64)
 	swapgs
 	sysretq
 #endif /* CONFIG_PARAVIRT */
@@ -275,7 +275,7 @@ sysret_check:
 	RESTORE_ARGS 0,-ARG_SKIP,1
 	/*CFI_REGISTER	rflags,r11*/
 	movq	%gs:pda_oldrsp, %rsp
-	USERGS_SYSRET
+	USERGS_SYSRET64
 
 	CFI_RESTORE_STATE
 	/* Handle reschedules */
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index b0b17f0bc7e..bf1067e89ca 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -141,7 +141,8 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
 		ret = paravirt_patch_nop();
 	else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
 		 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
-		 type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret))
+		 type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
+		 type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64))
 		/* If operation requires a jmp, then jmp */
 		ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
 	else
@@ -193,7 +194,8 @@ static void native_flush_tlb_single(unsigned long addr)
 /* These are in entry.S */
 extern void native_iret(void);
 extern void native_irq_enable_sysexit(void);
-extern void native_usergs_sysret(void);
+extern void native_usergs_sysret32(void);
+extern void native_usergs_sysret64(void);
 
 static int __init print_banner(void)
 {
@@ -329,10 +331,10 @@ struct pv_cpu_ops pv_cpu_ops = {
 	.write_idt_entry = native_write_idt_entry,
 	.load_sp0 = native_load_sp0,
 
-#ifdef CONFIG_X86_32
 	.irq_enable_sysexit = native_irq_enable_sysexit,
-#else
-	.usergs_sysret = native_usergs_sysret,
+#ifdef CONFIG_X86_64
+	.usergs_sysret32 = native_usergs_sysret32,
+	.usergs_sysret64 = native_usergs_sysret64,
 #endif
 	.iret = native_iret,
 	.swapgs = native_swapgs,
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index d4c0712a3e6..061d01df9ae 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -14,8 +14,9 @@ DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
 DEF_NATIVE(pv_cpu_ops, clts, "clts");
 DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
 
-/* the three commands give us more control to how to return from a syscall */
-DEF_NATIVE(pv_cpu_ops, usergs_sysret, "swapgs; sysretq;");
+DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "swapgs; sti; sysexit");
+DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
+DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl");
 DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
 
 unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
@@ -35,7 +36,9 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 		PATCH_SITE(pv_irq_ops, irq_enable);
 		PATCH_SITE(pv_irq_ops, irq_disable);
 		PATCH_SITE(pv_cpu_ops, iret);
-		PATCH_SITE(pv_cpu_ops, usergs_sysret);
+		PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
+		PATCH_SITE(pv_cpu_ops, usergs_sysret32);
+		PATCH_SITE(pv_cpu_ops, usergs_sysret64);
 		PATCH_SITE(pv_cpu_ops, swapgs);
 		PATCH_SITE(pv_mmu_ops, read_cr2);
 		PATCH_SITE(pv_mmu_ops, read_cr3);
diff --git a/include/asm-x86/irqflags.h b/include/asm-x86/irqflags.h
index 544836c96b6..ea9bd2635d5 100644
--- a/include/asm-x86/irqflags.h
+++ b/include/asm-x86/irqflags.h
@@ -112,9 +112,17 @@ static inline unsigned long __raw_local_irq_save(void)
 
 #ifdef CONFIG_X86_64
 #define INTERRUPT_RETURN	iretq
-#define USERGS_SYSRET					\
-			swapgs;				\
-			sysretq;
+#define USERGS_SYSRET64				\
+	swapgs;					\
+	sysretq;
+#define USERGS_SYSRET32				\
+	swapgs;					\
+	sysretl
+#define ENABLE_INTERRUPTS_SYSEXIT32		\
+	swapgs;					\
+	sti;					\
+	sysexit
+
 #else
 #define INTERRUPT_RETURN		iret
 #define ENABLE_INTERRUPTS_SYSEXIT	sti; sysexit
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index dad5b4186f5..33f72f8fe75 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -141,9 +141,32 @@ struct pv_cpu_ops {
 	u64 (*read_pmc)(int counter);
 	unsigned long long (*read_tscp)(unsigned int *aux);
 
-	/* These three are jmp to, not actually called. */
+	/*
+	 * Atomically enable interrupts and return to userspace.  This
+	 * is only ever used to return to 32-bit processes; in a
+	 * 64-bit kernel, it's used for 32-on-64 compat processes, but
+	 * never native 64-bit processes.  (Jump, not call.)
+	 */
 	void (*irq_enable_sysexit)(void);
-	void (*usergs_sysret)(void);
+
+	/*
+	 * Switch to usermode gs and return to 64-bit usermode using
+	 * sysret.  Only used in 64-bit kernels to return to 64-bit
+	 * processes.  Usermode register state, including %rsp, must
+	 * already be restored.
+	 */
+	void (*usergs_sysret64)(void);
+
+	/*
+	 * Switch to usermode gs and return to 32-bit usermode using
+	 * sysret.  Used to return to 32-on-64 compat processes.
+	 * Other usermode register state, including %esp, must already
+	 * be restored.
+	 */
+	void (*usergs_sysret32)(void);
+
+	/* Normal iret.  Jump to this with the standard iret stack
+	   frame set up. */
 	void (*iret)(void);
 
 	void (*swapgs)(void);
@@ -1481,18 +1504,24 @@ static inline unsigned long __raw_local_irq_save(void)
 		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable);	\
 		  PV_RESTORE_REGS;)
 
-#define ENABLE_INTERRUPTS_SYSEXIT					\
-	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),	\
+#define USERGS_SYSRET32							\
+	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32),	\
 		  CLBR_NONE,						\
-		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
-
+		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret32))
 
 #ifdef CONFIG_X86_32
 #define GET_CR0_INTO_EAX				\
 	push %ecx; push %edx;				\
 	call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0);	\
 	pop %edx; pop %ecx
-#else
+
+#define ENABLE_INTERRUPTS_SYSEXIT					\
+	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),	\
+		  CLBR_NONE,						\
+		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
+
+
+#else	/* !CONFIG_X86_32 */
 #define SWAPGS								\
 	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,	\
 		  PV_SAVE_REGS;						\
@@ -1505,11 +1534,16 @@ static inline unsigned long __raw_local_irq_save(void)
 	movq %rax, %rcx;				\
 	xorq %rax, %rax;
 
-#define USERGS_SYSRET							\
-	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret),		\
+#define USERGS_SYSRET64							\
+	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),	\
 		  CLBR_NONE,						\
-		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret))
-#endif
+		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
+
+#define ENABLE_INTERRUPTS_SYSEXIT32					\
+	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),	\
+		  CLBR_NONE,						\
+		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
+#endif	/* CONFIG_X86_32 */
 
 #endif /* __ASSEMBLY__ */
 #endif /* CONFIG_PARAVIRT */
-- 
cgit v1.2.3-70-g09d2


From a00394f81f419beb6fb9f7023bd4d15913dc625d Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 25 Jun 2008 00:19:30 -0400
Subject: x86, 64-bit: swapgs pvop with a user-stack can never be called

It's never safe to call a swapgs pvop when the user stack is current -
it must be inline replaced.  Rather than making a call, the
SWAPGS_UNSAFE_STACK pvop always just puts "swapgs" as a placeholder,
which must either be replaced inline or trap'n'emulated (somehow).

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/irqflags.h |  2 +-
 include/asm-x86/paravirt.h | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/irqflags.h b/include/asm-x86/irqflags.h
index ea9bd2635d5..d17e1f623db 100644
--- a/include/asm-x86/irqflags.h
+++ b/include/asm-x86/irqflags.h
@@ -111,6 +111,7 @@ static inline unsigned long __raw_local_irq_save(void)
 #define DISABLE_INTERRUPTS(x)	cli
 
 #ifdef CONFIG_X86_64
+#define SWAPGS_UNSAFE_STACK	swapgs
 #define INTERRUPT_RETURN	iretq
 #define USERGS_SYSRET64				\
 	swapgs;					\
@@ -185,7 +186,6 @@ static inline void trace_hardirqs_fixup(void)
  * Either way, this is a good way to document that we don't
  * have a reliable stack. x86_64 only.
  */
-#define SWAPGS_UNSAFE_STACK	swapgs
 #define ARCH_TRACE_IRQS_ON		call trace_hardirqs_on_thunk
 #define ARCH_TRACE_IRQS_OFF		call trace_hardirqs_off_thunk
 #define ARCH_LOCKDEP_SYS_EXIT		call lockdep_sys_exit_thunk
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 33f72f8fe75..3286a0c63b4 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -1522,6 +1522,16 @@ static inline unsigned long __raw_local_irq_save(void)
 
 
 #else	/* !CONFIG_X86_32 */
+
+/*
+ * If swapgs is used while the userspace stack is still current,
+ * there's no way to call a pvop.  The PV replacement *must* be
+ * inlined, or the swapgs instruction must be trapped and emulated.
+ */
+#define SWAPGS_UNSAFE_STACK						\
+	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,	\
+		  swapgs)
+
 #define SWAPGS								\
 	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,	\
 		  PV_SAVE_REGS;						\
-- 
cgit v1.2.3-70-g09d2


From fab58420ac0007a452b540cfb07923225ea4f48d Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 25 Jun 2008 00:19:31 -0400
Subject: x86/paravirt, 64-bit: add adjust_exception_frame

64-bit Xen pushes a couple of extra words onto an exception frame.
Add a hook to deal with them.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/asm-offsets_64.c | 1 +
 arch/x86/kernel/entry_64.S       | 2 ++
 arch/x86/kernel/paravirt.c       | 3 +++
 arch/x86/xen/enlighten.c         | 3 +++
 include/asm-x86/paravirt.h       | 9 +++++++++
 include/asm-x86/processor.h      | 2 ++
 6 files changed, 20 insertions(+)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 06c451af979..3295e7c08fe 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -61,6 +61,7 @@ int main(void)
 	OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
 	OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
 	OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
+	OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
 	OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
 	OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32);
 	OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 880ffe510a1..70fe13a1c41 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -736,6 +736,7 @@ END(spurious_interrupt)
  */ 		
 	.macro zeroentry sym
 	INTR_FRAME
+	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	pushq $0	/* push error code/oldrax */ 
 	CFI_ADJUST_CFA_OFFSET 8
 	pushq %rax	/* push real oldrax to the rdi slot */ 
@@ -748,6 +749,7 @@ END(spurious_interrupt)
 
 	.macro errorentry sym
 	XCPT_FRAME
+	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	pushq %rax
 	CFI_ADJUST_CFA_OFFSET 8
 	CFI_REL_OFFSET rax,0
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index bf1067e89ca..b20c369cb89 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -296,6 +296,9 @@ struct pv_irq_ops pv_irq_ops = {
 	.irq_enable = native_irq_enable,
 	.safe_halt = native_safe_halt,
 	.halt = native_halt,
+#ifdef CONFIG_X86_64
+	.adjust_exception_frame = paravirt_nop,
+#endif
 };
 
 struct pv_cpu_ops pv_cpu_ops = {
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 119c88fa769..3b980831602 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1123,6 +1123,9 @@ static const struct pv_irq_ops xen_irq_ops __initdata = {
 	.irq_enable = xen_irq_enable,
 	.safe_halt = xen_safe_halt,
 	.halt = xen_halt,
+#ifdef CONFIG_X86_64
+	.adjust_exception_frame = paravirt_nop,
+#endif
 };
 
 static const struct pv_apic_ops xen_apic_ops __initdata = {
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 3286a0c63b4..3dc223da200 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -189,6 +189,10 @@ struct pv_irq_ops {
 	void (*irq_enable)(void);
 	void (*safe_halt)(void);
 	void (*halt)(void);
+
+#ifdef CONFIG_X86_64
+	void (*adjust_exception_frame)(void);
+#endif
 };
 
 struct pv_apic_ops {
@@ -1544,6 +1548,11 @@ static inline unsigned long __raw_local_irq_save(void)
 	movq %rax, %rcx;				\
 	xorq %rax, %rax;
 
+#define PARAVIRT_ADJUST_EXCEPTION_FRAME					\
+	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \
+		  CLBR_NONE,						\
+		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame))
+
 #define USERGS_SYSRET64							\
 	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),	\
 		  CLBR_NONE,						\
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 12b5ede14c7..df2459f5ebb 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -532,6 +532,8 @@ static inline void load_sp0(struct tss_struct *tss,
 
 #define set_iopl_mask native_set_iopl_mask
 #define SWAPGS	swapgs
+
+#define PARAVIRT_ADJUST_EXCEPTION_FRAME	/*  */
 #endif /* CONFIG_PARAVIRT */
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 9f9d489a3e78b49d897734eaaf9dea568dbea66e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 25 Jun 2008 00:19:32 -0400
Subject: x86/paravirt, 64-bit: make load_gs_index() a paravirt operation

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_64.S |  4 ++--
 arch/x86/kernel/paravirt.c |  3 +++
 include/asm-x86/elf.h      |  2 +-
 include/asm-x86/paravirt.h | 10 ++++++++++
 include/asm-x86/system.h   |  3 ++-
 5 files changed, 18 insertions(+), 4 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 70fe13a1c41..07d69f26233 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -944,7 +944,7 @@ KPROBE_END(error_entry)
 	
        /* Reload gs selector with exception handling */
        /* edi:  new selector */ 
-ENTRY(load_gs_index)
+ENTRY(native_load_gs_index)
 	CFI_STARTPROC
 	pushf
 	CFI_ADJUST_CFA_OFFSET 8
@@ -958,7 +958,7 @@ gs_change:
 	CFI_ADJUST_CFA_OFFSET -8
         ret
 	CFI_ENDPROC
-ENDPROC(load_gs_index)
+ENDPROC(native_load_gs_index)
        
         .section __ex_table,"a"
         .align 8
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index b20c369cb89..27819e3e424 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -329,6 +329,9 @@ struct pv_cpu_ops pv_cpu_ops = {
 	.store_idt = native_store_idt,
 	.store_tr = native_store_tr,
 	.load_tls = native_load_tls,
+#ifdef CONFIG_X86_64
+	.load_gs_index = native_load_gs_index,
+#endif
 	.write_ldt_entry = native_write_ldt_entry,
 	.write_gdt_entry = native_write_gdt_entry,
 	.write_idt_entry = native_write_idt_entry,
diff --git a/include/asm-x86/elf.h b/include/asm-x86/elf.h
index 8f232dc5b5f..7be4733c793 100644
--- a/include/asm-x86/elf.h
+++ b/include/asm-x86/elf.h
@@ -83,9 +83,9 @@ extern unsigned int vdso_enabled;
 	(((x)->e_machine == EM_386) || ((x)->e_machine == EM_486))
 
 #include <asm/processor.h>
+#include <asm/system.h>
 
 #ifdef CONFIG_X86_32
-#include <asm/system.h>		/* for savesegment */
 #include <asm/desc.h>
 
 #define elf_check_arch(x)	elf_check_arch_ia32(x)
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 3dc223da200..6d8966f9d19 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -115,6 +115,9 @@ struct pv_cpu_ops {
 	void (*set_ldt)(const void *desc, unsigned entries);
 	unsigned long (*store_tr)(void);
 	void (*load_tls)(struct thread_struct *t, unsigned int cpu);
+#ifdef CONFIG_X86_64
+	void (*load_gs_index)(unsigned int idx);
+#endif
 	void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum,
 				const void *desc);
 	void (*write_gdt_entry)(struct desc_struct *,
@@ -845,6 +848,13 @@ static inline void load_TLS(struct thread_struct *t, unsigned cpu)
 	PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu);
 }
 
+#ifdef CONFIG_X86_64
+static inline void load_gs_index(unsigned int gs)
+{
+	PVOP_VCALL1(pv_cpu_ops.load_gs_index, gs);
+}
+#endif
+
 static inline void write_ldt_entry(struct desc_struct *dt, int entry,
 				   const void *desc)
 {
diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h
index f686aa6abfe..c4946c5964b 100644
--- a/include/asm-x86/system.h
+++ b/include/asm-x86/system.h
@@ -136,7 +136,7 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t" \
 #define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base))
 #define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1))
 
-extern void load_gs_index(unsigned);
+extern void native_load_gs_index(unsigned);
 
 /*
  * Load a segment. Fall back on loading the zero
@@ -282,6 +282,7 @@ static inline void native_wbinvd(void)
 #ifdef CONFIG_X86_64
 #define read_cr8()	(native_read_cr8())
 #define write_cr8(x)	(native_write_cr8(x))
+#define load_gs_index   native_load_gs_index
 #endif
 
 /* Clear the 'TS' bit */
-- 
cgit v1.2.3-70-g09d2


From 042623bbabae168246ad8a37693f0ecb6c450aea Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Wed, 25 Jun 2008 19:52:15 -0700
Subject: x86: clean up ARCH_SETUP

asm-x86/paravirt.h already have protection with CONFIG_PARAVIRT inside

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup.c                   | 8 ++++----
 include/asm-x86/mach-default/setup_arch.h | 4 ----
 include/asm-x86/mach-visws/setup_arch.h   | 2 --
 3 files changed, 4 insertions(+), 10 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 63dbb5e8f7e..161609c6925 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -101,11 +101,7 @@
 #include <asm/proto.h>
 
 #include <mach_apic.h>
-#ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
-#else
-#define ARCH_SETUP
-#endif
 
 #include <asm/percpu.h>
 #include <asm/sections.h>
@@ -115,6 +111,10 @@
 #include <asm/numa_64.h>
 #endif
 
+#ifndef ARCH_SETUP
+#define ARCH_SETUP
+#endif
+
 #ifndef CONFIG_DEBUG_BOOT_PARAMS
 struct boot_params __initdata boot_params;
 #else
diff --git a/include/asm-x86/mach-default/setup_arch.h b/include/asm-x86/mach-default/setup_arch.h
index 605e3ccb991..38846208b54 100644
--- a/include/asm-x86/mach-default/setup_arch.h
+++ b/include/asm-x86/mach-default/setup_arch.h
@@ -1,7 +1,3 @@
 /* Hook to call BIOS initialisation function */
 
 /* no action for generic */
-
-#ifndef ARCH_SETUP
-#define ARCH_SETUP
-#endif
diff --git a/include/asm-x86/mach-visws/setup_arch.h b/include/asm-x86/mach-visws/setup_arch.h
index 33f700ef683..b8f5dd829db 100644
--- a/include/asm-x86/mach-visws/setup_arch.h
+++ b/include/asm-x86/mach-visws/setup_arch.h
@@ -4,5 +4,3 @@ extern unsigned long sgivwfb_mem_phys;
 extern unsigned long sgivwfb_mem_size;
 
 /* no action for visws */
-
-#define ARCH_SETUP
-- 
cgit v1.2.3-70-g09d2


From f3294a33e765d8308c3e17b951a13e0db9cf5f00 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Fri, 27 Jun 2008 01:41:56 -0700
Subject: x86: let setup_arch call init_apic_mappings for 32bit

instead of calling it from trap_init()

also move init ioapic mapping out of apic_32.c

so 32 bit do same as 64 bit

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c    | 30 ------------------------------
 arch/x86/kernel/io_apic_32.c | 32 ++++++++++++++++++++++++++++++++
 arch/x86/kernel/setup.c      |  6 ++----
 arch/x86/kernel/traps_32.c   |  4 ----
 include/asm-x86/apic.h       |  1 +
 include/asm-x86/io_apic.h    |  1 +
 6 files changed, 36 insertions(+), 38 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index b5571fb0f77..8fbad8ed0eb 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1197,36 +1197,6 @@ void __init init_apic_mappings(void)
 	if (boot_cpu_physical_apicid == -1U)
 		boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
 
-#ifdef CONFIG_X86_IO_APIC
-	{
-		unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
-		int i;
-
-		for (i = 0; i < nr_ioapics; i++) {
-			if (smp_found_config) {
-				ioapic_phys = mp_ioapics[i].mp_apicaddr;
-				if (!ioapic_phys) {
-					printk(KERN_ERR
-					       "WARNING: bogus zero IO-APIC "
-					       "address found in MPTABLE, "
-					       "disabling IO/APIC support!\n");
-					smp_found_config = 0;
-					skip_ioapic_setup = 1;
-					goto fake_ioapic_page;
-				}
-			} else {
-fake_ioapic_page:
-				ioapic_phys = (unsigned long)
-					      alloc_bootmem_pages(PAGE_SIZE);
-				ioapic_phys = __pa(ioapic_phys);
-			}
-			set_fixmap_nocache(idx, ioapic_phys);
-			printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n",
-			       __fix_to_virt(idx), ioapic_phys);
-			idx++;
-		}
-	}
-#endif
 }
 
 /*
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index d6af301c822..337ec3438a8 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -25,6 +25,7 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/sched.h>
+#include <linux/bootmem.h>
 #include <linux/mc146818rtc.h>
 #include <linux/compiler.h>
 #include <linux/acpi.h>
@@ -2852,3 +2853,34 @@ static int __init parse_noapic(char *arg)
 	return 0;
 }
 early_param("noapic", parse_noapic);
+
+void __init ioapic_init_mappings(void)
+{
+	unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
+	int i;
+
+	for (i = 0; i < nr_ioapics; i++) {
+		if (smp_found_config) {
+			ioapic_phys = mp_ioapics[i].mp_apicaddr;
+			if (!ioapic_phys) {
+				printk(KERN_ERR
+				       "WARNING: bogus zero IO-APIC "
+				       "address found in MPTABLE, "
+				       "disabling IO/APIC support!\n");
+				smp_found_config = 0;
+				skip_ioapic_setup = 1;
+				goto fake_ioapic_page;
+			}
+		} else {
+fake_ioapic_page:
+			ioapic_phys = (unsigned long)
+				      alloc_bootmem_pages(PAGE_SIZE);
+			ioapic_phys = __pa(ioapic_phys);
+		}
+		set_fixmap_nocache(idx, ioapic_phys);
+		printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n",
+		       __fix_to_virt(idx), ioapic_phys);
+		idx++;
+	}
+}
+
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index bf528b23750..4716460607b 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -766,16 +766,14 @@ void __init setup_arch(char **cmdline_p)
 		get_smp_config();
 #endif
 
-#ifdef CONFIG_X86_64
 	init_apic_mappings();
 	ioapic_init_mappings();
-#else
-# if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
+
+#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) && defined(CONFIG_X86_32)
 	if (def_to_bigsmp)
 		printk(KERN_WARNING "More than 8 CPUs detected and "
 			"CONFIG_X86_PC cannot handle it.\nUse "
 			"CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
-# endif
 #endif
 	kvm_guest_init();
 
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index dc7c05e5cfe..f60feee8325 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -1198,10 +1198,6 @@ void __init trap_init(void)
 	early_iounmap(p, 4);
 #endif
 
-#ifdef CONFIG_X86_LOCAL_APIC
-	init_apic_mappings();
-#endif
-	set_trap_gate(0,  &divide_error);
 	set_intr_gate(1,  &debug);
 	set_intr_gate(2,  &nmi);
 	set_system_intr_gate(3, &int3); /* int3/4 can be called from all */
diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index 9fe941cd843..6ae6ae68d4c 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -135,6 +135,7 @@ extern int apic_is_clustered_box(void);
 #else /* !CONFIG_X86_LOCAL_APIC */
 static inline void lapic_shutdown(void) { }
 #define local_apic_timer_c2_ok		1
+static inline void init_apic_mappings(void) { }
 
 #endif /* !CONFIG_X86_LOCAL_APIC */
 
diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h
index 8b1f5684842..14f82bbcb5f 100644
--- a/include/asm-x86/io_apic.h
+++ b/include/asm-x86/io_apic.h
@@ -186,6 +186,7 @@ extern void ioapic_init_mappings(void);
 #else  /* !CONFIG_X86_IO_APIC */
 #define io_apic_assign_pci_irqs 0
 static const int timer_through_8259 = 0;
+static inline void ioapic_init_mappings(void) { }
 #endif
 
 #endif
-- 
cgit v1.2.3-70-g09d2


From df366e9822beca97115ba9745cbe1ea1f26fb111 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Fri, 27 Jun 2008 12:04:03 -0700
Subject: x86_64: fix non-paravirt compilation

Make sure SWAPGS and PARAVIRT_ADJUST_EXCEPTION_FRAME are properly
defined when CONFIG_PARAVIRT is off.

Fixes Ingo's build failure:
arch/x86/kernel/entry_64.S: Assembler messages:
arch/x86/kernel/entry_64.S:1201: Error: invalid character '_' in mnemonic
arch/x86/kernel/entry_64.S:1205: Error: invalid character '_' in mnemonic
arch/x86/kernel/entry_64.S:1209: Error: invalid character '_' in mnemonic
arch/x86/kernel/entry_64.S:1213: Error: invalid character '_' in mnemonic

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Mark McLoughlin <markmc@redhat.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Vegard Nossum <vegard.nossum@gmail.com>
Cc: Stephen Tweedie <sct@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/irqflags.h  | 22 +++++++++++++---------
 include/asm-x86/processor.h |  3 ---
 2 files changed, 13 insertions(+), 12 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/irqflags.h b/include/asm-x86/irqflags.h
index d17e1f623db..17e7a1701c9 100644
--- a/include/asm-x86/irqflags.h
+++ b/include/asm-x86/irqflags.h
@@ -111,7 +111,20 @@ static inline unsigned long __raw_local_irq_save(void)
 #define DISABLE_INTERRUPTS(x)	cli
 
 #ifdef CONFIG_X86_64
+#define SWAPGS	swapgs
+/*
+ * Currently paravirt can't handle swapgs nicely when we
+ * don't have a stack we can rely on (such as a user space
+ * stack).  So we either find a way around these or just fault
+ * and emulate if a guest tries to call swapgs directly.
+ *
+ * Either way, this is a good way to document that we don't
+ * have a reliable stack. x86_64 only.
+ */
 #define SWAPGS_UNSAFE_STACK	swapgs
+
+#define PARAVIRT_ADJUST_EXCEPTION_FRAME	/*  */
+
 #define INTERRUPT_RETURN	iretq
 #define USERGS_SYSRET64				\
 	swapgs;					\
@@ -177,15 +190,6 @@ static inline void trace_hardirqs_fixup(void)
 #else
 
 #ifdef CONFIG_X86_64
-/*
- * Currently paravirt can't handle swapgs nicely when we
- * don't have a stack we can rely on (such as a user space
- * stack).  So we either find a way around these or just fault
- * and emulate if a guest tries to call swapgs directly.
- *
- * Either way, this is a good way to document that we don't
- * have a reliable stack. x86_64 only.
- */
 #define ARCH_TRACE_IRQS_ON		call trace_hardirqs_on_thunk
 #define ARCH_TRACE_IRQS_OFF		call trace_hardirqs_off_thunk
 #define ARCH_LOCKDEP_SYS_EXIT		call lockdep_sys_exit_thunk
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index df2459f5ebb..7f738270459 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -531,9 +531,6 @@ static inline void load_sp0(struct tss_struct *tss,
 }
 
 #define set_iopl_mask native_set_iopl_mask
-#define SWAPGS	swapgs
-
-#define PARAVIRT_ADJUST_EXCEPTION_FRAME	/*  */
 #endif /* CONFIG_PARAVIRT */
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 914bebfad42c417b84bda8920a3073d236007fde Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sun, 29 Jun 2008 00:06:37 -0700
Subject: x86: use disable_apic in 32bit

change the enable_local_apic to static force_enable_local_apic for 32bit

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c | 15 ++++++++-------
 arch/x86/kernel/setup.c   |  4 ----
 include/asm-x86/apic.h    |  4 ----
 3 files changed, 8 insertions(+), 15 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 8fbad8ed0eb..6dea8306d8c 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -55,9 +55,10 @@ unsigned long mp_lapic_addr;
 /*
  * Knob to control our willingness to enable the local APIC.
  *
- * -1=force-disable, +1=force-enable
+ * +1=force-enable
  */
-int enable_local_apic;
+static int force_enable_local_apic;
+int disable_apic;
 
 /* Local APIC timer verification ok */
 static int local_apic_timer_verify_ok;
@@ -1099,7 +1100,7 @@ static int __init detect_init_APIC(void)
 	u32 h, l, features;
 
 	/* Disabled by kernel option? */
-	if (enable_local_apic < 0)
+	if (disable_apic)
 		return -1;
 
 	switch (boot_cpu_data.x86_vendor) {
@@ -1122,7 +1123,7 @@ static int __init detect_init_APIC(void)
 		 * Over-ride BIOS and try to enable the local APIC only if
 		 * "lapic" specified.
 		 */
-		if (enable_local_apic <= 0) {
+		if (!force_enable_local_apic) {
 			printk(KERN_INFO "Local APIC disabled by BIOS -- "
 			       "you can enable it with \"lapic\"\n");
 			return -1;
@@ -1208,7 +1209,7 @@ int apic_version[MAX_APICS];
 
 int __init APIC_init_uniprocessor(void)
 {
-	if (enable_local_apic < 0)
+	if (disable_apic)
 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
 
 	if (!smp_found_config && !cpu_has_apic)
@@ -1682,14 +1683,14 @@ static void apic_pm_activate(void) { }
  */
 static int __init parse_lapic(char *arg)
 {
-	enable_local_apic = 1;
+	force_enable_local_apic = 1;
 	return 0;
 }
 early_param("lapic", parse_lapic);
 
 static int __init parse_nolapic(char *arg)
 {
-	enable_local_apic = -1;
+	disable_apic = 1;
 	clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
 	return 0;
 }
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 4716460607b..fb318edd8bc 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -598,11 +598,7 @@ void __init setup_arch(char **cmdline_p)
 
 	if (acpi_mps_check()) {
 #ifdef CONFIG_X86_LOCAL_APIC
-#ifdef CONFIG_X86_32
-		enable_local_apic = -1;
-#else
 		disable_apic = 1;
-#endif
 #endif
 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
 	}
diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index 6ae6ae68d4c..a29807737d3 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -40,11 +40,7 @@ extern int local_apic_timer_c2_ok;
 
 extern int ioapic_force;
 
-#ifdef CONFIG_X86_64
 extern int disable_apic;
-#else
-extern int enable_local_apic;
-#endif
 /*
  * Basic functions accessing APICs.
  */
-- 
cgit v1.2.3-70-g09d2


From 3ae960a598b9dfe87b29eb70738d91a13e692498 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 30 Jun 2008 10:33:47 +0200
Subject: - x86: move early_ioremap prototypes to io.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

now that the early-ioremap code is unified, move the prototypes too from
io_32.h to io.h.

this fixes:

arch/x86/kernel/setup.c:531: error: implicit declaration of function ‘early_ioremap_init'

on 64-bit.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/io.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/io.h b/include/asm-x86/io.h
index c63563df4ac..bf5d629b3a3 100644
--- a/include/asm-x86/io.h
+++ b/include/asm-x86/io.h
@@ -5,6 +5,20 @@
 
 #include <linux/compiler.h>
 
+/*
+ * early_ioremap() and early_iounmap() are for temporary early boot-time
+ * mappings, before the real ioremap() is functional.
+ * A boot-time mapping is currently limited to at most 16 pages.
+ */
+#ifndef __ASSEMBLY__
+extern void early_ioremap_init(void);
+extern void early_ioremap_clear(void);
+extern void early_ioremap_reset(void);
+extern void *early_ioremap(unsigned long offset, unsigned long size);
+extern void early_iounmap(void *addr, unsigned long size);
+extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
+#endif
+
 #define build_mmio_read(name, size, type, reg, barrier) \
 static inline type name(const volatile void __iomem *addr) \
 { type ret; asm volatile("mov" size " %1,%0":"=" reg (ret) \
-- 
cgit v1.2.3-70-g09d2


From 28bb22379513ca3cac9d13766064a219c5fc21a9 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Mon, 30 Jun 2008 16:20:54 -0700
Subject: x86: move reserve_setup_data to setup.c

Ying Huang would like setup_data to be reserved, but not included in the
no save range.

Here we try to modify the e820 table to reserve that range early.
also add that in early_res in case bootloader messes up with the ramdisk.

other solution would be
1. add early_res_to_highmem...
2. early_res_to_e820...
but they could reserve another type memory wrongly, if early_res has some
resource reserved early, and not needed later, but it is not removed from
early_res in time. Like the RAMDISK (already handled).

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: andi@firstfloor.org
Tested-by: Huang, Ying <ying.huang@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/e820.c      |  4 +++-
 arch/x86/kernel/head.c      | 18 ------------------
 arch/x86/kernel/head64.c    |  1 -
 arch/x86/kernel/setup.c     | 34 ++++++++++++++++++++++++++++------
 include/asm-x86/bootparam.h |  2 --
 include/asm-x86/e820.h      |  3 +++
 6 files changed, 34 insertions(+), 28 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index ba5ac880ea1..e03b89ac8f2 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -120,6 +120,7 @@ void __init e820_print_map(char *who)
 		       (e820.map[i].addr + e820.map[i].size));
 		switch (e820.map[i].type) {
 		case E820_RAM:
+		case E820_RESERVED_KERN:
 			printk(KERN_CONT "(usable)\n");
 			break;
 		case E820_RESERVED:
@@ -611,7 +612,7 @@ void __init e820_mark_nosave_regions(unsigned long limit_pfn)
 			register_nosave_region(pfn, PFN_UP(ei->addr));
 
 		pfn = PFN_DOWN(ei->addr + ei->size);
-		if (ei->type != E820_RAM)
+		if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
 			register_nosave_region(PFN_UP(ei->addr), pfn);
 
 		if (pfn >= limit_pfn)
@@ -1207,6 +1208,7 @@ void __init e820_reserve_resources(void)
 	res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
 	for (i = 0; i < e820.nr_map; i++) {
 		switch (e820.map[i].type) {
+		case E820_RESERVED_KERN:
 		case E820_RAM:	res->name = "System RAM"; break;
 		case E820_ACPI:	res->name = "ACPI Tables"; break;
 		case E820_NVS:	res->name = "ACPI Non-volatile Storage"; break;
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
index a6816be01cd..3e66bd364a9 100644
--- a/arch/x86/kernel/head.c
+++ b/arch/x86/kernel/head.c
@@ -53,21 +53,3 @@ void __init reserve_ebda_region(void)
 	/* reserve all memory between lowmem and the 1MB mark */
 	reserve_early_overlap_ok(lowmem, 0x100000, "BIOS reserved");
 }
-
-void __init reserve_setup_data(void)
-{
-	struct setup_data *data;
-	u64 pa_data;
-	char buf[32];
-
-	if (boot_params.hdr.version < 0x0209)
-		return;
-	pa_data = boot_params.hdr.setup_data;
-	while (pa_data) {
-		data = early_ioremap(pa_data, sizeof(*data));
-		sprintf(buf, "setup data %x", data->type);
-		reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
-		pa_data = data->next;
-		early_iounmap(data, sizeof(*data));
-	}
-}
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index f684e3b3de4..c9781982914 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -128,7 +128,6 @@ void __init x86_64_start_reservations(char *real_mode_data)
 #endif
 
 	reserve_ebda_region();
-	reserve_setup_data();
 
 	/*
 	 * At this point everything still needed from the boot loader
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index caec79fb83a..2ca12d4c88f 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -389,14 +389,34 @@ static void __init parse_setup_data(void)
 		default:
 			break;
 		}
-#ifndef CONFIG_DEBUG_BOOT_PARAMS
-		free_early(pa_data, pa_data+sizeof(*data)+data->len);
-#endif
 		pa_data = data->next;
 		early_iounmap(data, PAGE_SIZE);
 	}
 }
 
+static void __init reserve_setup_data(void)
+{
+	struct setup_data *data;
+	u64 pa_data;
+	char buf[32];
+
+	if (boot_params.hdr.version < 0x0209)
+		return;
+	pa_data = boot_params.hdr.setup_data;
+	while (pa_data) {
+		data = early_ioremap(pa_data, sizeof(*data));
+		sprintf(buf, "setup data %x", data->type);
+		reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
+		e820_update_range(pa_data, sizeof(*data)+data->len,
+			 E820_RAM, E820_RESERVED_KERN);
+		pa_data = data->next;
+		early_iounmap(data, sizeof(*data));
+	}
+	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+	printk(KERN_INFO "extended physical RAM map:\n");
+	e820_print_map("reserve setup_data");
+}
+
 /*
  * --------- Crashkernel reservation ------------------------------
  */
@@ -523,7 +543,6 @@ void __init setup_arch(char **cmdline_p)
 	memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
 	pre_setup_arch_hook();
 	early_cpu_init();
-	reserve_setup_data();
 #else
 	printk(KERN_INFO "Command line: %s\n", boot_command_line);
 #endif
@@ -567,6 +586,8 @@ void __init setup_arch(char **cmdline_p)
 	ARCH_SETUP
 
 	setup_memory_map();
+	parse_setup_data();
+
 	copy_edd();
 
 	if (!boot_params.hdr.root_flags)
@@ -593,10 +614,11 @@ void __init setup_arch(char **cmdline_p)
 	strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
 	*cmdline_p = command_line;
 
-	parse_setup_data();
-
 	parse_early_param();
 
+	/* after early param, so could get panic from serial */
+	reserve_setup_data();
+
 	if (acpi_mps_check()) {
 #ifdef CONFIG_X86_LOCAL_APIC
 		disable_apic = 1;
diff --git a/include/asm-x86/bootparam.h b/include/asm-x86/bootparam.h
index 6eeba3b2812..ae22bdf0ab1 100644
--- a/include/asm-x86/bootparam.h
+++ b/include/asm-x86/bootparam.h
@@ -108,6 +108,4 @@ struct boot_params {
 	__u8  _pad9[276];				/* 0xeec */
 } __attribute__((packed));
 
-void reserve_setup_data(void);
-
 #endif /* _ASM_BOOTPARAM_H */
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index f622685c9af..45e904fe407 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -44,6 +44,9 @@
 #define E820_ACPI	3
 #define E820_NVS	4
 
+/* reserved RAM used by kernel itself */
+#define E820_RESERVED_KERN        128
+
 #ifndef __ASSEMBLY__
 struct e820entry {
 	__u64 addr;	/* start of memory segment */
-- 
cgit v1.2.3-70-g09d2


From 6a2f47ca27fad36f99e8478a3807d4b8c7db80e7 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Fri, 27 Jun 2008 10:10:13 -0700
Subject: x86: add check for node passed to node_to_cpumask, v3

  * When CONFIG_DEBUG_PER_CPU_MAPS is set, the node passed to
    node_to_cpumask and node_to_cpumask_ptr should be validated.
    If invalid, then a dump_stack is performed and a zero cpumask
    is returned.

v2: Slightly different version to remove a compiler warning.
v3: Redone to reflect moving setup.c -> setup_percpu.c

Signed-off-by: Mike Travis <travis@sgi.com>
Cc: Vegard Nossum <vegard.nossum@gmail.com>
Cc: "akpm@linux-foundation.org" <akpm@linux-foundation.org>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup_percpu.c | 26 +++++++++++++++++++++++---
 include/asm-x86/topology.h     |  7 ++++++-
 2 files changed, 29 insertions(+), 4 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 7068f95cccc..43aca2de624 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -346,6 +346,10 @@ int early_cpu_to_node(int cpu)
 	return per_cpu(x86_cpu_to_node_map, cpu);
 }
 
+
+/* empty cpumask */
+static const cpumask_t cpu_mask_none;
+
 /*
  * Returns a pointer to the bitmask of CPUs on Node 'node'.
  */
@@ -358,13 +362,23 @@ cpumask_t *_node_to_cpumask_ptr(int node)
 		dump_stack();
 		return &cpu_online_map;
 	}
-	BUG_ON(node >= nr_node_ids);
-	return &node_to_cpumask_map[node];
+	if (node >= nr_node_ids) {
+		printk(KERN_WARNING
+			"_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n",
+			node, nr_node_ids);
+		dump_stack();
+		return (cpumask_t *)&cpu_mask_none;
+	}
+	return (cpumask_t *)&node_to_cpumask_map[node];
 }
 EXPORT_SYMBOL(_node_to_cpumask_ptr);
 
 /*
  * Returns a bitmask of CPUs on Node 'node'.
+ *
+ * Side note: this function creates the returned cpumask on the stack
+ * so with a high NR_CPUS count, excessive stack space is used.  The
+ * node_to_cpumask_ptr function should be used whenever possible.
  */
 cpumask_t node_to_cpumask(int node)
 {
@@ -374,7 +388,13 @@ cpumask_t node_to_cpumask(int node)
 		dump_stack();
 		return cpu_online_map;
 	}
-	BUG_ON(node >= nr_node_ids);
+	if (node >= nr_node_ids) {
+		printk(KERN_WARNING
+			"node_to_cpumask(%d): node > nr_node_ids(%d)\n",
+			node, nr_node_ids);
+		dump_stack();
+		return cpu_mask_none;
+	}
 	return node_to_cpumask_map[node];
 }
 EXPORT_SYMBOL(node_to_cpumask);
diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h
index 1f97758de4a..98e5f17ea85 100644
--- a/include/asm-x86/topology.h
+++ b/include/asm-x86/topology.h
@@ -57,7 +57,12 @@ static inline int cpu_to_node(int cpu)
 }
 #define early_cpu_to_node(cpu)	cpu_to_node(cpu)
 
-/* Returns a bitmask of CPUs on Node 'node'. */
+/* Returns a bitmask of CPUs on Node 'node'.
+ *
+ * Side note: this function creates the returned cpumask on the stack
+ * so with a high NR_CPUS count, excessive stack space is used.  The
+ * node_to_cpumask_ptr function should be used whenever possible.
+ */
 static inline cpumask_t node_to_cpumask(int node)
 {
 	return node_to_cpumask_map[node];
-- 
cgit v1.2.3-70-g09d2


From fd6493e16625b92a506fba13deda31c0be5f1cd4 Mon Sep 17 00:00:00 2001
From: Alok Kataria <akataria@vmware.com>
Date: Wed, 25 Jun 2008 11:02:42 -0700
Subject: x86: cleanup e820_setup_gap(), v2

e820_search_gap also take a end_addr parameter to limit search from
start_addr to end_addr.

Signed-off-by: AloK N Kataria <akataria@vmware.com>
Acked-by: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: "lenb@kernel.org" <lenb@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/e820.c | 12 +++++++-----
 include/asm-x86/e820.h |  2 +-
 2 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index e03b89ac8f2..d3f6c5f6d3b 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -487,17 +487,19 @@ void __init update_e820(void)
 	printk(KERN_INFO "modified physical RAM map:\n");
 	e820_print_map("modified");
 }
-
+#define MAX_GAP_END 0x100000000ull
 /*
- * Search for a gap in the e820 memory space from start_addr to 2^32.
+ * Search for a gap in the e820 memory space from start_addr to end_addr.
  */
 __init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
-		unsigned long start_addr)
+		unsigned long start_addr, unsigned long long end_addr)
 {
-	unsigned long long last = 0x100000000ull;
+	unsigned long long last;
 	int i = e820.nr_map;
 	int found = 0;
 
+	last = (end_addr && end_addr < MAX_GAP_END) ? end_addr : MAX_GAP_END;
+
 	while (--i >= 0) {
 		unsigned long long start = e820.map[i].addr;
 		unsigned long long end = start + e820.map[i].size;
@@ -537,7 +539,7 @@ __init void e820_setup_gap(void)
 
 	gapstart = 0x10000000;
 	gapsize = 0x400000;
-	found  = e820_search_gap(&gapstart, &gapsize, 0);
+	found  = e820_search_gap(&gapstart, &gapsize, 0, MAX_GAP_END);
 
 #ifdef CONFIG_X86_64
 	if (!found) {
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index 45e904fe407..05a7dc33334 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -75,7 +75,7 @@ extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type,
 extern void update_e820(void);
 extern void e820_setup_gap(void);
 extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
-				unsigned long start_addr);
+			unsigned long start_addr, unsigned long long end_addr);
 struct setup_data;
 extern void parse_e820_ext(struct setup_data *data, unsigned long pa_data);
 
-- 
cgit v1.2.3-70-g09d2


From dc8e8120ad291074a5fb93cfb0418466c62f6019 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 1 Jul 2008 20:02:16 -0700
Subject: x86: change copy_e820_map to append_e820_map

so it has a more meaningful name.
also change it to static.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/e820.c | 11 ++++++-----
 include/asm-x86/e820.h |  1 -
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index d3f6c5f6d3b..b01fa0d0dc7 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -360,7 +360,7 @@ int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
 	return 0;
 }
 
-static int __init __copy_e820_map(struct e820entry *biosmap, int nr_map)
+static int __init __append_e820_map(struct e820entry *biosmap, int nr_map)
 {
 	while (nr_map) {
 		u64 start = biosmap->addr;
@@ -389,13 +389,13 @@ static int __init __copy_e820_map(struct e820entry *biosmap, int nr_map)
  * will have given us a memory map that we can use to properly
  * set up memory.  If we aren't, we'll fake a memory map.
  */
-int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
+static int __init append_e820_map(struct e820entry *biosmap, int nr_map)
 {
 	/* Only one memory region (or negative)? Ignore it */
 	if (nr_map < 2)
 		return -1;
 
-	return __copy_e820_map(biosmap, nr_map);
+	return __append_e820_map(biosmap, nr_map);
 }
 
 u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
@@ -583,7 +583,7 @@ void __init parse_e820_ext(struct setup_data *sdata, unsigned long pa_data)
 	if (map_len > PAGE_SIZE)
 		sdata = early_ioremap(pa_data, map_len);
 	extmap = (struct e820entry *)(sdata->data);
-	__copy_e820_map(extmap, entries);
+	__append_e820_map(extmap, entries);
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 	if (map_len > PAGE_SIZE)
 		early_iounmap(sdata, map_len);
@@ -1247,7 +1247,8 @@ char *__init default_machine_specific_memory_setup(void)
 			ARRAY_SIZE(boot_params.e820_map),
 			&new_nr);
 	boot_params.e820_entries = new_nr;
-	if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) {
+	if (append_e820_map(boot_params.e820_map, boot_params.e820_entries)
+	  < 0) {
 		u64 mem_size;
 
 		/* compare results from other methods and take the greater */
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index 05a7dc33334..4b26604b3c1 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -67,7 +67,6 @@ extern void e820_add_region(u64 start, u64 size, int type);
 extern void e820_print_map(char *who);
 extern int
 sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, int *pnr_map);
-extern int copy_e820_map(struct e820entry *biosmap, int nr_map);
 extern u64 e820_update_range(u64 start, u64 size, unsigned old_type,
 			       unsigned new_type);
 extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type,
-- 
cgit v1.2.3-70-g09d2


From cb95a13a8ace8612ecab042a838e5aab2ec14ef0 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Wed, 2 Jul 2008 00:31:02 -0700
Subject: x86: merge zones_sizes_init for numa and non numa on 32-bit

move out e820_register_active_regions from non numa zones_sizes_init()
and remove numa version zones_sizes_init().

and let 32 bit call remove_all_active_ranges() in setup_arch() directly
like 64-bit

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup.c    |  2 --
 arch/x86/mm/discontig_32.c | 16 ----------------
 arch/x86/mm/init_32.c      | 10 ++++------
 include/asm-x86/page_32.h  |  1 -
 4 files changed, 4 insertions(+), 25 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 4ac01d0ce62..d5de157b02a 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -749,10 +749,8 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	acpi_boot_table_init();
 
-#ifdef CONFIG_X86_64
 	/* Remove active ranges so rediscovery with NUMA-awareness happens */
 	remove_all_active_ranges();
-#endif
 
 #ifdef CONFIG_ACPI_NUMA
 	/*
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index e2f0dd13a45..fa389d20383 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -327,7 +327,6 @@ void __init initmem_init(unsigned long start_pfn,
 	 * and ZONE_HIGHMEM.
 	 */
 
-	remove_all_active_ranges();
 	get_memcfg_numa();
 
 	kva_pages = round_up(calculate_numa_remap_pages(), PTRS_PER_PTE);
@@ -390,21 +389,6 @@ void __init initmem_init(unsigned long start_pfn,
 	setup_bootmem_allocator();
 }
 
-void __init zone_sizes_init(void)
-{
-	unsigned long max_zone_pfns[MAX_NR_ZONES];
-	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
-	max_zone_pfns[ZONE_DMA] =
-		virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
-#ifdef CONFIG_HIGHMEM
-	max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
-#endif
-
-	free_area_init_nodes(max_zone_pfns);
-	return;
-}
-
 void __init set_highmem_pages_init(void)
 {
 #ifdef CONFIG_HIGHMEM
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index aa5e37c9f4b..8efe872b961 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -660,12 +660,14 @@ void __init initmem_init(unsigned long start_pfn,
 	if (max_pfn > max_low_pfn)
 		highstart_pfn = max_low_pfn;
 	memory_present(0, 0, highend_pfn);
+	e820_register_active_regions(0, 0, highend_pfn);
 	printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
 		pages_to_mb(highend_pfn - highstart_pfn));
 	num_physpages = highend_pfn;
 	high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
 #else
 	memory_present(0, 0, max_low_pfn);
+	e820_register_active_regions(0, 0, max_low_pfn);
 	num_physpages = max_low_pfn;
 	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
 #endif
@@ -677,25 +679,21 @@ void __init initmem_init(unsigned long start_pfn,
 
 	setup_bootmem_allocator();
 }
+#endif /* !CONFIG_NEED_MULTIPLE_NODES */
 
-void __init zone_sizes_init(void)
+static void __init zone_sizes_init(void)
 {
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
 	max_zone_pfns[ZONE_DMA] =
 		virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
 	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
-	remove_all_active_ranges();
 #ifdef CONFIG_HIGHMEM
 	max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
-	e820_register_active_regions(0, 0, highend_pfn);
-#else
-	e820_register_active_regions(0, 0, max_low_pfn);
 #endif
 
 	free_area_init_nodes(max_zone_pfns);
 }
-#endif /* !CONFIG_NEED_MULTIPLE_NODES */
 
 void __init setup_bootmem_allocator(void)
 {
diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index 4ae1daba129..ab8528793f0 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h
@@ -96,7 +96,6 @@ extern void find_low_pfn_range(void);
 extern unsigned long init_memory_mapping(unsigned long start,
 					 unsigned long end);
 extern void initmem_init(unsigned long, unsigned long);
-extern void zone_sizes_init(void);
 extern void setup_bootmem_allocator(void);
 
 
-- 
cgit v1.2.3-70-g09d2


From 329513a35d1a2b6b28d54f5c2c0dde4face8200b Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Wed, 2 Jul 2008 18:54:40 -0700
Subject: x86: move prefill_possible_map calling early

call it right after we are done with MADT/mptable handling, instead of
doing that in setup_per_cpu_areas() later on...

this way for_possible_cpu() can be used early.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup.c        |  1 +
 arch/x86/kernel/setup_percpu.c | 10 ----------
 arch/x86/kernel/smpboot.c      |  8 ++++++++
 include/asm-x86/smp.h          |  4 ++++
 4 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f52a6fb9023..cfcfbefee0b 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -818,6 +818,7 @@ void __init setup_arch(char **cmdline_p)
 		get_smp_config();
 #endif
 
+	prefill_possible_map();
 #ifdef CONFIG_X86_64
 	init_cpu_to_node();
 #endif
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 43aca2de624..5fc310f746f 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -162,16 +162,6 @@ void __init setup_per_cpu_areas(void)
 	char *ptr;
 	int cpu;
 
-	/* no processor from mptable or madt */
-	if (!num_processors)
-		num_processors = 1;
-
-#ifdef CONFIG_HOTPLUG_CPU
-	prefill_possible_map();
-#else
-	nr_cpu_ids = num_processors;
-#endif
-
 	/* Setup cpu_pda map */
 	setup_cpu_pda_map();
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 3b19441d78b..e1200b202ed 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1278,12 +1278,20 @@ __init void prefill_possible_map(void)
 	int i;
 	int possible;
 
+	/* no processor from mptable or madt */
+	if (!num_processors)
+		num_processors = 1;
+
+#ifdef CONFIG_HOTPLUG_CPU
 	if (additional_cpus == -1) {
 		if (disabled_cpus > 0)
 			additional_cpus = disabled_cpus;
 		else
 			additional_cpus = 0;
 	}
+#else
+	additional_cpus = 0;
+#endif
 	possible = num_processors + additional_cpus;
 	if (possible > NR_CPUS)
 		possible = NR_CPUS;
diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
index fad45f6a193..b324a0645a7 100644
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@ -119,6 +119,10 @@ static inline int num_booting_cpus(void)
 {
 	return cpus_weight(cpu_callout_map);
 }
+#else
+static inline void prefill_possible_map(void)
+{
+}
 #endif /* CONFIG_SMP */
 
 extern unsigned disabled_cpus __cpuinitdata;
-- 
cgit v1.2.3-70-g09d2


From 4a7017370aa0a94a00ae5b5705e9169cdcae5fb8 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 3 Jul 2008 15:57:47 +0200
Subject: x86: move prefill_possible_map calling early, fix

fix:

arch/x86/kernel/built-in.o: In function `setup_arch':
: undefined reference to `prefill_possible_map'

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/smp.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
index b324a0645a7..2e221f1ce0b 100644
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@ -109,8 +109,6 @@ int native_cpu_up(unsigned int cpunum);
 extern int __cpu_disable(void);
 extern void __cpu_die(unsigned int cpu);
 
-extern void prefill_possible_map(void);
-
 void smp_store_cpu_info(int id);
 #define cpu_physical_id(cpu)	per_cpu(x86_cpu_to_apicid, cpu)
 
@@ -119,11 +117,15 @@ static inline int num_booting_cpus(void)
 {
 	return cpus_weight(cpu_callout_map);
 }
+#endif /* CONFIG_SMP */
+
+#if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_CPU)
+extern void prefill_possible_map(void);
 #else
 static inline void prefill_possible_map(void)
 {
 }
-#endif /* CONFIG_SMP */
+#endif
 
 extern unsigned disabled_cpus __cpuinitdata;
 
-- 
cgit v1.2.3-70-g09d2


From 8490638cf0fb3975f7636c5268f27d5daf4eaaa5 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 1 Jul 2008 16:46:35 -0700
Subject: x86: always set _PAGE_GLOBAL in _PAGE_KERNEL* flags

Consistently set _PAGE_GLOBAL in _PAGE_KERNEL flags.  This makes 32-
and 64-bit code consistent, and removes some special cases where
__PAGE_KERNEL* did not have _PAGE_GLOBAL set, causing confusion as a
result of the inconsistencies.

This patch only affects x86-64, which generally always supports PGD.
The x86-32 patch is next.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/head_64.S |  4 ++--
 include/asm-x86/pgtable.h | 32 +++++++++++++-------------------
 2 files changed, 15 insertions(+), 21 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 4336f98456e..b07ac7b217c 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -374,7 +374,7 @@ NEXT_PAGE(level2_ident_pgt)
 	/* Since I easily can, map the first 1G.
 	 * Don't set NX because code runs from these pages.
 	 */
-	PMDS(0, __PAGE_KERNEL_LARGE_EXEC | _PAGE_GLOBAL, PTRS_PER_PMD)
+	PMDS(0, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD)
 
 NEXT_PAGE(level2_kernel_pgt)
 	/*
@@ -387,7 +387,7 @@ NEXT_PAGE(level2_kernel_pgt)
 	 *  If you want to increase this then increase MODULES_VADDR
 	 *  too.)
 	 */
-	PMDS(0, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL,
+	PMDS(0, __PAGE_KERNEL_LARGE_EXEC,
 		KERNEL_IMAGE_SIZE/PMD_SIZE)
 
 NEXT_PAGE(level2_spare_pgt)
diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 85f851a0b99..a1c0009e81b 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -88,7 +88,7 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
 #endif	/* __ASSEMBLY__ */
 #else
 #define __PAGE_KERNEL_EXEC						\
-	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
+	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL)
 #define __PAGE_KERNEL		(__PAGE_KERNEL_EXEC | _PAGE_NX)
 #endif
 
@@ -103,24 +103,18 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
 #define __PAGE_KERNEL_LARGE		(__PAGE_KERNEL | _PAGE_PSE)
 #define __PAGE_KERNEL_LARGE_EXEC	(__PAGE_KERNEL_EXEC | _PAGE_PSE)
 
-#ifdef CONFIG_X86_32
-# define MAKE_GLOBAL(x)			__pgprot((x))
-#else
-# define MAKE_GLOBAL(x)			__pgprot((x) | _PAGE_GLOBAL)
-#endif
-
-#define PAGE_KERNEL			MAKE_GLOBAL(__PAGE_KERNEL)
-#define PAGE_KERNEL_RO			MAKE_GLOBAL(__PAGE_KERNEL_RO)
-#define PAGE_KERNEL_EXEC		MAKE_GLOBAL(__PAGE_KERNEL_EXEC)
-#define PAGE_KERNEL_RX			MAKE_GLOBAL(__PAGE_KERNEL_RX)
-#define PAGE_KERNEL_WC			MAKE_GLOBAL(__PAGE_KERNEL_WC)
-#define PAGE_KERNEL_NOCACHE		MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE)
-#define PAGE_KERNEL_UC_MINUS		MAKE_GLOBAL(__PAGE_KERNEL_UC_MINUS)
-#define PAGE_KERNEL_EXEC_NOCACHE	MAKE_GLOBAL(__PAGE_KERNEL_EXEC_NOCACHE)
-#define PAGE_KERNEL_LARGE		MAKE_GLOBAL(__PAGE_KERNEL_LARGE)
-#define PAGE_KERNEL_LARGE_EXEC		MAKE_GLOBAL(__PAGE_KERNEL_LARGE_EXEC)
-#define PAGE_KERNEL_VSYSCALL		MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL)
-#define PAGE_KERNEL_VSYSCALL_NOCACHE	MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL_NOCACHE)
+#define PAGE_KERNEL			__pgprot(__PAGE_KERNEL)
+#define PAGE_KERNEL_RO			__pgprot(__PAGE_KERNEL_RO)
+#define PAGE_KERNEL_EXEC		__pgprot(__PAGE_KERNEL_EXEC)
+#define PAGE_KERNEL_RX			__pgprot(__PAGE_KERNEL_RX)
+#define PAGE_KERNEL_WC			__pgprot(__PAGE_KERNEL_WC)
+#define PAGE_KERNEL_NOCACHE		__pgprot(__PAGE_KERNEL_NOCACHE)
+#define PAGE_KERNEL_UC_MINUS		__pgprot(__PAGE_KERNEL_UC_MINUS)
+#define PAGE_KERNEL_EXEC_NOCACHE	__pgprot(__PAGE_KERNEL_EXEC_NOCACHE)
+#define PAGE_KERNEL_LARGE		__pgprot(__PAGE_KERNEL_LARGE)
+#define PAGE_KERNEL_LARGE_EXEC		__pgprot(__PAGE_KERNEL_LARGE_EXEC)
+#define PAGE_KERNEL_VSYSCALL		__pgprot(__PAGE_KERNEL_VSYSCALL)
+#define PAGE_KERNEL_VSYSCALL_NOCACHE	__pgprot(__PAGE_KERNEL_VSYSCALL_NOCACHE)
 
 /*         xwr */
 #define __P000	PAGE_NONE
-- 
cgit v1.2.3-70-g09d2


From ef5e94af16c0c82452e1ea5d387e1203dd2198d6 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 1 Jul 2008 16:46:36 -0700
Subject: x86_32: remove __PAGE_KERNEL(_EXEC) From: Jeremy Fitzhardinge
 <jeremy.fitzhardinge@citrix.com>

Older x86-32 processors do not support global mappings (PGD), so must
only use it if the processor supports it.

The _PAGE_KERNEL* flags always have _PAGE_KERNEL set, since logically
we always want it set.

This is OK even on processors which do not support PGD, since all
_PAGE flags are masked with __supported_pte_mask before being turned
into a real in-pagetable pte.  On 32-bit systems, __supported_pte_mask
is initialized to not contain _PAGE_GLOBAL, and it is then added if
the CPU is found to support it.

The x86-32 code used to use __PAGE_KERNEL/__PAGE_KERNEL_EXEC for this
purpose, but they're now redundant and can be removed.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/init_32.c     | 10 ++--------
 include/asm-x86/pgtable.h | 10 ----------
 2 files changed, 2 insertions(+), 18 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8efe872b961..b5a0fd5f4c5 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -383,11 +383,6 @@ static void __init set_highmem_pages_init(void)
 # define set_highmem_pages_init()	do { } while (0)
 #endif /* CONFIG_HIGHMEM */
 
-pteval_t __PAGE_KERNEL = _PAGE_KERNEL;
-EXPORT_SYMBOL(__PAGE_KERNEL);
-
-pteval_t __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;
-
 void __init native_pagetable_setup_start(pgd_t *base)
 {
 	unsigned long pfn, va;
@@ -509,7 +504,7 @@ void zap_low_mappings(void)
 
 int nx_enabled;
 
-pteval_t __supported_pte_mask __read_mostly = ~_PAGE_NX;
+pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL);
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
 
 #ifdef CONFIG_X86_PAE
@@ -796,8 +791,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
 	/* Enable PGE if available */
 	if (cpu_has_pge) {
 		set_in_cr4(X86_CR4_PGE);
-		__PAGE_KERNEL |= _PAGE_GLOBAL;
-		__PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
+		__supported_pte_mask |= _PAGE_GLOBAL;
 	}
 
 	/*
diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index a1c0009e81b..64de07e2532 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -78,19 +78,9 @@
 #define PAGE_READONLY_EXEC	__pgprot(_PAGE_PRESENT | _PAGE_USER |	\
 					 _PAGE_ACCESSED)
 
-#ifdef CONFIG_X86_32
-#define _PAGE_KERNEL_EXEC \
-	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
-#define _PAGE_KERNEL (_PAGE_KERNEL_EXEC | _PAGE_NX)
-
-#ifndef __ASSEMBLY__
-extern pteval_t __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
-#endif	/* __ASSEMBLY__ */
-#else
 #define __PAGE_KERNEL_EXEC						\
 	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL)
 #define __PAGE_KERNEL		(__PAGE_KERNEL_EXEC | _PAGE_NX)
-#endif
 
 #define __PAGE_KERNEL_RO		(__PAGE_KERNEL & ~_PAGE_RW)
 #define __PAGE_KERNEL_RX		(__PAGE_KERNEL_EXEC & ~_PAGE_RW)
-- 
cgit v1.2.3-70-g09d2


From 6247943d8ab699b57653afd453a4940cca70ef8a Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sun, 6 Jul 2008 11:42:11 -0700
Subject: x86: remove acpi_srat config v2

use ACPI_NUMA directly

and move srat_32.c to mm/

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig          |   5 -
 arch/x86/kernel/Makefile  |   1 -
 arch/x86/kernel/srat_32.c | 280 ----------------------------------------------
 arch/x86/mm/Makefile      |   3 +-
 arch/x86/mm/srat_32.c     | 280 ++++++++++++++++++++++++++++++++++++++++++++++
 include/asm-x86/srat.h    |   2 +-
 6 files changed, 283 insertions(+), 288 deletions(-)
 delete mode 100644 arch/x86/kernel/srat_32.c
 create mode 100644 arch/x86/mm/srat_32.c

(limited to 'include/asm-x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d8dddbc5e34..bb0c0d0f6db 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -455,11 +455,6 @@ config MEMTEST
 		memtest=4, mean do 4 test patterns.
 	  If you are unsure how to answer this question, answer Y.
 
-config ACPI_SRAT
-	def_bool y
-	depends on X86_32 && ACPI && NUMA && X86_GENERICARCH
-	select ACPI_NUMA
-
 config X86_SUMMIT_NUMA
 	def_bool y
 	depends on X86_32 && NUMA && X86_GENERICARCH
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 212804e7878..54829e2b516 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -64,7 +64,6 @@ obj-$(CONFIG_X86_SUMMIT_NUMA)	+= summit_32.o
 obj-y				+= vsmp_64.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
 obj-$(CONFIG_MODULES)		+= module_$(BITS).o
-obj-$(CONFIG_ACPI_SRAT) 	+= srat_32.o
 obj-$(CONFIG_EFI) 		+= efi.o efi_$(BITS).o efi_stub_$(BITS).o
 obj-$(CONFIG_DOUBLEFAULT) 	+= doublefault_32.o
 obj-$(CONFIG_KGDB)		+= kgdb.o
diff --git a/arch/x86/kernel/srat_32.c b/arch/x86/kernel/srat_32.c
deleted file mode 100644
index f41d67f8f83..00000000000
--- a/arch/x86/kernel/srat_32.c
+++ /dev/null
@@ -1,280 +0,0 @@
-/*
- * Some of the code in this file has been gleaned from the 64 bit 
- * discontigmem support code base.
- *
- * Copyright (C) 2002, IBM Corp.
- *
- * All rights reserved.          
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Send feedback to Pat Gaughen <gone@us.ibm.com>
- */
-#include <linux/mm.h>
-#include <linux/bootmem.h>
-#include <linux/mmzone.h>
-#include <linux/acpi.h>
-#include <linux/nodemask.h>
-#include <asm/srat.h>
-#include <asm/topology.h>
-#include <asm/smp.h>
-#include <asm/e820.h>
-
-/*
- * proximity macros and definitions
- */
-#define NODE_ARRAY_INDEX(x)	((x) / 8)	/* 8 bits/char */
-#define NODE_ARRAY_OFFSET(x)	((x) % 8)	/* 8 bits/char */
-#define BMAP_SET(bmap, bit)	((bmap)[NODE_ARRAY_INDEX(bit)] |= 1 << NODE_ARRAY_OFFSET(bit))
-#define BMAP_TEST(bmap, bit)	((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit)))
-/* bitmap length; _PXM is at most 255 */
-#define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8) 
-static u8 __initdata pxm_bitmap[PXM_BITMAP_LEN];	/* bitmap of proximity domains */
-
-#define MAX_CHUNKS_PER_NODE	3
-#define MAXCHUNKS		(MAX_CHUNKS_PER_NODE * MAX_NUMNODES)
-struct node_memory_chunk_s {
-	unsigned long	start_pfn;
-	unsigned long	end_pfn;
-	u8	pxm;		// proximity domain of node
-	u8	nid;		// which cnode contains this chunk?
-	u8	bank;		// which mem bank on this node
-};
-static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS];
-
-static int __initdata num_memory_chunks; /* total number of memory chunks */
-static u8 __initdata apicid_to_pxm[MAX_APICID];
-
-int numa_off __initdata;
-int acpi_numa __initdata;
-
-static __init void bad_srat(void)
-{
-        printk(KERN_ERR "SRAT: SRAT not used.\n");
-        acpi_numa = -1;
-	num_memory_chunks = 0;
-}
-
-static __init inline int srat_disabled(void)
-{
-	return numa_off || acpi_numa < 0;
-}
-
-/* Identify CPU proximity domains */
-void __init
-acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *cpu_affinity)
-{
-	if (srat_disabled())
-		return;
-	if (cpu_affinity->header.length !=
-	     sizeof(struct acpi_srat_cpu_affinity)) {
-		bad_srat();
-		return;
-	}
-
-	if ((cpu_affinity->flags & ACPI_SRAT_CPU_ENABLED) == 0)
-		return;		/* empty entry */
-
-	/* mark this node as "seen" in node bitmap */
-	BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain_lo);
-
-	apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo;
-
-	printk(KERN_DEBUG "CPU %02x in proximity domain %02x\n",
-		cpu_affinity->apic_id, cpu_affinity->proximity_domain_lo);
-}
-
-/*
- * Identify memory proximity domains and hot-remove capabilities.
- * Fill node memory chunk list structure.
- */
-void __init
-acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *memory_affinity)
-{
-	unsigned long long paddr, size;
-	unsigned long start_pfn, end_pfn;
-	u8 pxm;
-	struct node_memory_chunk_s *p, *q, *pend;
-
-	if (srat_disabled())
-		return;
-	if (memory_affinity->header.length !=
-	     sizeof(struct acpi_srat_mem_affinity)) {
-		bad_srat();
-		return;
-	}
-
-	if ((memory_affinity->flags & ACPI_SRAT_MEM_ENABLED) == 0)
-		return;		/* empty entry */
-
-	pxm = memory_affinity->proximity_domain & 0xff;
-
-	/* mark this node as "seen" in node bitmap */
-	BMAP_SET(pxm_bitmap, pxm);
-
-	/* calculate info for memory chunk structure */
-	paddr = memory_affinity->base_address;
-	size = memory_affinity->length;
-
-	start_pfn = paddr >> PAGE_SHIFT;
-	end_pfn = (paddr + size) >> PAGE_SHIFT;
-
-
-	if (num_memory_chunks >= MAXCHUNKS) {
-		printk(KERN_WARNING "Too many mem chunks in SRAT."
-			" Ignoring %lld MBytes at %llx\n",
-			size/(1024*1024), paddr);
-		return;
-	}
-
-	/* Insertion sort based on base address */
-	pend = &node_memory_chunk[num_memory_chunks];
-	for (p = &node_memory_chunk[0]; p < pend; p++) {
-		if (start_pfn < p->start_pfn)
-			break;
-	}
-	if (p < pend) {
-		for (q = pend; q >= p; q--)
-			*(q + 1) = *q;
-	}
-	p->start_pfn = start_pfn;
-	p->end_pfn = end_pfn;
-	p->pxm = pxm;
-
-	num_memory_chunks++;
-
-	printk(KERN_DEBUG "Memory range %08lx to %08lx (type %x)"
-			  " in proximity domain %02x %s\n",
-		start_pfn, end_pfn,
-		memory_affinity->memory_type,
-		pxm,
-		((memory_affinity->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ?
-		 "enabled and removable" : "enabled" ) );
-}
-
-/* Callback for SLIT parsing */
-void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
-{
-}
-
-void acpi_numa_arch_fixup(void)
-{
-}
-/*
- * The SRAT table always lists ascending addresses, so can always
- * assume that the first "start" address that you see is the real
- * start of the node, and that the current "end" address is after
- * the previous one.
- */
-static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk)
-{
-	/*
-	 * Only add present memory as told by the e820.
-	 * There is no guarantee from the SRAT that the memory it
-	 * enumerates is present at boot time because it represents
-	 * *possible* memory hotplug areas the same as normal RAM.
-	 */
-	if (memory_chunk->start_pfn >= max_pfn) {
-		printk(KERN_INFO "Ignoring SRAT pfns: %08lx - %08lx\n",
-			memory_chunk->start_pfn, memory_chunk->end_pfn);
-		return;
-	}
-	if (memory_chunk->nid != nid)
-		return;
-
-	if (!node_has_online_mem(nid))
-		node_start_pfn[nid] = memory_chunk->start_pfn;
-
-	if (node_start_pfn[nid] > memory_chunk->start_pfn)
-		node_start_pfn[nid] = memory_chunk->start_pfn;
-
-	if (node_end_pfn[nid] < memory_chunk->end_pfn)
-		node_end_pfn[nid] = memory_chunk->end_pfn;
-}
-
-int __init get_memcfg_from_srat(void)
-{
-	int i, j, nid;
-
-
-	if (srat_disabled())
-		goto out_fail;
-
-	if (num_memory_chunks == 0) {
-		printk(KERN_WARNING
-			 "could not finy any ACPI SRAT memory areas.\n");
-		goto out_fail;
-	}
-
-	/* Calculate total number of nodes in system from PXM bitmap and create
-	 * a set of sequential node IDs starting at zero.  (ACPI doesn't seem
-	 * to specify the range of _PXM values.)
-	 */
-	/*
-	 * MCD - we no longer HAVE to number nodes sequentially.  PXM domain
-	 * numbers could go as high as 256, and MAX_NUMNODES for i386 is typically
-	 * 32, so we will continue numbering them in this manner until MAX_NUMNODES
-	 * approaches MAX_PXM_DOMAINS for i386.
-	 */
-	nodes_clear(node_online_map);
-	for (i = 0; i < MAX_PXM_DOMAINS; i++) {
-		if (BMAP_TEST(pxm_bitmap, i)) {
-			int nid = acpi_map_pxm_to_node(i);
-			node_set_online(nid);
-		}
-	}
-	BUG_ON(num_online_nodes() == 0);
-
-	/* set cnode id in memory chunk structure */
-	for (i = 0; i < num_memory_chunks; i++)
-		node_memory_chunk[i].nid = pxm_to_node(node_memory_chunk[i].pxm);
-
-	printk(KERN_DEBUG "pxm bitmap: ");
-	for (i = 0; i < sizeof(pxm_bitmap); i++) {
-		printk(KERN_CONT "%02x ", pxm_bitmap[i]);
-	}
-	printk(KERN_CONT "\n");
-	printk(KERN_DEBUG "Number of logical nodes in system = %d\n",
-			 num_online_nodes());
-	printk(KERN_DEBUG "Number of memory chunks in system = %d\n",
-			 num_memory_chunks);
-
-	for (i = 0; i < MAX_APICID; i++)
-		apicid_2_node[i] = pxm_to_node(apicid_to_pxm[i]);
-
-	for (j = 0; j < num_memory_chunks; j++){
-		struct node_memory_chunk_s * chunk = &node_memory_chunk[j];
-		printk(KERN_DEBUG
-			"chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
-		       j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
-		node_read_chunk(chunk->nid, chunk);
-		e820_register_active_regions(chunk->nid, chunk->start_pfn,
-					     min(chunk->end_pfn, max_pfn));
-	}
-
-	for_each_online_node(nid) {
-		unsigned long start = node_start_pfn[nid];
-		unsigned long end = min(node_end_pfn[nid], max_pfn);
-
-		memory_present(nid, start, end);
-		node_remap_size[nid] = node_memmap_size_bytes(nid, start, end);
-	}
-	return 1;
-out_fail:
-	printk(KERN_ERR "failed to get NUMA memory information from SRAT"
-			" table\n");
-	return 0;
-}
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index b7b3e4c7cfc..c107641cd39 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -13,5 +13,6 @@ obj-$(CONFIG_NUMA)		+= discontig_32.o
 else
 obj-$(CONFIG_NUMA)		+= numa_64.o
 obj-$(CONFIG_K8_NUMA)		+= k8topology_64.o
-obj-$(CONFIG_ACPI_NUMA)		+= srat_64.o
 endif
+obj-$(CONFIG_ACPI_NUMA)		+= srat_$(BITS).o
+
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
new file mode 100644
index 00000000000..f41d67f8f83
--- /dev/null
+++ b/arch/x86/mm/srat_32.c
@@ -0,0 +1,280 @@
+/*
+ * Some of the code in this file has been gleaned from the 64 bit 
+ * discontigmem support code base.
+ *
+ * Copyright (C) 2002, IBM Corp.
+ *
+ * All rights reserved.          
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Send feedback to Pat Gaughen <gone@us.ibm.com>
+ */
+#include <linux/mm.h>
+#include <linux/bootmem.h>
+#include <linux/mmzone.h>
+#include <linux/acpi.h>
+#include <linux/nodemask.h>
+#include <asm/srat.h>
+#include <asm/topology.h>
+#include <asm/smp.h>
+#include <asm/e820.h>
+
+/*
+ * proximity macros and definitions
+ */
+#define NODE_ARRAY_INDEX(x)	((x) / 8)	/* 8 bits/char */
+#define NODE_ARRAY_OFFSET(x)	((x) % 8)	/* 8 bits/char */
+#define BMAP_SET(bmap, bit)	((bmap)[NODE_ARRAY_INDEX(bit)] |= 1 << NODE_ARRAY_OFFSET(bit))
+#define BMAP_TEST(bmap, bit)	((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit)))
+/* bitmap length; _PXM is at most 255 */
+#define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8) 
+static u8 __initdata pxm_bitmap[PXM_BITMAP_LEN];	/* bitmap of proximity domains */
+
+#define MAX_CHUNKS_PER_NODE	3
+#define MAXCHUNKS		(MAX_CHUNKS_PER_NODE * MAX_NUMNODES)
+struct node_memory_chunk_s {
+	unsigned long	start_pfn;
+	unsigned long	end_pfn;
+	u8	pxm;		// proximity domain of node
+	u8	nid;		// which cnode contains this chunk?
+	u8	bank;		// which mem bank on this node
+};
+static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS];
+
+static int __initdata num_memory_chunks; /* total number of memory chunks */
+static u8 __initdata apicid_to_pxm[MAX_APICID];
+
+int numa_off __initdata;
+int acpi_numa __initdata;
+
+static __init void bad_srat(void)
+{
+        printk(KERN_ERR "SRAT: SRAT not used.\n");
+        acpi_numa = -1;
+	num_memory_chunks = 0;
+}
+
+static __init inline int srat_disabled(void)
+{
+	return numa_off || acpi_numa < 0;
+}
+
+/* Identify CPU proximity domains */
+void __init
+acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *cpu_affinity)
+{
+	if (srat_disabled())
+		return;
+	if (cpu_affinity->header.length !=
+	     sizeof(struct acpi_srat_cpu_affinity)) {
+		bad_srat();
+		return;
+	}
+
+	if ((cpu_affinity->flags & ACPI_SRAT_CPU_ENABLED) == 0)
+		return;		/* empty entry */
+
+	/* mark this node as "seen" in node bitmap */
+	BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain_lo);
+
+	apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo;
+
+	printk(KERN_DEBUG "CPU %02x in proximity domain %02x\n",
+		cpu_affinity->apic_id, cpu_affinity->proximity_domain_lo);
+}
+
+/*
+ * Identify memory proximity domains and hot-remove capabilities.
+ * Fill node memory chunk list structure.
+ */
+void __init
+acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *memory_affinity)
+{
+	unsigned long long paddr, size;
+	unsigned long start_pfn, end_pfn;
+	u8 pxm;
+	struct node_memory_chunk_s *p, *q, *pend;
+
+	if (srat_disabled())
+		return;
+	if (memory_affinity->header.length !=
+	     sizeof(struct acpi_srat_mem_affinity)) {
+		bad_srat();
+		return;
+	}
+
+	if ((memory_affinity->flags & ACPI_SRAT_MEM_ENABLED) == 0)
+		return;		/* empty entry */
+
+	pxm = memory_affinity->proximity_domain & 0xff;
+
+	/* mark this node as "seen" in node bitmap */
+	BMAP_SET(pxm_bitmap, pxm);
+
+	/* calculate info for memory chunk structure */
+	paddr = memory_affinity->base_address;
+	size = memory_affinity->length;
+
+	start_pfn = paddr >> PAGE_SHIFT;
+	end_pfn = (paddr + size) >> PAGE_SHIFT;
+
+
+	if (num_memory_chunks >= MAXCHUNKS) {
+		printk(KERN_WARNING "Too many mem chunks in SRAT."
+			" Ignoring %lld MBytes at %llx\n",
+			size/(1024*1024), paddr);
+		return;
+	}
+
+	/* Insertion sort based on base address */
+	pend = &node_memory_chunk[num_memory_chunks];
+	for (p = &node_memory_chunk[0]; p < pend; p++) {
+		if (start_pfn < p->start_pfn)
+			break;
+	}
+	if (p < pend) {
+		for (q = pend; q >= p; q--)
+			*(q + 1) = *q;
+	}
+	p->start_pfn = start_pfn;
+	p->end_pfn = end_pfn;
+	p->pxm = pxm;
+
+	num_memory_chunks++;
+
+	printk(KERN_DEBUG "Memory range %08lx to %08lx (type %x)"
+			  " in proximity domain %02x %s\n",
+		start_pfn, end_pfn,
+		memory_affinity->memory_type,
+		pxm,
+		((memory_affinity->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ?
+		 "enabled and removable" : "enabled" ) );
+}
+
+/* Callback for SLIT parsing */
+void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
+{
+}
+
+void acpi_numa_arch_fixup(void)
+{
+}
+/*
+ * The SRAT table always lists ascending addresses, so can always
+ * assume that the first "start" address that you see is the real
+ * start of the node, and that the current "end" address is after
+ * the previous one.
+ */
+static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk)
+{
+	/*
+	 * Only add present memory as told by the e820.
+	 * There is no guarantee from the SRAT that the memory it
+	 * enumerates is present at boot time because it represents
+	 * *possible* memory hotplug areas the same as normal RAM.
+	 */
+	if (memory_chunk->start_pfn >= max_pfn) {
+		printk(KERN_INFO "Ignoring SRAT pfns: %08lx - %08lx\n",
+			memory_chunk->start_pfn, memory_chunk->end_pfn);
+		return;
+	}
+	if (memory_chunk->nid != nid)
+		return;
+
+	if (!node_has_online_mem(nid))
+		node_start_pfn[nid] = memory_chunk->start_pfn;
+
+	if (node_start_pfn[nid] > memory_chunk->start_pfn)
+		node_start_pfn[nid] = memory_chunk->start_pfn;
+
+	if (node_end_pfn[nid] < memory_chunk->end_pfn)
+		node_end_pfn[nid] = memory_chunk->end_pfn;
+}
+
+int __init get_memcfg_from_srat(void)
+{
+	int i, j, nid;
+
+
+	if (srat_disabled())
+		goto out_fail;
+
+	if (num_memory_chunks == 0) {
+		printk(KERN_WARNING
+			 "could not finy any ACPI SRAT memory areas.\n");
+		goto out_fail;
+	}
+
+	/* Calculate total number of nodes in system from PXM bitmap and create
+	 * a set of sequential node IDs starting at zero.  (ACPI doesn't seem
+	 * to specify the range of _PXM values.)
+	 */
+	/*
+	 * MCD - we no longer HAVE to number nodes sequentially.  PXM domain
+	 * numbers could go as high as 256, and MAX_NUMNODES for i386 is typically
+	 * 32, so we will continue numbering them in this manner until MAX_NUMNODES
+	 * approaches MAX_PXM_DOMAINS for i386.
+	 */
+	nodes_clear(node_online_map);
+	for (i = 0; i < MAX_PXM_DOMAINS; i++) {
+		if (BMAP_TEST(pxm_bitmap, i)) {
+			int nid = acpi_map_pxm_to_node(i);
+			node_set_online(nid);
+		}
+	}
+	BUG_ON(num_online_nodes() == 0);
+
+	/* set cnode id in memory chunk structure */
+	for (i = 0; i < num_memory_chunks; i++)
+		node_memory_chunk[i].nid = pxm_to_node(node_memory_chunk[i].pxm);
+
+	printk(KERN_DEBUG "pxm bitmap: ");
+	for (i = 0; i < sizeof(pxm_bitmap); i++) {
+		printk(KERN_CONT "%02x ", pxm_bitmap[i]);
+	}
+	printk(KERN_CONT "\n");
+	printk(KERN_DEBUG "Number of logical nodes in system = %d\n",
+			 num_online_nodes());
+	printk(KERN_DEBUG "Number of memory chunks in system = %d\n",
+			 num_memory_chunks);
+
+	for (i = 0; i < MAX_APICID; i++)
+		apicid_2_node[i] = pxm_to_node(apicid_to_pxm[i]);
+
+	for (j = 0; j < num_memory_chunks; j++){
+		struct node_memory_chunk_s * chunk = &node_memory_chunk[j];
+		printk(KERN_DEBUG
+			"chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
+		       j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
+		node_read_chunk(chunk->nid, chunk);
+		e820_register_active_regions(chunk->nid, chunk->start_pfn,
+					     min(chunk->end_pfn, max_pfn));
+	}
+
+	for_each_online_node(nid) {
+		unsigned long start = node_start_pfn[nid];
+		unsigned long end = min(node_end_pfn[nid], max_pfn);
+
+		memory_present(nid, start, end);
+		node_remap_size[nid] = node_memmap_size_bytes(nid, start, end);
+	}
+	return 1;
+out_fail:
+	printk(KERN_ERR "failed to get NUMA memory information from SRAT"
+			" table\n");
+	return 0;
+}
diff --git a/include/asm-x86/srat.h b/include/asm-x86/srat.h
index 456fe0b5a92..774c919dc23 100644
--- a/include/asm-x86/srat.h
+++ b/include/asm-x86/srat.h
@@ -27,7 +27,7 @@
 #ifndef _ASM_SRAT_H_
 #define _ASM_SRAT_H_
 
-#ifdef CONFIG_ACPI_SRAT
+#ifdef CONFIG_ACPI_NUMA
 extern int get_memcfg_from_srat(void);
 #else
 static inline int get_memcfg_from_srat(void)
-- 
cgit v1.2.3-70-g09d2


From 5dfcf14d5b28174f94cbe9b4fb35d415db61c64a Mon Sep 17 00:00:00 2001
From: Bernhard Walle <bwalle@suse.de>
Date: Fri, 27 Jun 2008 13:12:55 +0200
Subject: x86: use FIRMWARE_MEMMAP on x86/E820

This patch uses the /sys/firmware/memmap interface provided in the last patch
on the x86 architecture when E820 is used. The patch copies the E820
memory map very early, and registers the E820 map afterwards via
firmware_map_add_early().

Signed-off-by: Bernhard Walle <bwalle@suse.de>
Acked-by: Greg KH <gregkh@suse.de>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Cc: kexec@lists.infradead.org
Cc: yhlu.kernel@gmail.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/e820.c | 44 +++++++++++++++++++++++++++++++++++++-------
 include/asm-x86/e820.h |  2 ++
 2 files changed, 39 insertions(+), 7 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index d0335853ff5..fc1d579f212 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -19,6 +19,7 @@
 #include <linux/mm.h>
 #include <linux/pfn.h>
 #include <linux/suspend.h>
+#include <linux/firmware-map.h>
 
 #include <asm/pgtable.h>
 #include <asm/page.h>
@@ -27,7 +28,22 @@
 #include <asm/setup.h>
 #include <asm/trampoline.h>
 
+/*
+ * The e820 map is the map that gets modified e.g. with command line parameters
+ * and that is also registered with modifications in the kernel resource tree
+ * with the iomem_resource as parent.
+ *
+ * The e820_saved is directly saved after the BIOS-provided memory map is
+ * copied. It doesn't get modified afterwards. It's registered for the
+ * /sys/firmware/memmap interface.
+ *
+ * That memory map is not modified and is used as base for kexec. The kexec'd
+ * kernel should get the same memory map as the firmware provides. Then the
+ * user can e.g. boot the original kernel with mem=1G while still booting the
+ * next kernel with full memory.
+ */
 struct e820map e820;
+struct e820map e820_saved;
 
 /* For PCI or other memory-mapped resources */
 unsigned long pci_mem_start = 0xaeedbabe;
@@ -1198,6 +1214,17 @@ void __init finish_e820_parsing(void)
 	}
 }
 
+static inline const char *e820_type_to_string(int e820_type)
+{
+	switch (e820_type) {
+	case E820_RESERVED_KERN:
+	case E820_RAM:	return "System RAM";
+	case E820_ACPI:	return "ACPI Tables";
+	case E820_NVS:	return "ACPI Non-volatile Storage";
+	default:	return "reserved";
+	}
+}
+
 /*
  * Mark e820 reserved areas as busy for the resource manager.
  */
@@ -1209,13 +1236,6 @@ void __init e820_reserve_resources(void)
 
 	res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
 	for (i = 0; i < e820.nr_map; i++) {
-		switch (e820.map[i].type) {
-		case E820_RESERVED_KERN:
-		case E820_RAM:	res->name = "System RAM"; break;
-		case E820_ACPI:	res->name = "ACPI Tables"; break;
-		case E820_NVS:	res->name = "ACPI Non-volatile Storage"; break;
-		default:	res->name = "reserved";
-		}
 		end = e820.map[i].addr + e820.map[i].size - 1;
 #ifndef CONFIG_RESOURCES_64BIT
 		if (end > 0x100000000ULL) {
@@ -1223,6 +1243,7 @@ void __init e820_reserve_resources(void)
 			continue;
 		}
 #endif
+		res->name = e820_type_to_string(e820.map[i].type);
 		res->start = e820.map[i].addr;
 		res->end = end;
 
@@ -1230,6 +1251,13 @@ void __init e820_reserve_resources(void)
 		insert_resource(&iomem_resource, res);
 		res++;
 	}
+
+	for (i = 0; i < e820_saved.nr_map; i++) {
+		struct e820entry *entry = &e820_saved.map[i];
+		firmware_map_add_early(entry->addr,
+			entry->addr + entry->size - 1,
+			e820_type_to_string(entry->type));
+	}
 }
 
 char *__init default_machine_specific_memory_setup(void)
@@ -1266,6 +1294,8 @@ char *__init default_machine_specific_memory_setup(void)
 		e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
 	}
 
+	memcpy(&e820_saved, &e820, sizeof(struct e820map));
+
 	/* In case someone cares... */
 	return who;
 }
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index 4b26604b3c1..a20d0a7f589 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -59,7 +59,9 @@ struct e820map {
 	struct e820entry map[E820_X_MAX];
 };
 
+/* see comment in arch/x86/kernel/e820.c */
 extern struct e820map e820;
+extern struct e820map e820_saved;
 
 extern int e820_any_mapped(u64 start, u64 end, unsigned type);
 extern int e820_all_mapped(u64 start, u64 end, unsigned type);
-- 
cgit v1.2.3-70-g09d2


From 3a9e189d69479736a0d0901c87ad08c9e328b389 Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Tue, 1 Jul 2008 14:45:32 -0500
Subject: x86: map UV chipset space - pagetable

Add boot-time function for creating additional 2MB page table entries for
mapping chipset specific cached/uncached ranges.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Cc: linux-mm@kvack.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/init_64.c     | 40 ++++++++++++++++++++++++++++++++++++++++
 include/asm-x86/page_64.h |  4 ++++
 include/asm-x86/pgtable.h |  2 ++
 3 files changed, 46 insertions(+)

(limited to 'include/asm-x86')

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 77d129d62c9..57d5eff754c 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -201,6 +201,46 @@ set_pte_vaddr(unsigned long vaddr, pte_t pteval)
 	set_pte_vaddr_pud(pud_page, vaddr, pteval);
 }
 
+/*
+ * Create large page table mappings for a range of physical addresses.
+ */
+static void __init __init_extra_mapping(unsigned long phys, unsigned long size,
+						pgprot_t prot)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	BUG_ON((phys & ~PMD_MASK) || (size & ~PMD_MASK));
+	for (; size; phys += PMD_SIZE, size -= PMD_SIZE) {
+		pgd = pgd_offset_k((unsigned long)__va(phys));
+		if (pgd_none(*pgd)) {
+			pud = (pud_t *) spp_getpage();
+			set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE |
+						_PAGE_USER));
+		}
+		pud = pud_offset(pgd, (unsigned long)__va(phys));
+		if (pud_none(*pud)) {
+			pmd = (pmd_t *) spp_getpage();
+			set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE |
+						_PAGE_USER));
+		}
+		pmd = pmd_offset(pud, phys);
+		BUG_ON(!pmd_none(*pmd));
+		set_pmd(pmd, __pmd(phys | pgprot_val(prot)));
+	}
+}
+
+void __init init_extra_mapping_wb(unsigned long phys, unsigned long size)
+{
+	__init_extra_mapping(phys, size, PAGE_KERNEL_LARGE);
+}
+
+void __init init_extra_mapping_uc(unsigned long phys, unsigned long size)
+{
+	__init_extra_mapping(phys, size, PAGE_KERNEL_LARGE_NOCACHE);
+}
+
 /*
  * The head.S code sets up the kernel high mapping:
  *
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index 010d12db80d..c6916c83e6b 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -91,6 +91,10 @@ extern unsigned long init_memory_mapping(unsigned long start,
 					 unsigned long end);
 
 extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn);
+
+extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
+extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
+
 #endif	/* !__ASSEMBLY__ */
 
 #ifdef CONFIG_FLATMEM
diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 64de07e2532..49cbd76b954 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -91,6 +91,7 @@
 #define __PAGE_KERNEL_VSYSCALL		(__PAGE_KERNEL_RX | _PAGE_USER)
 #define __PAGE_KERNEL_VSYSCALL_NOCACHE	(__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT)
 #define __PAGE_KERNEL_LARGE		(__PAGE_KERNEL | _PAGE_PSE)
+#define __PAGE_KERNEL_LARGE_NOCACHE	(__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE)
 #define __PAGE_KERNEL_LARGE_EXEC	(__PAGE_KERNEL_EXEC | _PAGE_PSE)
 
 #define PAGE_KERNEL			__pgprot(__PAGE_KERNEL)
@@ -102,6 +103,7 @@
 #define PAGE_KERNEL_UC_MINUS		__pgprot(__PAGE_KERNEL_UC_MINUS)
 #define PAGE_KERNEL_EXEC_NOCACHE	__pgprot(__PAGE_KERNEL_EXEC_NOCACHE)
 #define PAGE_KERNEL_LARGE		__pgprot(__PAGE_KERNEL_LARGE)
+#define PAGE_KERNEL_LARGE_NOCACHE	__pgprot(__PAGE_KERNEL_LARGE_NOCACHE)
 #define PAGE_KERNEL_LARGE_EXEC		__pgprot(__PAGE_KERNEL_LARGE_EXEC)
 #define PAGE_KERNEL_VSYSCALL		__pgprot(__PAGE_KERNEL_VSYSCALL)
 #define PAGE_KERNEL_VSYSCALL_NOCACHE	__pgprot(__PAGE_KERNEL_VSYSCALL_NOCACHE)
-- 
cgit v1.2.3-70-g09d2


From 83f5d894ca5280bfcd904dfeb1347c2da2b19aac Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Tue, 1 Jul 2008 14:45:38 -0500
Subject: x86: map UV chipset space - UV support

Create page table entries to map the SGI UV chipset GRU. local MMR &
global MMR ranges.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Cc: linux-mm@kvack.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/genx2apic_uv_x.c | 75 +++++++++++++++++++++++++++++++++++++++-
 include/asm-x86/uv/uv_hub.h      |  2 ++
 include/asm-x86/uv/uv_mmrs.h     | 46 ++++++++++++++++++++++++
 3 files changed, 122 insertions(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 45e84acca8a..711f11c30b0 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -8,6 +8,7 @@
  * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
  */
 
+#include <linux/kernel.h>
 #include <linux/threads.h>
 #include <linux/cpumask.h>
 #include <linux/string.h>
@@ -20,6 +21,7 @@
 #include <asm/smp.h>
 #include <asm/ipi.h>
 #include <asm/genapic.h>
+#include <asm/pgtable.h>
 #include <asm/uv/uv_mmrs.h>
 #include <asm/uv/uv_hub.h>
 
@@ -208,14 +210,79 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
 	BUG();
 }
 
+static __init void map_low_mmrs(void)
+{
+	init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE);
+	init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE);
+}
+
+enum map_type {map_wb, map_uc};
+
+static void map_high(char *id, unsigned long base, int shift, enum map_type map_type)
+{
+	unsigned long bytes, paddr;
+
+	paddr = base << shift;
+	bytes = (1UL << shift);
+	printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr,
+	       					paddr + bytes);
+	if (map_type == map_uc)
+		init_extra_mapping_uc(paddr, bytes);
+	else
+		init_extra_mapping_wb(paddr, bytes);
+
+}
+static __init void map_gru_high(int max_pnode)
+{
+	union uvh_rh_gam_gru_overlay_config_mmr_u gru;
+	int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
+
+	gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
+	if (gru.s.enable)
+		map_high("GRU", gru.s.base, shift, map_wb);
+}
+
+static __init void map_config_high(int max_pnode)
+{
+	union uvh_rh_gam_cfg_overlay_config_mmr_u cfg;
+	int shift = UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT;
+
+	cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR);
+	if (cfg.s.enable)
+		map_high("CONFIG", cfg.s.base, shift, map_uc);
+}
+
+static __init void map_mmr_high(int max_pnode)
+{
+	union uvh_rh_gam_mmr_overlay_config_mmr_u mmr;
+	int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT;
+
+	mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR);
+	if (mmr.s.enable)
+		map_high("MMR", mmr.s.base, shift, map_uc);
+}
+
+static __init void map_mmioh_high(int max_pnode)
+{
+	union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
+	int shift = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
+
+	mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
+	if (mmioh.s.enable)
+		map_high("MMIOH", mmioh.s.base, shift, map_uc);
+}
+
 static __init void uv_system_init(void)
 {
 	union uvh_si_addr_map_config_u m_n_config;
 	union uvh_node_id_u node_id;
 	unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
 	int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
+	int max_pnode = 0;
 	unsigned long mmr_base, present;
 
+	map_low_mmrs();
+
 	m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG);
 	m_val = m_n_config.s.m_skt;
 	n_val = m_n_config.s.n_skt;
@@ -281,12 +348,18 @@ static __init void uv_system_init(void)
 		uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */
 		uv_node_to_blade[nid] = blade;
 		uv_cpu_to_blade[cpu] = blade;
+		max_pnode = max(pnode, max_pnode);
 
-		printk(KERN_DEBUG "UV cpu %d, apicid 0x%x, pnode %d, nid %d, "
+		printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, "
 			"lcpu %d, blade %d\n",
 			cpu, per_cpu(x86_cpu_to_apicid, cpu), pnode, nid,
 			lcpu, blade);
 	}
+
+	map_gru_high(max_pnode);
+	map_mmr_high(max_pnode);
+	map_config_high(max_pnode);
+	map_mmioh_high(max_pnode);
 }
 
 /*
diff --git a/include/asm-x86/uv/uv_hub.h b/include/asm-x86/uv/uv_hub.h
index 65004881de5..a4ef26e5850 100644
--- a/include/asm-x86/uv/uv_hub.h
+++ b/include/asm-x86/uv/uv_hub.h
@@ -149,6 +149,8 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
 #define UV_LOCAL_MMR_BASE		0xf4000000UL
 #define UV_GLOBAL_MMR32_BASE		0xf8000000UL
 #define UV_GLOBAL_MMR64_BASE		(uv_hub_info->global_mmr_base)
+#define UV_LOCAL_MMR_SIZE		(64UL * 1024 * 1024)
+#define UV_GLOBAL_MMR32_SIZE		(64UL * 1024 * 1024)
 
 #define UV_GLOBAL_MMR32_PNODE_SHIFT	15
 #define UV_GLOBAL_MMR64_PNODE_SHIFT	26
diff --git a/include/asm-x86/uv/uv_mmrs.h b/include/asm-x86/uv/uv_mmrs.h
index ac984607652..37113f554a9 100644
--- a/include/asm-x86/uv/uv_mmrs.h
+++ b/include/asm-x86/uv/uv_mmrs.h
@@ -712,6 +712,26 @@ union uvh_rh_gam_alias210_redirect_config_2_mmr_u {
     } s;
 };
 
+/* ========================================================================= */
+/*                    UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR                      */
+/* ========================================================================= */
+#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR 0x1600020UL
+
+#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT 26
+#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
+#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
+#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+
+union uvh_rh_gam_cfg_overlay_config_mmr_u {
+    unsigned long	v;
+    struct uvh_rh_gam_cfg_overlay_config_mmr_s {
+	unsigned long	rsvd_0_25: 26;  /*    */
+	unsigned long	base   : 20;  /* RW */
+	unsigned long	rsvd_46_62: 17;  /*    */
+	unsigned long	enable :  1;  /* RW */
+    } s;
+};
+
 /* ========================================================================= */
 /*                    UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR                      */
 /* ========================================================================= */
@@ -739,6 +759,32 @@ union uvh_rh_gam_gru_overlay_config_mmr_u {
     } s;
 };
 
+/* ========================================================================= */
+/*                   UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR                     */
+/* ========================================================================= */
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL
+
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 30
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003fffc0000000UL
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_MASK 0x000fc00000000000UL
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT 52
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+
+union uvh_rh_gam_mmioh_overlay_config_mmr_u {
+    unsigned long	v;
+    struct uvh_rh_gam_mmioh_overlay_config_mmr_s {
+	unsigned long	rsvd_0_29: 30;  /*    */
+	unsigned long	base   : 16;  /* RW */
+	unsigned long	m_io   :  6;  /* RW */
+	unsigned long	n_io   :  4;  /* RW */
+	unsigned long	rsvd_56_62:  7;  /*    */
+	unsigned long	enable :  1;  /* RW */
+    } s;
+};
+
 /* ========================================================================= */
 /*                    UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR                      */
 /* ========================================================================= */
-- 
cgit v1.2.3-70-g09d2


From 5d061e397db1ee7a62783a881833f3f9b89f6dc8 Mon Sep 17 00:00:00 2001
From: Dimitri Sivanich <sivanich@sgi.com>
Date: Wed, 2 Jul 2008 15:39:35 -0500
Subject: x86, uv: update x86 mmr list for SGI uv

This patch updates the X86 mmr list for SGI uv.

Signed-off-by: Dimitri Sivanich <sivanich@sgi.com>
Cc: Jack Steiner <steiner@sgi.com>
Cc: Russ Anderson <rja@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/uv/uv_mmrs.h | 535 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 478 insertions(+), 57 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/uv/uv_mmrs.h b/include/asm-x86/uv/uv_mmrs.h
index 37113f554a9..eb59201dc01 100644
--- a/include/asm-x86/uv/uv_mmrs.h
+++ b/include/asm-x86/uv/uv_mmrs.h
@@ -17,7 +17,7 @@
 /*                           UVH_BAU_DATA_CONFIG                             */
 /* ========================================================================= */
 #define UVH_BAU_DATA_CONFIG 0x61680UL
-#define UVH_BAU_DATA_CONFIG_32 0x0450
+#define UVH_BAU_DATA_CONFIG_32 0x0438
 
 #define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0
 #define UVH_BAU_DATA_CONFIG_VECTOR_MASK 0x00000000000000ffUL
@@ -52,11 +52,249 @@ union uvh_bau_data_config_u {
     } s;
 };
 
+/* ========================================================================= */
+/*                           UVH_EVENT_OCCURRED0                             */
+/* ========================================================================= */
+#define UVH_EVENT_OCCURRED0 0x70000UL
+#define UVH_EVENT_OCCURRED0_32 0x005e8
+
+#define UVH_EVENT_OCCURRED0_LB_HCERR_SHFT 0
+#define UVH_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL
+#define UVH_EVENT_OCCURRED0_GR0_HCERR_SHFT 1
+#define UVH_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000002UL
+#define UVH_EVENT_OCCURRED0_GR1_HCERR_SHFT 2
+#define UVH_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000004UL
+#define UVH_EVENT_OCCURRED0_LH_HCERR_SHFT 3
+#define UVH_EVENT_OCCURRED0_LH_HCERR_MASK 0x0000000000000008UL
+#define UVH_EVENT_OCCURRED0_RH_HCERR_SHFT 4
+#define UVH_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000010UL
+#define UVH_EVENT_OCCURRED0_XN_HCERR_SHFT 5
+#define UVH_EVENT_OCCURRED0_XN_HCERR_MASK 0x0000000000000020UL
+#define UVH_EVENT_OCCURRED0_SI_HCERR_SHFT 6
+#define UVH_EVENT_OCCURRED0_SI_HCERR_MASK 0x0000000000000040UL
+#define UVH_EVENT_OCCURRED0_LB_AOERR0_SHFT 7
+#define UVH_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000080UL
+#define UVH_EVENT_OCCURRED0_GR0_AOERR0_SHFT 8
+#define UVH_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000000100UL
+#define UVH_EVENT_OCCURRED0_GR1_AOERR0_SHFT 9
+#define UVH_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000000200UL
+#define UVH_EVENT_OCCURRED0_LH_AOERR0_SHFT 10
+#define UVH_EVENT_OCCURRED0_LH_AOERR0_MASK 0x0000000000000400UL
+#define UVH_EVENT_OCCURRED0_RH_AOERR0_SHFT 11
+#define UVH_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL
+#define UVH_EVENT_OCCURRED0_XN_AOERR0_SHFT 12
+#define UVH_EVENT_OCCURRED0_XN_AOERR0_MASK 0x0000000000001000UL
+#define UVH_EVENT_OCCURRED0_SI_AOERR0_SHFT 13
+#define UVH_EVENT_OCCURRED0_SI_AOERR0_MASK 0x0000000000002000UL
+#define UVH_EVENT_OCCURRED0_LB_AOERR1_SHFT 14
+#define UVH_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000004000UL
+#define UVH_EVENT_OCCURRED0_GR0_AOERR1_SHFT 15
+#define UVH_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000000008000UL
+#define UVH_EVENT_OCCURRED0_GR1_AOERR1_SHFT 16
+#define UVH_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000000010000UL
+#define UVH_EVENT_OCCURRED0_LH_AOERR1_SHFT 17
+#define UVH_EVENT_OCCURRED0_LH_AOERR1_MASK 0x0000000000020000UL
+#define UVH_EVENT_OCCURRED0_RH_AOERR1_SHFT 18
+#define UVH_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000040000UL
+#define UVH_EVENT_OCCURRED0_XN_AOERR1_SHFT 19
+#define UVH_EVENT_OCCURRED0_XN_AOERR1_MASK 0x0000000000080000UL
+#define UVH_EVENT_OCCURRED0_SI_AOERR1_SHFT 20
+#define UVH_EVENT_OCCURRED0_SI_AOERR1_MASK 0x0000000000100000UL
+#define UVH_EVENT_OCCURRED0_RH_VPI_INT_SHFT 21
+#define UVH_EVENT_OCCURRED0_RH_VPI_INT_MASK 0x0000000000200000UL
+#define UVH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 22
+#define UVH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 23
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000000800000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 24
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000001000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 25
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000002000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 26
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000004000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 27
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000000008000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 28
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000000010000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 29
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000000020000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 30
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000000040000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 31
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000000080000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 32
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000000100000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 33
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000000200000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 34
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000000400000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 35
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000000800000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 36
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000001000000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 37
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000002000000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 38
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000004000000000UL
+#define UVH_EVENT_OCCURRED0_L1_NMI_INT_SHFT 39
+#define UVH_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0000008000000000UL
+#define UVH_EVENT_OCCURRED0_STOP_CLOCK_SHFT 40
+#define UVH_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0000010000000000UL
+#define UVH_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 41
+#define UVH_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0000020000000000UL
+#define UVH_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 42
+#define UVH_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0000040000000000UL
+#define UVH_EVENT_OCCURRED0_LTC_INT_SHFT 43
+#define UVH_EVENT_OCCURRED0_LTC_INT_MASK 0x0000080000000000UL
+#define UVH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 44
+#define UVH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL
+#define UVH_EVENT_OCCURRED0_IPI_INT_SHFT 45
+#define UVH_EVENT_OCCURRED0_IPI_INT_MASK 0x0000200000000000UL
+#define UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT 46
+#define UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0000400000000000UL
+#define UVH_EVENT_OCCURRED0_EXTIO_INT1_SHFT 47
+#define UVH_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0000800000000000UL
+#define UVH_EVENT_OCCURRED0_EXTIO_INT2_SHFT 48
+#define UVH_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0001000000000000UL
+#define UVH_EVENT_OCCURRED0_EXTIO_INT3_SHFT 49
+#define UVH_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0002000000000000UL
+#define UVH_EVENT_OCCURRED0_PROFILE_INT_SHFT 50
+#define UVH_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0004000000000000UL
+#define UVH_EVENT_OCCURRED0_RTC0_SHFT 51
+#define UVH_EVENT_OCCURRED0_RTC0_MASK 0x0008000000000000UL
+#define UVH_EVENT_OCCURRED0_RTC1_SHFT 52
+#define UVH_EVENT_OCCURRED0_RTC1_MASK 0x0010000000000000UL
+#define UVH_EVENT_OCCURRED0_RTC2_SHFT 53
+#define UVH_EVENT_OCCURRED0_RTC2_MASK 0x0020000000000000UL
+#define UVH_EVENT_OCCURRED0_RTC3_SHFT 54
+#define UVH_EVENT_OCCURRED0_RTC3_MASK 0x0040000000000000UL
+#define UVH_EVENT_OCCURRED0_BAU_DATA_SHFT 55
+#define UVH_EVENT_OCCURRED0_BAU_DATA_MASK 0x0080000000000000UL
+#define UVH_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_SHFT 56
+#define UVH_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK 0x0100000000000000UL
+union uvh_event_occurred0_u {
+    unsigned long	v;
+    struct uvh_event_occurred0_s {
+	unsigned long	lb_hcerr             :  1;  /* RW, W1C */
+	unsigned long	gr0_hcerr            :  1;  /* RW, W1C */
+	unsigned long	gr1_hcerr            :  1;  /* RW, W1C */
+	unsigned long	lh_hcerr             :  1;  /* RW, W1C */
+	unsigned long	rh_hcerr             :  1;  /* RW, W1C */
+	unsigned long	xn_hcerr             :  1;  /* RW, W1C */
+	unsigned long	si_hcerr             :  1;  /* RW, W1C */
+	unsigned long	lb_aoerr0            :  1;  /* RW, W1C */
+	unsigned long	gr0_aoerr0           :  1;  /* RW, W1C */
+	unsigned long	gr1_aoerr0           :  1;  /* RW, W1C */
+	unsigned long	lh_aoerr0            :  1;  /* RW, W1C */
+	unsigned long	rh_aoerr0            :  1;  /* RW, W1C */
+	unsigned long	xn_aoerr0            :  1;  /* RW, W1C */
+	unsigned long	si_aoerr0            :  1;  /* RW, W1C */
+	unsigned long	lb_aoerr1            :  1;  /* RW, W1C */
+	unsigned long	gr0_aoerr1           :  1;  /* RW, W1C */
+	unsigned long	gr1_aoerr1           :  1;  /* RW, W1C */
+	unsigned long	lh_aoerr1            :  1;  /* RW, W1C */
+	unsigned long	rh_aoerr1            :  1;  /* RW, W1C */
+	unsigned long	xn_aoerr1            :  1;  /* RW, W1C */
+	unsigned long	si_aoerr1            :  1;  /* RW, W1C */
+	unsigned long	rh_vpi_int           :  1;  /* RW, W1C */
+	unsigned long	system_shutdown_int  :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_0         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_1         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_2         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_3         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_4         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_5         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_6         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_7         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_8         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_9         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_10        :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_11        :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_12        :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_13        :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_14        :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_15        :  1;  /* RW, W1C */
+	unsigned long	l1_nmi_int           :  1;  /* RW, W1C */
+	unsigned long	stop_clock           :  1;  /* RW, W1C */
+	unsigned long	asic_to_l1           :  1;  /* RW, W1C */
+	unsigned long	l1_to_asic           :  1;  /* RW, W1C */
+	unsigned long	ltc_int              :  1;  /* RW, W1C */
+	unsigned long	la_seq_trigger       :  1;  /* RW, W1C */
+	unsigned long	ipi_int              :  1;  /* RW, W1C */
+	unsigned long	extio_int0           :  1;  /* RW, W1C */
+	unsigned long	extio_int1           :  1;  /* RW, W1C */
+	unsigned long	extio_int2           :  1;  /* RW, W1C */
+	unsigned long	extio_int3           :  1;  /* RW, W1C */
+	unsigned long	profile_int          :  1;  /* RW, W1C */
+	unsigned long	rtc0                 :  1;  /* RW, W1C */
+	unsigned long	rtc1                 :  1;  /* RW, W1C */
+	unsigned long	rtc2                 :  1;  /* RW, W1C */
+	unsigned long	rtc3                 :  1;  /* RW, W1C */
+	unsigned long	bau_data             :  1;  /* RW, W1C */
+	unsigned long	power_management_req :  1;  /* RW, W1C */
+	unsigned long	rsvd_57_63           :  7;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
+/*                        UVH_EVENT_OCCURRED0_ALIAS                          */
+/* ========================================================================= */
+#define UVH_EVENT_OCCURRED0_ALIAS 0x0000000000070008UL
+#define UVH_EVENT_OCCURRED0_ALIAS_32 0x005f0
+
+/* ========================================================================= */
+/*                               UVH_INT_CMPB                                */
+/* ========================================================================= */
+#define UVH_INT_CMPB 0x22080UL
+
+#define UVH_INT_CMPB_REAL_TIME_CMPB_SHFT 0
+#define UVH_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL
+
+union uvh_int_cmpb_u {
+    unsigned long	v;
+    struct uvh_int_cmpb_s {
+	unsigned long	real_time_cmpb : 56;  /* RW */
+	unsigned long	rsvd_56_63     :  8;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
+/*                               UVH_INT_CMPC                                */
+/* ========================================================================= */
+#define UVH_INT_CMPC 0x22100UL
+
+#define UVH_INT_CMPC_REAL_TIME_CMPC_SHFT 0
+#define UVH_INT_CMPC_REAL_TIME_CMPC_MASK 0x00ffffffffffffffUL
+
+union uvh_int_cmpc_u {
+    unsigned long	v;
+    struct uvh_int_cmpc_s {
+	unsigned long	real_time_cmpc : 56;  /* RW */
+	unsigned long	rsvd_56_63     :  8;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
+/*                               UVH_INT_CMPD                                */
+/* ========================================================================= */
+#define UVH_INT_CMPD 0x22180UL
+
+#define UVH_INT_CMPD_REAL_TIME_CMPD_SHFT 0
+#define UVH_INT_CMPD_REAL_TIME_CMPD_MASK 0x00ffffffffffffffUL
+
+union uvh_int_cmpd_u {
+    unsigned long	v;
+    struct uvh_int_cmpd_s {
+	unsigned long	real_time_cmpd : 56;  /* RW */
+	unsigned long	rsvd_56_63     :  8;  /*    */
+    } s;
+};
+
 /* ========================================================================= */
 /*                               UVH_IPI_INT                                 */
 /* ========================================================================= */
 #define UVH_IPI_INT 0x60500UL
-#define UVH_IPI_INT_32 0x0360
+#define UVH_IPI_INT_32 0x0348
 
 #define UVH_IPI_INT_VECTOR_SHFT 0
 #define UVH_IPI_INT_VECTOR_MASK 0x00000000000000ffUL
@@ -86,7 +324,7 @@ union uvh_ipi_int_u {
 /*                   UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST                     */
 /* ========================================================================= */
 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x009f0
+#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x009c0
 
 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4
 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL
@@ -108,7 +346,7 @@ union uvh_lb_bau_intd_payload_queue_first_u {
 /*                    UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST                     */
 /* ========================================================================= */
 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x009f8
+#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x009c8
 
 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4
 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL
@@ -126,7 +364,7 @@ union uvh_lb_bau_intd_payload_queue_last_u {
 /*                    UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL                     */
 /* ========================================================================= */
 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x00a00
+#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x009d0
 
 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4
 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL
@@ -144,7 +382,7 @@ union uvh_lb_bau_intd_payload_queue_tail_u {
 /*                   UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE                    */
 /* ========================================================================= */
 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0x0aa0
+#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0x0a68
 
 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0
 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL
@@ -205,13 +443,13 @@ union uvh_lb_bau_intd_software_acknowledge_u {
 /*                UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS                 */
 /* ========================================================================= */
 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x0000000000320088UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0x0aa8
+#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0x0a70
 
 /* ========================================================================= */
 /*                     UVH_LB_BAU_SB_ACTIVATION_CONTROL                      */
 /* ========================================================================= */
 #define UVH_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 0x009d8
+#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 0x009a8
 
 #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0
 #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_MASK 0x000000000000003fUL
@@ -234,7 +472,7 @@ union uvh_lb_bau_sb_activation_control_u {
 /*                    UVH_LB_BAU_SB_ACTIVATION_STATUS_0                      */
 /* ========================================================================= */
 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x009e0
+#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x009b0
 
 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0
 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL
@@ -250,7 +488,7 @@ union uvh_lb_bau_sb_activation_status_0_u {
 /*                    UVH_LB_BAU_SB_ACTIVATION_STATUS_1                      */
 /* ========================================================================= */
 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x009e8
+#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x009b8
 
 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0
 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL
@@ -266,7 +504,7 @@ union uvh_lb_bau_sb_activation_status_1_u {
 /*                      UVH_LB_BAU_SB_DESCRIPTOR_BASE                        */
 /* ========================================================================= */
 #define UVH_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL
-#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 0x009d0
+#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 0x009a0
 
 #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12
 #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL
@@ -333,46 +571,48 @@ union uvh_lb_bau_sb_descriptor_base_u {
 #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_OBESE_MSG_MASK 0x0000000000100000UL
 #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_DATA_SB_ERR_SHFT 21
 #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_DATA_SB_ERR_MASK 0x0000000000200000UL
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_TIMEOUT_SHFT 22
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_TIMEOUT_MASK 0x0000000000400000UL
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_SPURIOUS_EVENT_SHFT 23
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_SPURIOUS_EVENT_MASK 0x0000000000800000UL
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IOH_DESTINATION_TABLE_PARITY_SHFT 24
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IOH_DESTINATION_TABLE_PARITY_MASK 0x0000000001000000UL
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_HAD_ERROR_REPLY_SHFT 25
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_HAD_ERROR_REPLY_MASK 0x0000000002000000UL
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_TIMEOUT_SHFT 26
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_TIMEOUT_MASK 0x0000000004000000UL
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_LOCK_MANAGER_HAD_ERROR_REPLY_SHFT 27
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_LOCK_MANAGER_HAD_ERROR_REPLY_MASK 0x0000000008000000UL
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_HAD_ERROR_REPLY_SHFT 28
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_HAD_ERROR_REPLY_MASK 0x0000000010000000UL
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_TIMEOUT_SHFT 29
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_TIMEOUT_MASK 0x0000000020000000UL
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SB_ACTIVATION_OVERRUN_SHFT 30
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SB_ACTIVATION_OVERRUN_MASK 0x0000000040000000UL
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_HAD_ERROR_REPLY_SHFT 31
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_HAD_ERROR_REPLY_MASK 0x0000000080000000UL
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_TIMEOUT_SHFT 32
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_TIMEOUT_MASK 0x0000000100000000UL
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_0_PARITY_SHFT 33
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_0_PARITY_MASK 0x0000000200000000UL
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_1_PARITY_SHFT 34
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_1_PARITY_MASK 0x0000000400000000UL
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SOCKET_DESTINATION_TABLE_PARITY_SHFT 35
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SOCKET_DESTINATION_TABLE_PARITY_MASK 0x0000000800000000UL
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_BAU_REPLY_PAYLOAD_CORRUPTION_SHFT 36
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_BAU_REPLY_PAYLOAD_CORRUPTION_MASK 0x0000001000000000UL
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IO_PORT_DESTINATION_TABLE_PARITY_SHFT 37
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IO_PORT_DESTINATION_TABLE_PARITY_MASK 0x0000002000000000UL
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INTD_SOFT_ACK_TIMEOUT_SHFT 38
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INTD_SOFT_ACK_TIMEOUT_MASK 0x0000004000000000UL
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_OBESE_MSG_SHFT 39
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_OBESE_MSG_MASK 0x0000008000000000UL
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_COMMAND_ERR_SHFT 40
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_COMMAND_ERR_MASK 0x0000010000000000UL
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_TIMEOUT_SHFT 41
-#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_TIMEOUT_MASK 0x0000020000000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_AMO_TIMEOUT_SHFT 22
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_AMO_TIMEOUT_MASK 0x0000000000400000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_PUT_TIMEOUT_SHFT 23
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_PUT_TIMEOUT_MASK 0x0000000000800000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_SPURIOUS_EVENT_SHFT 24
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_SPURIOUS_EVENT_MASK 0x0000000001000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IOH_DESTINATION_TABLE_PARITY_SHFT 25
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IOH_DESTINATION_TABLE_PARITY_MASK 0x0000000002000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_HAD_ERROR_REPLY_SHFT 26
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_HAD_ERROR_REPLY_MASK 0x0000000004000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_TIMEOUT_SHFT 27
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_TIMEOUT_MASK 0x0000000008000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_LOCK_MANAGER_HAD_ERROR_REPLY_SHFT 28
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_LOCK_MANAGER_HAD_ERROR_REPLY_MASK 0x0000000010000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_HAD_ERROR_REPLY_SHFT 29
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_HAD_ERROR_REPLY_MASK 0x0000000020000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_TIMEOUT_SHFT 30
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_TIMEOUT_MASK 0x0000000040000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SB_ACTIVATION_OVERRUN_SHFT 31
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SB_ACTIVATION_OVERRUN_MASK 0x0000000080000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_HAD_ERROR_REPLY_SHFT 32
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_HAD_ERROR_REPLY_MASK 0x0000000100000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_TIMEOUT_SHFT 33
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_TIMEOUT_MASK 0x0000000200000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_0_PARITY_SHFT 34
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_0_PARITY_MASK 0x0000000400000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_1_PARITY_SHFT 35
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_1_PARITY_MASK 0x0000000800000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SOCKET_DESTINATION_TABLE_PARITY_SHFT 36
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SOCKET_DESTINATION_TABLE_PARITY_MASK 0x0000001000000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_BAU_REPLY_PAYLOAD_CORRUPTION_SHFT 37
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_BAU_REPLY_PAYLOAD_CORRUPTION_MASK 0x0000002000000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IO_PORT_DESTINATION_TABLE_PARITY_SHFT 38
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IO_PORT_DESTINATION_TABLE_PARITY_MASK 0x0000004000000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INTD_SOFT_ACK_TIMEOUT_SHFT 39
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INTD_SOFT_ACK_TIMEOUT_MASK 0x0000008000000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_OBESE_MSG_SHFT 40
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_OBESE_MSG_MASK 0x0000010000000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_COMMAND_ERR_SHFT 41
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_COMMAND_ERR_MASK 0x0000020000000000UL
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_TIMEOUT_SHFT 42
+#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_TIMEOUT_MASK 0x0000040000000000UL
 
 union uvh_lb_mcast_aoerr0_rpt_enable_u {
     unsigned long	v;
@@ -399,7 +639,8 @@ union uvh_lb_mcast_aoerr0_rpt_enable_u {
 	unsigned long	macc_rep_runt_msg                       :  1;  /* RW */
 	unsigned long	macc_rep_obese_msg                      :  1;  /* RW */
 	unsigned long	macc_rep_data_sb_err                    :  1;  /* RW */
-	unsigned long	macc_timeout                            :  1;  /* RW */
+	unsigned long	macc_amo_timeout                        :  1;  /* RW */
+	unsigned long	macc_put_timeout                        :  1;  /* RW */
 	unsigned long	macc_spurious_event                     :  1;  /* RW */
 	unsigned long	ioh_destination_table_parity            :  1;  /* RW */
 	unsigned long	get_had_error_reply                     :  1;  /* RW */
@@ -419,7 +660,7 @@ union uvh_lb_mcast_aoerr0_rpt_enable_u {
 	unsigned long	int_rep_obese_msg                       :  1;  /* RW */
 	unsigned long	int_rep_command_err                     :  1;  /* RW */
 	unsigned long	int_timeout                             :  1;  /* RW */
-	unsigned long	rsvd_42_63                              : 22;  /*    */
+	unsigned long	rsvd_43_63                              : 21;  /*    */
     } s;
 };
 
@@ -732,6 +973,26 @@ union uvh_rh_gam_cfg_overlay_config_mmr_u {
     } s;
 };
 
+/* ========================================================================= */
+/*                    UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR                      */
+/* ========================================================================= */
+#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR 0x1600020UL
+
+#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT 26
+#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
+#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
+#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+
+union uvh_rh_gam_cfg_overlay_config_mmr_u {
+    unsigned long	v;
+    struct uvh_rh_gam_cfg_overlay_config_mmr_s {
+	unsigned long	rsvd_0_25: 26;  /*    */
+	unsigned long	base   : 20;  /* RW */
+	unsigned long	rsvd_46_62: 17;  /*    */
+	unsigned long	enable :  1;  /* RW */
+    } s;
+};
+
 /* ========================================================================= */
 /*                    UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR                      */
 /* ========================================================================= */
@@ -739,8 +1000,8 @@ union uvh_rh_gam_cfg_overlay_config_mmr_u {
 
 #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
 #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_SHFT 46
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_MASK 0x0000400000000000UL
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_SHFT 48
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_MASK 0x0001000000000000UL
 #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
 #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
 #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
@@ -751,8 +1012,9 @@ union uvh_rh_gam_gru_overlay_config_mmr_u {
     struct uvh_rh_gam_gru_overlay_config_mmr_s {
 	unsigned long	rsvd_0_27: 28;  /*    */
 	unsigned long	base   : 18;  /* RW */
+	unsigned long	rsvd_46_47:  2;  /*    */
 	unsigned long	gr4    :  1;  /* RW */
-	unsigned long	rsvd_47_51:  5;  /*    */
+	unsigned long	rsvd_49_51:  3;  /*    */
 	unsigned long	n_gru  :  4;  /* RW */
 	unsigned long	rsvd_56_62:  7;  /*    */
 	unsigned long	enable :  1;  /* RW */
@@ -785,6 +1047,32 @@ union uvh_rh_gam_mmioh_overlay_config_mmr_u {
     } s;
 };
 
+/* ========================================================================= */
+/*                   UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR                     */
+/* ========================================================================= */
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL
+
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 30
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003fffc0000000UL
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_MASK 0x000fc00000000000UL
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT 52
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+
+union uvh_rh_gam_mmioh_overlay_config_mmr_u {
+    unsigned long	v;
+    struct uvh_rh_gam_mmioh_overlay_config_mmr_s {
+	unsigned long	rsvd_0_29: 30;  /*    */
+	unsigned long	base   : 16;  /* RW */
+	unsigned long	m_io   :  6;  /* RW */
+	unsigned long	n_io   :  4;  /* RW */
+	unsigned long	rsvd_56_62:  7;  /*    */
+	unsigned long	enable :  1;  /* RW */
+    } s;
+};
+
 /* ========================================================================= */
 /*                    UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR                      */
 /* ========================================================================= */
@@ -811,7 +1099,7 @@ union uvh_rh_gam_mmr_overlay_config_mmr_u {
 /* ========================================================================= */
 /*                                 UVH_RTC                                   */
 /* ========================================================================= */
-#define UVH_RTC 0x28000UL
+#define UVH_RTC 0x340000UL
 
 #define UVH_RTC_REAL_TIME_CLOCK_SHFT 0
 #define UVH_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL
@@ -824,6 +1112,139 @@ union uvh_rtc_u {
     } s;
 };
 
+/* ========================================================================= */
+/*                           UVH_RTC1_INT_CONFIG                             */
+/* ========================================================================= */
+#define UVH_RTC1_INT_CONFIG 0x615c0UL
+
+#define UVH_RTC1_INT_CONFIG_VECTOR_SHFT 0
+#define UVH_RTC1_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_RTC1_INT_CONFIG_DM_SHFT 8
+#define UVH_RTC1_INT_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVH_RTC1_INT_CONFIG_DESTMODE_SHFT 11
+#define UVH_RTC1_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_RTC1_INT_CONFIG_STATUS_SHFT 12
+#define UVH_RTC1_INT_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVH_RTC1_INT_CONFIG_P_SHFT 13
+#define UVH_RTC1_INT_CONFIG_P_MASK 0x0000000000002000UL
+#define UVH_RTC1_INT_CONFIG_T_SHFT 15
+#define UVH_RTC1_INT_CONFIG_T_MASK 0x0000000000008000UL
+#define UVH_RTC1_INT_CONFIG_M_SHFT 16
+#define UVH_RTC1_INT_CONFIG_M_MASK 0x0000000000010000UL
+#define UVH_RTC1_INT_CONFIG_APIC_ID_SHFT 32
+#define UVH_RTC1_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
+union uvh_rtc1_int_config_u {
+    unsigned long	v;
+    struct uvh_rtc1_int_config_s {
+	unsigned long	vector_  :  8;  /* RW */
+	unsigned long	dm       :  3;  /* RW */
+	unsigned long	destmode :  1;  /* RW */
+	unsigned long	status   :  1;  /* RO */
+	unsigned long	p        :  1;  /* RO */
+	unsigned long	rsvd_14  :  1;  /*    */
+	unsigned long	t        :  1;  /* RO */
+	unsigned long	m        :  1;  /* RW */
+	unsigned long	rsvd_17_31: 15;  /*    */
+	unsigned long	apic_id  : 32;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                           UVH_RTC2_INT_CONFIG                             */
+/* ========================================================================= */
+#define UVH_RTC2_INT_CONFIG 0x61600UL
+
+#define UVH_RTC2_INT_CONFIG_VECTOR_SHFT 0
+#define UVH_RTC2_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_RTC2_INT_CONFIG_DM_SHFT 8
+#define UVH_RTC2_INT_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVH_RTC2_INT_CONFIG_DESTMODE_SHFT 11
+#define UVH_RTC2_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_RTC2_INT_CONFIG_STATUS_SHFT 12
+#define UVH_RTC2_INT_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVH_RTC2_INT_CONFIG_P_SHFT 13
+#define UVH_RTC2_INT_CONFIG_P_MASK 0x0000000000002000UL
+#define UVH_RTC2_INT_CONFIG_T_SHFT 15
+#define UVH_RTC2_INT_CONFIG_T_MASK 0x0000000000008000UL
+#define UVH_RTC2_INT_CONFIG_M_SHFT 16
+#define UVH_RTC2_INT_CONFIG_M_MASK 0x0000000000010000UL
+#define UVH_RTC2_INT_CONFIG_APIC_ID_SHFT 32
+#define UVH_RTC2_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
+union uvh_rtc2_int_config_u {
+    unsigned long	v;
+    struct uvh_rtc2_int_config_s {
+	unsigned long	vector_  :  8;  /* RW */
+	unsigned long	dm       :  3;  /* RW */
+	unsigned long	destmode :  1;  /* RW */
+	unsigned long	status   :  1;  /* RO */
+	unsigned long	p        :  1;  /* RO */
+	unsigned long	rsvd_14  :  1;  /*    */
+	unsigned long	t        :  1;  /* RO */
+	unsigned long	m        :  1;  /* RW */
+	unsigned long	rsvd_17_31: 15;  /*    */
+	unsigned long	apic_id  : 32;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                           UVH_RTC3_INT_CONFIG                             */
+/* ========================================================================= */
+#define UVH_RTC3_INT_CONFIG 0x61640UL
+
+#define UVH_RTC3_INT_CONFIG_VECTOR_SHFT 0
+#define UVH_RTC3_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_RTC3_INT_CONFIG_DM_SHFT 8
+#define UVH_RTC3_INT_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVH_RTC3_INT_CONFIG_DESTMODE_SHFT 11
+#define UVH_RTC3_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_RTC3_INT_CONFIG_STATUS_SHFT 12
+#define UVH_RTC3_INT_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVH_RTC3_INT_CONFIG_P_SHFT 13
+#define UVH_RTC3_INT_CONFIG_P_MASK 0x0000000000002000UL
+#define UVH_RTC3_INT_CONFIG_T_SHFT 15
+#define UVH_RTC3_INT_CONFIG_T_MASK 0x0000000000008000UL
+#define UVH_RTC3_INT_CONFIG_M_SHFT 16
+#define UVH_RTC3_INT_CONFIG_M_MASK 0x0000000000010000UL
+#define UVH_RTC3_INT_CONFIG_APIC_ID_SHFT 32
+#define UVH_RTC3_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
+union uvh_rtc3_int_config_u {
+    unsigned long	v;
+    struct uvh_rtc3_int_config_s {
+	unsigned long	vector_  :  8;  /* RW */
+	unsigned long	dm       :  3;  /* RW */
+	unsigned long	destmode :  1;  /* RW */
+	unsigned long	status   :  1;  /* RO */
+	unsigned long	p        :  1;  /* RO */
+	unsigned long	rsvd_14  :  1;  /*    */
+	unsigned long	t        :  1;  /* RO */
+	unsigned long	m        :  1;  /* RW */
+	unsigned long	rsvd_17_31: 15;  /*    */
+	unsigned long	apic_id  : 32;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                            UVH_RTC_INC_RATIO                              */
+/* ========================================================================= */
+#define UVH_RTC_INC_RATIO 0x350000UL
+
+#define UVH_RTC_INC_RATIO_FRACTION_SHFT 0
+#define UVH_RTC_INC_RATIO_FRACTION_MASK 0x00000000000fffffUL
+#define UVH_RTC_INC_RATIO_RATIO_SHFT 20
+#define UVH_RTC_INC_RATIO_RATIO_MASK 0x0000000000700000UL
+
+union uvh_rtc_inc_ratio_u {
+    unsigned long	v;
+    struct uvh_rtc_inc_ratio_s {
+	unsigned long	fraction : 20;  /* RW */
+	unsigned long	ratio    :  3;  /* RW */
+	unsigned long	rsvd_23_63: 41;  /*    */
+    } s;
+};
+
 /* ========================================================================= */
 /*                          UVH_SI_ADDR_MAP_CONFIG                           */
 /* ========================================================================= */
-- 
cgit v1.2.3-70-g09d2


From bfc0f5947afa5e3a13e55867f4478c8a92c11dca Mon Sep 17 00:00:00 2001
From: Alok Kataria <akataria@vmware.com>
Date: Tue, 1 Jul 2008 11:43:24 -0700
Subject: x86: merge tsc calibration

Merge the tsc calibration code for the 32bit and 64bit kernel.
The paravirtualized calculate_cpu_khz for 64bit now points to the correct
tsc_calibrate code as in 32bit.
Original native_calculate_cpu_khz for 64 bit is now called as calibrate_cpu.

Also moved the recalibrate_cpu_khz function in the common file.
Note that this function is called only from powernow K7 cpu freq driver.

Signed-off-by: Alok N Kataria <akataria@vmware.com>
Signed-off-by: Dan Hecht <dhecht@vmware.com>
Cc: Dan Hecht <dhecht@vmware.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/time_64.c |  26 ++++++---
 arch/x86/kernel/tsc.c     | 131 ++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/tsc_32.c  |  74 +-------------------------
 arch/x86/kernel/tsc_64.c  |  94 +--------------------------------
 include/asm-x86/hpet.h    |   2 +-
 include/asm-x86/tsc.h     |   1 -
 6 files changed, 154 insertions(+), 174 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index 39ae8511a13..c6ac4dad41f 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -56,7 +56,7 @@ static irqreturn_t timer_event_interrupt(int irq, void *dev_id)
 /* calibrate_cpu is used on systems with fixed rate TSCs to determine
  * processor frequency */
 #define TICK_COUNT 100000000
-unsigned long __init native_calculate_cpu_khz(void)
+static unsigned long __init calibrate_cpu(void)
 {
 	int tsc_start, tsc_now;
 	int i, no_ctr_free;
@@ -114,14 +114,18 @@ void __init hpet_time_init(void)
 	setup_irq(0, &irq0);
 }
 
+extern void set_cyc2ns_scale(unsigned long cpu_khz, int cpu);
+
 void __init time_init(void)
 {
-	tsc_calibrate();
+	int cpu;
+
+	cpu_khz = calculate_cpu_khz();
+	tsc_khz = cpu_khz;
 
-	cpu_khz = tsc_khz;
 	if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
-		(boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
-		cpu_khz = calculate_cpu_khz();
+			(boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
+		cpu_khz = calibrate_cpu();
 
 	lpj_fine = ((unsigned long)tsc_khz * 1000)/HZ;
 
@@ -134,7 +138,17 @@ void __init time_init(void)
 		vgetcpu_mode = VGETCPU_LSL;
 
 	printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
-		cpu_khz / 1000, cpu_khz % 1000);
+			cpu_khz / 1000, cpu_khz % 1000);
+
+	/*
+	 * Secondary CPUs do not run through tsc_init(), so set up
+	 * all the scale factors for all CPUs, assuming the same
+	 * speed as the bootup CPU. (cpufreq notifiers will fix this
+	 * up if their speed diverges)
+	 */
+	for_each_possible_cpu(cpu)
+		set_cyc2ns_scale(cpu_khz, cpu);
+
 	init_tsc_clocksource();
 	late_time_init = choose_time_init();
 }
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 5d0be778fad..e6ee14533c7 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1,7 +1,11 @@
+#include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/timer.h>
+#include <linux/acpi_pmtmr.h>
+
+#include <asm/hpet.h>
 
 unsigned int cpu_khz;           /* TSC clocks / usec, not used here */
 EXPORT_SYMBOL(cpu_khz);
@@ -84,3 +88,130 @@ int __init notsc_setup(char *str)
 #endif
 
 __setup("notsc", notsc_setup);
+
+#define MAX_RETRIES     5
+#define SMI_TRESHOLD    50000
+
+/*
+ * Read TSC and the reference counters. Take care of SMI disturbance
+ */
+static u64 __init tsc_read_refs(u64 *pm, u64 *hpet)
+{
+	u64 t1, t2;
+	int i;
+
+	for (i = 0; i < MAX_RETRIES; i++) {
+		t1 = get_cycles();
+		if (hpet)
+			*hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
+		else
+			*pm = acpi_pm_read_early();
+		t2 = get_cycles();
+		if ((t2 - t1) < SMI_TRESHOLD)
+			return t2;
+	}
+	return ULLONG_MAX;
+}
+
+/**
+ * tsc_calibrate - calibrate the tsc on boot
+ */
+static unsigned int __init tsc_calibrate(void)
+{
+	unsigned long flags;
+	u64 tsc1, tsc2, tr1, tr2, delta, pm1, pm2, hpet1, hpet2;
+	int hpet = is_hpet_enabled();
+	unsigned int tsc_khz_val = 0;
+
+	local_irq_save(flags);
+
+	tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL);
+
+	outb((inb(0x61) & ~0x02) | 0x01, 0x61);
+
+	outb(0xb0, 0x43);
+	outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
+	outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42);
+	tr1 = get_cycles();
+	while ((inb(0x61) & 0x20) == 0);
+	tr2 = get_cycles();
+
+	tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL);
+
+	local_irq_restore(flags);
+
+	/*
+	 * Preset the result with the raw and inaccurate PIT
+	 * calibration value
+	 */
+	delta = (tr2 - tr1);
+	do_div(delta, 50);
+	tsc_khz_val = delta;
+
+	/* hpet or pmtimer available ? */
+	if (!hpet && !pm1 && !pm2) {
+		printk(KERN_INFO "TSC calibrated against PIT\n");
+		goto out;
+	}
+
+	/* Check, whether the sampling was disturbed by an SMI */
+	if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) {
+		printk(KERN_WARNING "TSC calibration disturbed by SMI, "
+				"using PIT calibration result\n");
+		goto out;
+	}
+
+	tsc2 = (tsc2 - tsc1) * 1000000LL;
+
+	if (hpet) {
+		printk(KERN_INFO "TSC calibrated against HPET\n");
+		if (hpet2 < hpet1)
+			hpet2 += 0x100000000ULL;
+		hpet2 -= hpet1;
+		tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
+		do_div(tsc1, 1000000);
+	} else {
+		printk(KERN_INFO "TSC calibrated against PM_TIMER\n");
+		if (pm2 < pm1)
+			pm2 += (u64)ACPI_PM_OVRRUN;
+		pm2 -= pm1;
+		tsc1 = pm2 * 1000000000LL;
+		do_div(tsc1, PMTMR_TICKS_PER_SEC);
+	}
+
+	do_div(tsc2, tsc1);
+	tsc_khz_val = tsc2;
+
+out:
+	return tsc_khz_val;
+}
+
+unsigned long native_calculate_cpu_khz(void)
+{
+	return tsc_calibrate();
+}
+
+#ifdef CONFIG_X86_32
+/* Only called from the Powernow K7 cpu freq driver */
+int recalibrate_cpu_khz(void)
+{
+#ifndef CONFIG_SMP
+	unsigned long cpu_khz_old = cpu_khz;
+
+	if (cpu_has_tsc) {
+		cpu_khz = calculate_cpu_khz();
+		tsc_khz = cpu_khz;
+		cpu_data(0).loops_per_jiffy =
+			cpufreq_scale(cpu_data(0).loops_per_jiffy,
+					cpu_khz_old, cpu_khz);
+		return 0;
+	} else
+		return -ENODEV;
+#else
+	return -ENODEV;
+#endif
+}
+
+EXPORT_SYMBOL(recalibrate_cpu_khz);
+
+#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
index dc8990056d7..40c0aafb358 100644
--- a/arch/x86/kernel/tsc_32.c
+++ b/arch/x86/kernel/tsc_32.c
@@ -42,7 +42,7 @@ extern int tsc_disabled;
 
 DEFINE_PER_CPU(unsigned long, cyc2ns);
 
-static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
+void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
 {
 	unsigned long long tsc_now, ns_now;
 	unsigned long flags, *scale;
@@ -65,78 +65,6 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
 	local_irq_restore(flags);
 }
 
-unsigned long native_calculate_cpu_khz(void)
-{
-	unsigned long long start, end;
-	unsigned long count;
-	u64 delta64 = (u64)ULLONG_MAX;
-	int i;
-	unsigned long flags;
-
-	local_irq_save(flags);
-
-	/* run 3 times to ensure the cache is warm and to get an accurate reading */
-	for (i = 0; i < 3; i++) {
-		mach_prepare_counter();
-		rdtscll(start);
-		mach_countup(&count);
-		rdtscll(end);
-
-		/*
-		 * Error: ECTCNEVERSET
-		 * The CTC wasn't reliable: we got a hit on the very first read,
-		 * or the CPU was so fast/slow that the quotient wouldn't fit in
-		 * 32 bits..
-		 */
-		if (count <= 1)
-			continue;
-
-		/* cpu freq too slow: */
-		if ((end - start) <= CALIBRATE_TIME_MSEC)
-			continue;
-
-		/*
-		 * We want the minimum time of all runs in case one of them
-		 * is inaccurate due to SMI or other delay
-		 */
-		delta64 = min(delta64, (end - start));
-	}
-
-	/* cpu freq too fast (or every run was bad): */
-	if (delta64 > (1ULL<<32))
-		goto err;
-
-	delta64 += CALIBRATE_TIME_MSEC/2; /* round for do_div */
-	do_div(delta64,CALIBRATE_TIME_MSEC);
-
-	local_irq_restore(flags);
-	return (unsigned long)delta64;
-err:
-	local_irq_restore(flags);
-	return 0;
-}
-
-int recalibrate_cpu_khz(void)
-{
-#ifndef CONFIG_SMP
-	unsigned long cpu_khz_old = cpu_khz;
-
-	if (cpu_has_tsc) {
-		cpu_khz = calculate_cpu_khz();
-		tsc_khz = cpu_khz;
-		cpu_data(0).loops_per_jiffy =
-			cpufreq_scale(cpu_data(0).loops_per_jiffy,
-					cpu_khz_old, cpu_khz);
-		return 0;
-	} else
-		return -ENODEV;
-#else
-	return -ENODEV;
-#endif
-}
-
-EXPORT_SYMBOL(recalibrate_cpu_khz);
-
 #ifdef CONFIG_CPU_FREQ
 
 /*
diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
index 69cbe4c9f05..c852ff9bd5d 100644
--- a/arch/x86/kernel/tsc_64.c
+++ b/arch/x86/kernel/tsc_64.c
@@ -40,7 +40,7 @@ extern int tsc_disabled;
 
 DEFINE_PER_CPU(unsigned long, cyc2ns);
 
-static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
+void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
 {
 	unsigned long long tsc_now, ns_now;
 	unsigned long flags, *scale;
@@ -130,98 +130,6 @@ core_initcall(cpufreq_tsc);
 
 #endif
 
-#define MAX_RETRIES	5
-#define SMI_TRESHOLD	50000
-
-/*
- * Read TSC and the reference counters. Take care of SMI disturbance
- */
-static unsigned long __init tsc_read_refs(unsigned long *pm,
-					  unsigned long *hpet)
-{
-	unsigned long t1, t2;
-	int i;
-
-	for (i = 0; i < MAX_RETRIES; i++) {
-		t1 = get_cycles();
-		if (hpet)
-			*hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
-		else
-			*pm = acpi_pm_read_early();
-		t2 = get_cycles();
-		if ((t2 - t1) < SMI_TRESHOLD)
-			return t2;
-	}
-	return ULONG_MAX;
-}
-
-/**
- * tsc_calibrate - calibrate the tsc on boot
- */
-void __init tsc_calibrate(void)
-{
-	unsigned long flags, tsc1, tsc2, tr1, tr2, pm1, pm2, hpet1, hpet2;
-	int hpet = is_hpet_enabled(), cpu;
-
-	local_irq_save(flags);
-
-	tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL);
-
-	outb((inb(0x61) & ~0x02) | 0x01, 0x61);
-
-	outb(0xb0, 0x43);
-	outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
-	outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42);
-	tr1 = get_cycles();
-	while ((inb(0x61) & 0x20) == 0);
-	tr2 = get_cycles();
-
-	tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL);
-
-	local_irq_restore(flags);
-
-	/*
-	 * Preset the result with the raw and inaccurate PIT
-	 * calibration value
-	 */
-	tsc_khz = (tr2 - tr1) / 50;
-
-	/* hpet or pmtimer available ? */
-	if (!hpet && !pm1 && !pm2) {
-		printk(KERN_INFO "TSC calibrated against PIT\n");
-		goto out;
-	}
-
-	/* Check, whether the sampling was disturbed by an SMI */
-	if (tsc1 == ULONG_MAX || tsc2 == ULONG_MAX) {
-		printk(KERN_WARNING "TSC calibration disturbed by SMI, "
-		       "using PIT calibration result\n");
-		goto out;
-	}
-
-	tsc2 = (tsc2 - tsc1) * 1000000L;
-
-	if (hpet) {
-		printk(KERN_INFO "TSC calibrated against HPET\n");
-		if (hpet2 < hpet1)
-			hpet2 += 0x100000000UL;
-		hpet2 -= hpet1;
-		tsc1 = (hpet2 * hpet_readl(HPET_PERIOD)) / 1000000;
-	} else {
-		printk(KERN_INFO "TSC calibrated against PM_TIMER\n");
-		if (pm2 < pm1)
-			pm2 += ACPI_PM_OVRRUN;
-		pm2 -= pm1;
-		tsc1 = (pm2 * 1000000000) / PMTMR_TICKS_PER_SEC;
-	}
-
-	tsc_khz = tsc2 / tsc1;
-
-out:
-	for_each_possible_cpu(cpu)
-		set_cyc2ns_scale(tsc_khz, cpu);
-}
-
 /*
  * Make an educated guess if the TSC is trustworthy and synchronized
  * over all CPUs.
diff --git a/include/asm-x86/hpet.h b/include/asm-x86/hpet.h
index 6a9b4ac59bf..82f1ac641bd 100644
--- a/include/asm-x86/hpet.h
+++ b/include/asm-x86/hpet.h
@@ -86,8 +86,8 @@ extern void hpet_unregister_irq_handler(rtc_irq_handler handler);
 #else /* CONFIG_HPET_TIMER */
 
 static inline int hpet_enable(void) { return 0; }
-static inline unsigned long hpet_readl(unsigned long a) { return 0; }
 static inline int is_hpet_enabled(void) { return 0; }
+#define hpet_readl(a) 0
 
 #endif
 #endif /* ASM_X86_HPET_H */
diff --git a/include/asm-x86/tsc.h b/include/asm-x86/tsc.h
index 548873ab5fc..761054d7fef 100644
--- a/include/asm-x86/tsc.h
+++ b/include/asm-x86/tsc.h
@@ -58,7 +58,6 @@ int check_tsc_unstable(void);
 extern void check_tsc_sync_source(int cpu);
 extern void check_tsc_sync_target(void);
 
-extern void tsc_calibrate(void);
 extern int notsc_setup(char *);
 
 #endif
-- 
cgit v1.2.3-70-g09d2


From 8fbbc4b45ce3e4c0eeb15004c79c72b6896a79c2 Mon Sep 17 00:00:00 2001
From: Alok Kataria <akataria@vmware.com>
Date: Tue, 1 Jul 2008 11:43:34 -0700
Subject: x86: merge tsc_init and clocksource code

Unify the clocksource code.
Unify the tsc_init code.

Signed-off-by: Alok N Kataria <akataria@vmware.com>
Signed-off-by: Dan Hecht <dhecht@vmware.com>
Cc: Dan Hecht <dhecht@vmware.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/Makefile  |   2 +-
 arch/x86/kernel/time_64.c |  32 +------
 arch/x86/kernel/tsc.c     | 212 +++++++++++++++++++++++++++++++++++++++++++++-
 arch/x86/kernel/tsc_32.c  | 188 ----------------------------------------
 arch/x86/kernel/tsc_64.c  | 106 -----------------------
 include/asm-x86/apic.h    |   7 +-
 include/asm-x86/delay.h   |   4 +
 include/asm-x86/time.h    |   2 +
 include/asm-x86/tsc.h     |   1 -
 9 files changed, 224 insertions(+), 330 deletions(-)
 delete mode 100644 arch/x86/kernel/tsc_32.c
 delete mode 100644 arch/x86/kernel/tsc_64.c

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index ca904ee1725..59b14c940a2 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -26,7 +26,7 @@ obj-$(CONFIG_X86_64)	+= syscall_64.o vsyscall_64.o
 obj-y			+= bootflag.o e820.o
 obj-y			+= pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
 obj-y			+= alternative.o i8253.o pci-nommu.o
-obj-y			+= tsc_$(BITS).o io_delay.o rtc.o tsc.o
+obj-y			+= tsc.o io_delay.o rtc.o
 
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o
 obj-y				+= process.o
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index c6ac4dad41f..e3d49c553af 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -56,7 +56,7 @@ static irqreturn_t timer_event_interrupt(int irq, void *dev_id)
 /* calibrate_cpu is used on systems with fixed rate TSCs to determine
  * processor frequency */
 #define TICK_COUNT 100000000
-static unsigned long __init calibrate_cpu(void)
+unsigned long __init calibrate_cpu(void)
 {
 	int tsc_start, tsc_now;
 	int i, no_ctr_free;
@@ -114,41 +114,13 @@ void __init hpet_time_init(void)
 	setup_irq(0, &irq0);
 }
 
-extern void set_cyc2ns_scale(unsigned long cpu_khz, int cpu);
-
 void __init time_init(void)
 {
-	int cpu;
-
-	cpu_khz = calculate_cpu_khz();
-	tsc_khz = cpu_khz;
-
-	if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
-			(boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
-		cpu_khz = calibrate_cpu();
-
-	lpj_fine = ((unsigned long)tsc_khz * 1000)/HZ;
-
-	if (unsynchronized_tsc())
-		mark_tsc_unstable("TSCs unsynchronized");
-
+	tsc_init();
 	if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
 		vgetcpu_mode = VGETCPU_RDTSCP;
 	else
 		vgetcpu_mode = VGETCPU_LSL;
 
-	printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
-			cpu_khz / 1000, cpu_khz % 1000);
-
-	/*
-	 * Secondary CPUs do not run through tsc_init(), so set up
-	 * all the scale factors for all CPUs, assuming the same
-	 * speed as the bootup CPU. (cpufreq notifiers will fix this
-	 * up if their speed diverges)
-	 */
-	for_each_possible_cpu(cpu)
-		set_cyc2ns_scale(cpu_khz, cpu);
-
-	init_tsc_clocksource();
 	late_time_init = choose_time_init();
 }
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 595f78a2221..94c16bdd569 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -5,8 +5,16 @@
 #include <linux/timer.h>
 #include <linux/acpi_pmtmr.h>
 #include <linux/cpufreq.h>
+#include <linux/dmi.h>
+#include <linux/delay.h>
+#include <linux/clocksource.h>
+#include <linux/percpu.h>
 
 #include <asm/hpet.h>
+#include <asm/timer.h>
+#include <asm/vgtod.h>
+#include <asm/time.h>
+#include <asm/delay.h>
 
 unsigned int cpu_khz;           /* TSC clocks / usec, not used here */
 EXPORT_SYMBOL(cpu_khz);
@@ -16,12 +24,12 @@ EXPORT_SYMBOL(tsc_khz);
 /*
  * TSC can be unstable due to cpufreq or due to unsynced TSCs
  */
-int tsc_unstable;
+static int tsc_unstable;
 
 /* native_sched_clock() is called before tsc_init(), so
    we must start with the TSC soft disabled to prevent
    erroneous rdtsc usage on !cpu_has_tsc processors */
-int tsc_disabled = -1;
+static int tsc_disabled = -1;
 
 /*
  * Scheduler clock - returns current time in nanosec units.
@@ -241,7 +249,7 @@ EXPORT_SYMBOL(recalibrate_cpu_khz);
 
 DEFINE_PER_CPU(unsigned long, cyc2ns);
 
-void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
+static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
 {
 	unsigned long long tsc_now, ns_now;
 	unsigned long flags, *scale;
@@ -329,3 +337,201 @@ static int __init cpufreq_tsc(void)
 core_initcall(cpufreq_tsc);
 
 #endif /* CONFIG_CPU_FREQ */
+
+/* clocksource code */
+
+static struct clocksource clocksource_tsc;
+
+/*
+ * We compare the TSC to the cycle_last value in the clocksource
+ * structure to avoid a nasty time-warp. This can be observed in a
+ * very small window right after one CPU updated cycle_last under
+ * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which
+ * is smaller than the cycle_last reference value due to a TSC which
+ * is slighty behind. This delta is nowhere else observable, but in
+ * that case it results in a forward time jump in the range of hours
+ * due to the unsigned delta calculation of the time keeping core
+ * code, which is necessary to support wrapping clocksources like pm
+ * timer.
+ */
+static cycle_t read_tsc(void)
+{
+	cycle_t ret = (cycle_t)get_cycles();
+
+	return ret >= clocksource_tsc.cycle_last ?
+		ret : clocksource_tsc.cycle_last;
+}
+
+static cycle_t __vsyscall_fn vread_tsc(void)
+{
+	cycle_t ret = (cycle_t)vget_cycles();
+
+	return ret >= __vsyscall_gtod_data.clock.cycle_last ?
+		ret : __vsyscall_gtod_data.clock.cycle_last;
+}
+
+static struct clocksource clocksource_tsc = {
+	.name                   = "tsc",
+	.rating                 = 300,
+	.read                   = read_tsc,
+	.mask                   = CLOCKSOURCE_MASK(64),
+	.shift                  = 22,
+	.flags                  = CLOCK_SOURCE_IS_CONTINUOUS |
+				  CLOCK_SOURCE_MUST_VERIFY,
+#ifdef CONFIG_X86_64
+	.vread                  = vread_tsc,
+#endif
+};
+
+void mark_tsc_unstable(char *reason)
+{
+	if (!tsc_unstable) {
+		tsc_unstable = 1;
+		printk("Marking TSC unstable due to %s\n", reason);
+		/* Change only the rating, when not registered */
+		if (clocksource_tsc.mult)
+			clocksource_change_rating(&clocksource_tsc, 0);
+		else
+			clocksource_tsc.rating = 0;
+	}
+}
+
+EXPORT_SYMBOL_GPL(mark_tsc_unstable);
+
+static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d)
+{
+	printk(KERN_NOTICE "%s detected: marking TSC unstable.\n",
+			d->ident);
+	tsc_unstable = 1;
+	return 0;
+}
+
+/* List of systems that have known TSC problems */
+static struct dmi_system_id __initdata bad_tsc_dmi_table[] = {
+	{
+		.callback = dmi_mark_tsc_unstable,
+		.ident = "IBM Thinkpad 380XD",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+			DMI_MATCH(DMI_BOARD_NAME, "2635FA0"),
+		},
+	},
+	{}
+};
+
+/*
+ * Geode_LX - the OLPC CPU has a possibly a very reliable TSC
+ */
+#ifdef CONFIG_MGEODE_LX
+/* RTSC counts during suspend */
+#define RTSC_SUSP 0x100
+
+static void __init check_geode_tsc_reliable(void)
+{
+	unsigned long res_low, res_high;
+
+	rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
+	if (res_low & RTSC_SUSP)
+		clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
+}
+#else
+static inline void check_geode_tsc_reliable(void) { }
+#endif
+
+/*
+ * Make an educated guess if the TSC is trustworthy and synchronized
+ * over all CPUs.
+ */
+__cpuinit int unsynchronized_tsc(void)
+{
+	if (!cpu_has_tsc || tsc_unstable)
+		return 1;
+
+#ifdef CONFIG_SMP
+	if (apic_is_clustered_box())
+		return 1;
+#endif
+
+	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+		return 0;
+	/*
+	 * Intel systems are normally all synchronized.
+	 * Exceptions must mark TSC as unstable:
+	 */
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
+		/* assume multi socket systems are not synchronized: */
+		if (num_possible_cpus() > 1)
+			tsc_unstable = 1;
+	}
+
+	return tsc_unstable;
+}
+
+static void __init init_tsc_clocksource(void)
+{
+	clocksource_tsc.mult = clocksource_khz2mult(tsc_khz,
+			clocksource_tsc.shift);
+	/* lower the rating if we already know its unstable: */
+	if (check_tsc_unstable()) {
+		clocksource_tsc.rating = 0;
+		clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
+	}
+	clocksource_register(&clocksource_tsc);
+}
+
+void __init tsc_init(void)
+{
+	u64 lpj;
+	int cpu;
+
+	if (!cpu_has_tsc)
+		return;
+
+	cpu_khz = calculate_cpu_khz();
+	tsc_khz = cpu_khz;
+
+	if (!cpu_khz) {
+		mark_tsc_unstable("could not calculate TSC khz");
+		return;
+	}
+
+#ifdef CONFIG_X86_64
+	if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
+			(boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
+		cpu_khz = calibrate_cpu();
+#endif
+
+	lpj = ((u64)tsc_khz * 1000);
+	do_div(lpj, HZ);
+	lpj_fine = lpj;
+
+	printk("Detected %lu.%03lu MHz processor.\n",
+			(unsigned long)cpu_khz / 1000,
+			(unsigned long)cpu_khz % 1000);
+
+	/*
+	 * Secondary CPUs do not run through tsc_init(), so set up
+	 * all the scale factors for all CPUs, assuming the same
+	 * speed as the bootup CPU. (cpufreq notifiers will fix this
+	 * up if their speed diverges)
+	 */
+	for_each_possible_cpu(cpu)
+		set_cyc2ns_scale(cpu_khz, cpu);
+
+	if (tsc_disabled > 0)
+		return;
+
+	/* now allow native_sched_clock() to use rdtsc */
+	tsc_disabled = 0;
+
+	use_tsc_delay();
+	/* Check and install the TSC clocksource */
+	dmi_check_system(bad_tsc_dmi_table);
+
+	if (unsynchronized_tsc())
+		mark_tsc_unstable("TSCs unsynchronized");
+
+	check_geode_tsc_reliable();
+	init_tsc_clocksource();
+}
+
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
deleted file mode 100644
index bbc153d36f8..00000000000
--- a/arch/x86/kernel/tsc_32.c
+++ /dev/null
@@ -1,188 +0,0 @@
-#include <linux/sched.h>
-#include <linux/clocksource.h>
-#include <linux/workqueue.h>
-#include <linux/delay.h>
-#include <linux/cpufreq.h>
-#include <linux/jiffies.h>
-#include <linux/init.h>
-#include <linux/dmi.h>
-#include <linux/percpu.h>
-
-#include <asm/delay.h>
-#include <asm/tsc.h>
-#include <asm/io.h>
-#include <asm/timer.h>
-
-#include "mach_timer.h"
-
-extern int tsc_unstable;
-extern int tsc_disabled;
-
-/* clock source code */
-
-static struct clocksource clocksource_tsc;
-
-/*
- * We compare the TSC to the cycle_last value in the clocksource
- * structure to avoid a nasty time-warp issue. This can be observed in
- * a very small window right after one CPU updated cycle_last under
- * xtime lock and the other CPU reads a TSC value which is smaller
- * than the cycle_last reference value due to a TSC which is slighty
- * behind. This delta is nowhere else observable, but in that case it
- * results in a forward time jump in the range of hours due to the
- * unsigned delta calculation of the time keeping core code, which is
- * necessary to support wrapping clocksources like pm timer.
- */
-static cycle_t read_tsc(void)
-{
-	cycle_t ret;
-
-	rdtscll(ret);
-
-	return ret >= clocksource_tsc.cycle_last ?
-		ret : clocksource_tsc.cycle_last;
-}
-
-static struct clocksource clocksource_tsc = {
-	.name			= "tsc",
-	.rating			= 300,
-	.read			= read_tsc,
-	.mask			= CLOCKSOURCE_MASK(64),
-	.mult			= 0, /* to be set */
-	.shift			= 22,
-	.flags			= CLOCK_SOURCE_IS_CONTINUOUS |
-				  CLOCK_SOURCE_MUST_VERIFY,
-};
-
-void mark_tsc_unstable(char *reason)
-{
-	if (!tsc_unstable) {
-		tsc_unstable = 1;
-		printk("Marking TSC unstable due to: %s.\n", reason);
-		/* Can be called before registration */
-		if (clocksource_tsc.mult)
-			clocksource_change_rating(&clocksource_tsc, 0);
-		else
-			clocksource_tsc.rating = 0;
-	}
-}
-EXPORT_SYMBOL_GPL(mark_tsc_unstable);
-
-static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d)
-{
-	printk(KERN_NOTICE "%s detected: marking TSC unstable.\n",
-	       d->ident);
-	tsc_unstable = 1;
-	return 0;
-}
-
-/* List of systems that have known TSC problems */
-static struct dmi_system_id __initdata bad_tsc_dmi_table[] = {
-	{
-	 .callback = dmi_mark_tsc_unstable,
-	 .ident = "IBM Thinkpad 380XD",
-	 .matches = {
-		     DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
-		     DMI_MATCH(DMI_BOARD_NAME, "2635FA0"),
-		     },
-	 },
-	 {}
-};
-
-/*
- * Make an educated guess if the TSC is trustworthy and synchronized
- * over all CPUs.
- */
-__cpuinit int unsynchronized_tsc(void)
-{
-	if (!cpu_has_tsc || tsc_unstable)
-		return 1;
-
-	/* Anything with constant TSC should be synchronized */
-	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
-		return 0;
-
-	/*
-	 * Intel systems are normally all synchronized.
-	 * Exceptions must mark TSC as unstable:
-	 */
-	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
-		/* assume multi socket systems are not synchronized: */
-		if (num_possible_cpus() > 1)
-			tsc_unstable = 1;
-	}
-	return tsc_unstable;
-}
-
-/*
- * Geode_LX - the OLPC CPU has a possibly a very reliable TSC
- */
-#ifdef CONFIG_MGEODE_LX
-/* RTSC counts during suspend */
-#define RTSC_SUSP 0x100
-
-static void __init check_geode_tsc_reliable(void)
-{
-	unsigned long res_low, res_high;
-
-	rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
-	if (res_low & RTSC_SUSP)
-		clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
-}
-#else
-static inline void check_geode_tsc_reliable(void) { }
-#endif
-
-
-void __init tsc_init(void)
-{
-	int cpu;
-	u64 lpj;
-
-	if (!cpu_has_tsc || tsc_disabled > 0)
-		return;
-
-	cpu_khz = calculate_cpu_khz();
-	tsc_khz = cpu_khz;
-
-	if (!cpu_khz) {
-		mark_tsc_unstable("could not calculate TSC khz");
-		return;
-	}
-
-	lpj = ((u64)tsc_khz * 1000);
-	do_div(lpj, HZ);
-	lpj_fine = lpj;
-
-	/* now allow native_sched_clock() to use rdtsc */
-	tsc_disabled = 0;
-
-	printk("Detected %lu.%03lu MHz processor.\n",
-				(unsigned long)cpu_khz / 1000,
-				(unsigned long)cpu_khz % 1000);
-
-	/*
-	 * Secondary CPUs do not run through tsc_init(), so set up
-	 * all the scale factors for all CPUs, assuming the same
-	 * speed as the bootup CPU. (cpufreq notifiers will fix this
-	 * up if their speed diverges)
-	 */
-	for_each_possible_cpu(cpu)
-		set_cyc2ns_scale(cpu_khz, cpu);
-
-	use_tsc_delay();
-
-	/* Check and install the TSC clocksource */
-	dmi_check_system(bad_tsc_dmi_table);
-
-	unsynchronized_tsc();
-	check_geode_tsc_reliable();
-	clocksource_tsc.mult = clocksource_khz2mult(tsc_khz,
-						    clocksource_tsc.shift);
-	/* lower the rating if we already know its unstable: */
-	if (check_tsc_unstable()) {
-		clocksource_tsc.rating = 0;
-		clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
-	}
-	clocksource_register(&clocksource_tsc);
-}
diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
deleted file mode 100644
index 80a274b018c..00000000000
--- a/arch/x86/kernel/tsc_64.c
+++ /dev/null
@@ -1,106 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/clocksource.h>
-#include <linux/time.h>
-#include <linux/acpi.h>
-#include <linux/cpufreq.h>
-#include <linux/acpi_pmtmr.h>
-
-#include <asm/hpet.h>
-#include <asm/timex.h>
-#include <asm/timer.h>
-#include <asm/vgtod.h>
-
-extern int tsc_unstable;
-extern int tsc_disabled;
-
-/*
- * Make an educated guess if the TSC is trustworthy and synchronized
- * over all CPUs.
- */
-__cpuinit int unsynchronized_tsc(void)
-{
-	if (tsc_unstable)
-		return 1;
-
-#ifdef CONFIG_SMP
-	if (apic_is_clustered_box())
-		return 1;
-#endif
-
-	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
-		return 0;
-
-	/* Assume multi socket systems are not synchronized */
-	return num_present_cpus() > 1;
-}
-
-static struct clocksource clocksource_tsc;
-
-/*
- * We compare the TSC to the cycle_last value in the clocksource
- * structure to avoid a nasty time-warp. This can be observed in a
- * very small window right after one CPU updated cycle_last under
- * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which
- * is smaller than the cycle_last reference value due to a TSC which
- * is slighty behind. This delta is nowhere else observable, but in
- * that case it results in a forward time jump in the range of hours
- * due to the unsigned delta calculation of the time keeping core
- * code, which is necessary to support wrapping clocksources like pm
- * timer.
- */
-static cycle_t read_tsc(void)
-{
-	cycle_t ret = (cycle_t)get_cycles();
-
-	return ret >= clocksource_tsc.cycle_last ?
-		ret : clocksource_tsc.cycle_last;
-}
-
-static cycle_t __vsyscall_fn vread_tsc(void)
-{
-	cycle_t ret = (cycle_t)vget_cycles();
-
-	return ret >= __vsyscall_gtod_data.clock.cycle_last ?
-		ret : __vsyscall_gtod_data.clock.cycle_last;
-}
-
-static struct clocksource clocksource_tsc = {
-	.name			= "tsc",
-	.rating			= 300,
-	.read			= read_tsc,
-	.mask			= CLOCKSOURCE_MASK(64),
-	.shift			= 22,
-	.flags			= CLOCK_SOURCE_IS_CONTINUOUS |
-				  CLOCK_SOURCE_MUST_VERIFY,
-	.vread			= vread_tsc,
-};
-
-void mark_tsc_unstable(char *reason)
-{
-	if (!tsc_unstable) {
-		tsc_unstable = 1;
-		printk("Marking TSC unstable due to %s\n", reason);
-		/* Change only the rating, when not registered */
-		if (clocksource_tsc.mult)
-			clocksource_change_rating(&clocksource_tsc, 0);
-		else
-			clocksource_tsc.rating = 0;
-	}
-}
-EXPORT_SYMBOL_GPL(mark_tsc_unstable);
-
-void __init init_tsc_clocksource(void)
-{
-	if (tsc_disabled > 0)
-		return;
-
-	clocksource_tsc.mult = clocksource_khz2mult(tsc_khz,
-			clocksource_tsc.shift);
-	if (check_tsc_unstable())
-		clocksource_tsc.rating = 0;
-
-	clocksource_register(&clocksource_tsc);
-}
diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index a29807737d3..4e2c1e517f0 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -121,12 +121,17 @@ extern void enable_NMI_through_LVT0(void);
  */
 #ifdef CONFIG_X86_64
 extern void early_init_lapic_mapping(void);
+extern int apic_is_clustered_box(void);
+#else
+static inline int apic_is_clustered_box(void)
+{
+	return 0;
+}
 #endif
 
 extern u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask);
 extern u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask);
 
-extern int apic_is_clustered_box(void);
 
 #else /* !CONFIG_X86_LOCAL_APIC */
 static inline void lapic_shutdown(void) { }
diff --git a/include/asm-x86/delay.h b/include/asm-x86/delay.h
index 409a649204a..bb80880c834 100644
--- a/include/asm-x86/delay.h
+++ b/include/asm-x86/delay.h
@@ -26,6 +26,10 @@ extern void __delay(unsigned long loops);
 	((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \
 	__ndelay(n))
 
+#ifdef CONFIG_X86_32
 void use_tsc_delay(void);
+#else
+#define use_tsc_delay() {}
+#endif
 
 #endif /* _ASM_X86_DELAY_H */
diff --git a/include/asm-x86/time.h b/include/asm-x86/time.h
index bce72d7a958..a17fa473e91 100644
--- a/include/asm-x86/time.h
+++ b/include/asm-x86/time.h
@@ -56,4 +56,6 @@ static inline int native_set_wallclock(unsigned long nowtime)
 
 #endif /* CONFIG_PARAVIRT */
 
+extern unsigned long __init calibrate_cpu(void);
+
 #endif
diff --git a/include/asm-x86/tsc.h b/include/asm-x86/tsc.h
index 761054d7fef..cb6f6ee45b8 100644
--- a/include/asm-x86/tsc.h
+++ b/include/asm-x86/tsc.h
@@ -48,7 +48,6 @@ static __always_inline cycles_t vget_cycles(void)
 extern void tsc_init(void);
 extern void mark_tsc_unstable(char *reason);
 extern int unsynchronized_tsc(void);
-extern void init_tsc_clocksource(void);
 int check_tsc_unstable(void);
 
 /*
-- 
cgit v1.2.3-70-g09d2


From e93ef949fd9a3f237aedfb8e64414b28980530b8 Mon Sep 17 00:00:00 2001
From: Alok Kataria <akataria@vmware.com>
Date: Tue, 1 Jul 2008 11:43:36 -0700
Subject: x86: rename paravirtualized TSC functions

Rename the paravirtualized calculate_cpu_khz to calibrate_tsc.
In all cases, we actually calibrate_tsc and use that as the cpu_khz value.

Signed-off-by: Alok N Kataria <akataria@vmware.com>
Signed-off-by: Dan Hecht <dhecht@vmware.com>
Cc: Dan Hecht <dhecht@vmware.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/paravirt.c    |  2 +-
 arch/x86/kernel/tsc.c         | 18 +++++++-----------
 arch/x86/kernel/vmi_32.c      |  2 +-
 arch/x86/kernel/vmiclock_32.c |  4 ++--
 arch/x86/lguest/boot.c        |  4 ++--
 arch/x86/xen/enlighten.c      |  2 +-
 arch/x86/xen/time.c           |  4 ++--
 arch/x86/xen/xen-ops.h        |  2 +-
 include/asm-x86/paravirt.h    |  4 ++--
 include/asm-x86/timer.h       |  4 ++--
 include/asm-x86/vmi_time.h    |  2 +-
 11 files changed, 22 insertions(+), 26 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e7e5652f65b..e0f571d58c1 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -285,7 +285,7 @@ struct pv_time_ops pv_time_ops = {
 	.get_wallclock = native_get_wallclock,
 	.set_wallclock = native_set_wallclock,
 	.sched_clock = native_sched_clock,
-	.get_cpu_khz = native_calculate_cpu_khz,
+	.get_tsc_khz = native_calibrate_tsc,
 };
 
 struct pv_irq_ops pv_irq_ops = {
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 94c16bdd569..3c36f92160c 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -123,9 +123,9 @@ static u64 __init tsc_read_refs(u64 *pm, u64 *hpet)
 }
 
 /**
- * tsc_calibrate - calibrate the tsc on boot
+ * native_calibrate_tsc - calibrate the tsc on boot
  */
-static unsigned int __init tsc_calibrate(void)
+unsigned long native_calibrate_tsc(void)
 {
 	unsigned long flags;
 	u64 tsc1, tsc2, tr1, tr2, delta, pm1, pm2, hpet1, hpet2;
@@ -195,10 +195,6 @@ out:
 	return tsc_khz_val;
 }
 
-unsigned long native_calculate_cpu_khz(void)
-{
-	return tsc_calibrate();
-}
 
 #ifdef CONFIG_X86_32
 /* Only called from the Powernow K7 cpu freq driver */
@@ -208,8 +204,8 @@ int recalibrate_cpu_khz(void)
 	unsigned long cpu_khz_old = cpu_khz;
 
 	if (cpu_has_tsc) {
-		cpu_khz = calculate_cpu_khz();
-		tsc_khz = cpu_khz;
+		tsc_khz = calibrate_tsc();
+		cpu_khz = tsc_khz;
 		cpu_data(0).loops_per_jiffy =
 			cpufreq_scale(cpu_data(0).loops_per_jiffy,
 					cpu_khz_old, cpu_khz);
@@ -487,10 +483,10 @@ void __init tsc_init(void)
 	if (!cpu_has_tsc)
 		return;
 
-	cpu_khz = calculate_cpu_khz();
-	tsc_khz = cpu_khz;
+	tsc_khz = calibrate_tsc();
+	cpu_khz = tsc_khz;
 
-	if (!cpu_khz) {
+	if (!tsc_khz) {
 		mark_tsc_unstable("could not calculate TSC khz");
 		return;
 	}
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 946bf13b44a..b15346092b7 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -932,7 +932,7 @@ static inline int __init activate_vmi(void)
 		pv_apic_ops.setup_secondary_clock = vmi_time_ap_init;
 #endif
 		pv_time_ops.sched_clock = vmi_sched_clock;
- 		pv_time_ops.get_cpu_khz = vmi_cpu_khz;
+		pv_time_ops.get_tsc_khz = vmi_tsc_khz;
 
 		/* We have true wallclock functions; disable CMOS clock sync */
 		no_sync_cmos_clock = 1;
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index ba7d19e102b..6953859fe28 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -69,8 +69,8 @@ unsigned long long vmi_sched_clock(void)
 	return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE));
 }
 
-/* paravirt_ops.get_cpu_khz = vmi_cpu_khz */
-unsigned long vmi_cpu_khz(void)
+/* paravirt_ops.get_tsc_khz = vmi_tsc_khz */
+unsigned long vmi_tsc_khz(void)
 {
 	unsigned long long khz;
 	khz = vmi_timer_ops.get_cycle_frequency();
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index e72cf0793fb..50dad44fb54 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -607,7 +607,7 @@ static unsigned long lguest_get_wallclock(void)
  * what speed it runs at, or 0 if it's unusable as a reliable clock source.
  * This matches what we want here: if we return 0 from this function, the x86
  * TSC clock will give up and not register itself. */
-static unsigned long lguest_cpu_khz(void)
+static unsigned long lguest_tsc_khz(void)
 {
 	return lguest_data.tsc_khz;
 }
@@ -998,7 +998,7 @@ __init void lguest_init(void)
 	/* time operations */
 	pv_time_ops.get_wallclock = lguest_get_wallclock;
 	pv_time_ops.time_init = lguest_time_init;
-	pv_time_ops.get_cpu_khz = lguest_cpu_khz;
+	pv_time_ops.get_tsc_khz = lguest_tsc_khz;
 
 	/* Now is a good time to look at the implementations of these functions
 	 * before returning to the rest of lguest_init(). */
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 3b980831602..dcd4e51f2f1 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1062,7 +1062,7 @@ static const struct pv_time_ops xen_time_ops __initdata = {
 
 	.set_wallclock = xen_set_wallclock,
 	.get_wallclock = xen_get_wallclock,
-	.get_cpu_khz = xen_cpu_khz,
+	.get_tsc_khz = xen_tsc_khz,
 	.sched_clock = xen_sched_clock,
 };
 
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 64f0038b955..685b77470fc 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -197,8 +197,8 @@ unsigned long long xen_sched_clock(void)
 }
 
 
-/* Get the CPU speed from Xen */
-unsigned long xen_cpu_khz(void)
+/* Get the TSC speed from Xen */
+unsigned long xen_tsc_khz(void)
 {
 	u64 xen_khz = 1000000ULL << 32;
 	const struct pvclock_vcpu_time_info *info =
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 9a055592a30..d852ddbb344 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -32,7 +32,7 @@ void __init xen_build_dynamic_phys_to_machine(void);
 
 void xen_setup_timer(int cpu);
 void xen_setup_cpu_clockevents(void);
-unsigned long xen_cpu_khz(void);
+unsigned long xen_tsc_khz(void);
 void __init xen_time_init(void);
 unsigned long xen_get_wallclock(void);
 int xen_set_wallclock(unsigned long time);
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 6d8966f9d19..ef5e8ec6a6a 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -84,7 +84,7 @@ struct pv_time_ops {
 	int (*set_wallclock)(unsigned long);
 
 	unsigned long long (*sched_clock)(void);
-	unsigned long (*get_cpu_khz)(void);
+	unsigned long (*get_tsc_khz)(void);
 };
 
 struct pv_cpu_ops {
@@ -779,7 +779,7 @@ static inline unsigned long long paravirt_sched_clock(void)
 {
 	return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
 }
-#define calculate_cpu_khz() (pv_time_ops.get_cpu_khz())
+#define calibrate_tsc() (pv_time_ops.get_tsc_khz())
 
 static inline unsigned long long paravirt_read_pmc(int counter)
 {
diff --git a/include/asm-x86/timer.h b/include/asm-x86/timer.h
index 4f6fcb050c1..fb2a4ddddf3 100644
--- a/include/asm-x86/timer.h
+++ b/include/asm-x86/timer.h
@@ -7,14 +7,14 @@
 #define TICK_SIZE (tick_nsec / 1000)
 
 unsigned long long native_sched_clock(void);
-unsigned long native_calculate_cpu_khz(void);
+unsigned long native_calibrate_tsc(void);
 
 extern int timer_ack;
 extern int no_timer_check;
 extern int recalibrate_cpu_khz(void);
 
 #ifndef CONFIG_PARAVIRT
-#define calculate_cpu_khz() native_calculate_cpu_khz()
+#define calibrate_tsc() native_calibrate_tsc()
 #endif
 
 /* Accelerators for sched_clock()
diff --git a/include/asm-x86/vmi_time.h b/include/asm-x86/vmi_time.h
index 47818813032..c3118c38515 100644
--- a/include/asm-x86/vmi_time.h
+++ b/include/asm-x86/vmi_time.h
@@ -50,7 +50,7 @@ extern void __init vmi_time_init(void);
 extern unsigned long vmi_get_wallclock(void);
 extern int vmi_set_wallclock(unsigned long now);
 extern unsigned long long vmi_sched_clock(void);
-extern unsigned long vmi_cpu_khz(void);
+extern unsigned long vmi_tsc_khz(void);
 
 #ifdef CONFIG_X86_LOCAL_APIC
 extern void __devinit vmi_time_bsp_init(void);
-- 
cgit v1.2.3-70-g09d2


From e407dffd17dcb592e1605a2b3dbbb81f9f3cbc21 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 9 Jul 2008 08:00:15 +0200
Subject: x86, uv: build fix for "x86, uv: update x86 mmr list for SGI uv"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fix:

In file included from arch/x86/kernel/genx2apic_uv_x.c:25:
include/asm/uv/uv_mmrs.h:986: error: redefinition of ‘union uvh_rh_gam_cfg_overlay_config_mmr_u’
include/asm/uv/uv_mmrs.h:988: error: redefinition of ‘struct uvh_rh_gam_cfg_overlay_config_mmr_s’
include/asm/uv/uv_mmrs.h:1064: error: redefinition of ‘union uvh_rh_gam_mmioh_overlay_config_mmr_u’
include/asm/uv/uv_mmrs.h:1066: error: redefinition of ‘struct uvh_rh_gam_mmioh_overlay_config_mmr_s’

caused by duplicate section (cut & paste error) in commit
5d061e397db1 "x86, uv: update x86 mmr list for SGI uv".

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/uv/uv_mmrs.h | 20 --------------------
 1 file changed, 20 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/uv/uv_mmrs.h b/include/asm-x86/uv/uv_mmrs.h
index eb59201dc01..a8a1ad5c764 100644
--- a/include/asm-x86/uv/uv_mmrs.h
+++ b/include/asm-x86/uv/uv_mmrs.h
@@ -973,26 +973,6 @@ union uvh_rh_gam_cfg_overlay_config_mmr_u {
     } s;
 };
 
-/* ========================================================================= */
-/*                    UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR                      */
-/* ========================================================================= */
-#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR 0x1600020UL
-
-#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT 26
-#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
-#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
-
-union uvh_rh_gam_cfg_overlay_config_mmr_u {
-    unsigned long	v;
-    struct uvh_rh_gam_cfg_overlay_config_mmr_s {
-	unsigned long	rsvd_0_25: 26;  /*    */
-	unsigned long	base   : 20;  /* RW */
-	unsigned long	rsvd_46_62: 17;  /*    */
-	unsigned long	enable :  1;  /* RW */
-    } s;
-};
-
 /* ========================================================================= */
 /*                    UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR                      */
 /* ========================================================================= */
-- 
cgit v1.2.3-70-g09d2


From 26e9e57b106445bbd8c965985e4e8af5293ae005 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 9 Jul 2008 08:02:54 +0200
Subject: x86, uv: build fix #2 for "x86, uv: update x86 mmr list for SGI uv"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fix:

 In file included from arch/x86/kernel/tlb_uv.c:14:
 include/asm/uv/uv_mmrs.h:986: error: redefinition of ‘union uvh_rh_gam_cfg_overlay_config_mmr_u’
 include/asm/uv/uv_mmrs.h:988: error: redefinition of ‘struct uvh_rh_gam_cfg_overlay_config_mmr_s’
 include/asm/uv/uv_mmrs.h:1064: error: redefinition of ‘union uvh_rh_gam_mmioh_overlay_config_mmr_u’
 include/asm/uv/uv_mmrs.h:1066: error: redefinition of ‘struct uvh_rh_gam_mmioh_overlay_config_mmr_s’

caused by another duplicate section (cut & paste error) in commit
5d061e397db1 "x86, uv: update x86 mmr list for SGI uv".

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/uv/uv_mmrs.h | 26 --------------------------
 1 file changed, 26 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/uv/uv_mmrs.h b/include/asm-x86/uv/uv_mmrs.h
index a8a1ad5c764..151fd7fcb80 100644
--- a/include/asm-x86/uv/uv_mmrs.h
+++ b/include/asm-x86/uv/uv_mmrs.h
@@ -1027,32 +1027,6 @@ union uvh_rh_gam_mmioh_overlay_config_mmr_u {
     } s;
 };
 
-/* ========================================================================= */
-/*                   UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR                     */
-/* ========================================================================= */
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL
-
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 30
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003fffc0000000UL
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_MASK 0x000fc00000000000UL
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT 52
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
-
-union uvh_rh_gam_mmioh_overlay_config_mmr_u {
-    unsigned long	v;
-    struct uvh_rh_gam_mmioh_overlay_config_mmr_s {
-	unsigned long	rsvd_0_29: 30;  /*    */
-	unsigned long	base   : 16;  /* RW */
-	unsigned long	m_io   :  6;  /* RW */
-	unsigned long	n_io   :  4;  /* RW */
-	unsigned long	rsvd_56_62:  7;  /*    */
-	unsigned long	enable :  1;  /* RW */
-    } s;
-};
-
 /* ========================================================================= */
 /*                    UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR                      */
 /* ========================================================================= */
-- 
cgit v1.2.3-70-g09d2


From f8dd0d3c62164160c59034a96eb17d69ac8a0328 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 9 Jul 2008 09:12:55 +0200
Subject: x86: delay lib unification build fix
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fix:

arch/x86/lib/delay.c:93:24: error: macro "use_tsc_delay" passed 1 arguments, but takes just 0
arch/x86/lib/delay.c:94: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘{’ token

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/delay.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/delay.h b/include/asm-x86/delay.h
index bb80880c834..409a649204a 100644
--- a/include/asm-x86/delay.h
+++ b/include/asm-x86/delay.h
@@ -26,10 +26,6 @@ extern void __delay(unsigned long loops);
 	((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \
 	__ndelay(n))
 
-#ifdef CONFIG_X86_32
 void use_tsc_delay(void);
-#else
-#define use_tsc_delay() {}
-#endif
 
 #endif /* _ASM_X86_DELAY_H */
-- 
cgit v1.2.3-70-g09d2


From edf10162b2c5ad78ada8e63e960f9d0949c6c219 Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Fri, 13 Jun 2008 16:35:52 -0300
Subject: x86: don't clobber r8 nor use rcx.

There's really no reason to clobber r8 or pass the address in rcx.
We can safely use only two registers (which we already have to touch anyway)
to do the job.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/lib/getuser_64.S    | 42 +++++++++++++++++++++---------------------
 include/asm-x86/uaccess_64.h |  3 +--
 2 files changed, 22 insertions(+), 23 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/lib/getuser_64.S b/arch/x86/lib/getuser_64.S
index 5448876261f..2b003d31348 100644
--- a/arch/x86/lib/getuser_64.S
+++ b/arch/x86/lib/getuser_64.S
@@ -36,10 +36,10 @@
 	.text
 ENTRY(__get_user_1)
 	CFI_STARTPROC
-	GET_THREAD_INFO(%r8)
-	cmpq threadinfo_addr_limit(%r8),%rcx
+	GET_THREAD_INFO(%rdx)
+	cmpq threadinfo_addr_limit(%rdx),%rax
 	jae bad_get_user
-1:	movzb (%rcx),%edx
+1:	movzb (%rax),%edx
 	xorl %eax,%eax
 	ret
 	CFI_ENDPROC
@@ -47,48 +47,48 @@ ENDPROC(__get_user_1)
 
 ENTRY(__get_user_2)
 	CFI_STARTPROC
-	GET_THREAD_INFO(%r8)
-	addq $1,%rcx
+	GET_THREAD_INFO(%rdx)
+	addq $1,%rax
 	jc 20f
-	cmpq threadinfo_addr_limit(%r8),%rcx
+	cmpq threadinfo_addr_limit(%rdx),%rax
 	jae 20f
-	decq   %rcx
-2:	movzwl (%rcx),%edx
+	decq   %rax
+2:	movzwl (%rax),%edx
 	xorl %eax,%eax
 	ret
-20:	decq    %rcx
+20:	decq    %rax
 	jmp	bad_get_user
 	CFI_ENDPROC
 ENDPROC(__get_user_2)
 
 ENTRY(__get_user_4)
 	CFI_STARTPROC
-	GET_THREAD_INFO(%r8)
-	addq $3,%rcx
+	GET_THREAD_INFO(%rdx)
+	addq $3,%rax
 	jc 30f
-	cmpq threadinfo_addr_limit(%r8),%rcx
+	cmpq threadinfo_addr_limit(%rdx),%rax
 	jae 30f
-	subq $3,%rcx
-3:	movl (%rcx),%edx
+	subq $3,%rax
+3:	movl (%rax),%edx
 	xorl %eax,%eax
 	ret
-30:	subq $3,%rcx
+30:	subq $3,%rax
 	jmp bad_get_user
 	CFI_ENDPROC
 ENDPROC(__get_user_4)
 
 ENTRY(__get_user_8)
 	CFI_STARTPROC
-	GET_THREAD_INFO(%r8)
-	addq $7,%rcx
+	GET_THREAD_INFO(%rdx)
+	addq $7,%rax
 	jc 40f
-	cmpq threadinfo_addr_limit(%r8),%rcx
+	cmpq threadinfo_addr_limit(%rdx),%rax
 	jae	40f
-	subq	$7,%rcx
-4:	movq (%rcx),%rdx
+	subq	$7,%rax
+4:	movq (%rax),%rdx
 	xorl %eax,%eax
 	ret
-40:	subq $7,%rcx
+40:	subq $7,%rax
 	jmp bad_get_user
 	CFI_ENDPROC
 ENDPROC(__get_user_8)
diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index b8a2f433990..a2d49078e19 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h
@@ -90,8 +90,7 @@ extern int fixup_exception(struct pt_regs *regs);
 #define __get_user_x(size, ret, x, ptr)		      \
 	asm volatile("call __get_user_" #size	      \
 		     : "=a" (ret),"=d" (x)	      \
-		     : "c" (ptr)		      \
-		     : "r8")
+		     : "0" (ptr))		      \
 
 /* Careful: we have to cast the result to the type of the pointer
  * for sign reasons */
-- 
cgit v1.2.3-70-g09d2


From 40faf463e62de0b29722910eded7dd26cd8b684b Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Tue, 24 Jun 2008 11:37:57 -0300
Subject: x86: introduce __ASM_REG macro.

There are situations in which the architecture wants to use the
register that represents its word-size, whatever it is. For those,
introduce __ASM_REG in asm.h, along with the first users _ASM_AX
and _ASM_DX. They have users waiting for it, namely the getuser
functions.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/lib/getuser_32.S | 25 +++++++++++++------------
 arch/x86/lib/getuser_64.S | 36 ++++++++++++++++++------------------
 include/asm-x86/asm.h     |  3 +++
 3 files changed, 34 insertions(+), 30 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/lib/getuser_32.S b/arch/x86/lib/getuser_32.S
index 8200fde55f5..2cc3ceee8f9 100644
--- a/arch/x86/lib/getuser_32.S
+++ b/arch/x86/lib/getuser_32.S
@@ -11,6 +11,7 @@
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
 #include <asm/thread_info.h>
+#include <asm/asm.h>
 
 
 /*
@@ -28,10 +29,10 @@
 .text
 ENTRY(__get_user_1)
 	CFI_STARTPROC
-	GET_THREAD_INFO(%edx)
-	cmp TI_addr_limit(%edx),%eax
+	GET_THREAD_INFO(%_ASM_DX)
+	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user
-1:	movzb (%eax),%edx
+1:	movzb (%_ASM_AX),%edx
 	xor %eax,%eax
 	ret
 	CFI_ENDPROC
@@ -39,12 +40,12 @@ ENDPROC(__get_user_1)
 
 ENTRY(__get_user_2)
 	CFI_STARTPROC
-	add $1,%eax
+	add $1,%_ASM_AX
 	jc bad_get_user
-	GET_THREAD_INFO(%edx)
-	cmp TI_addr_limit(%edx),%eax
+	GET_THREAD_INFO(%_ASM_DX)
+	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user
-2:	movzwl -1(%eax),%edx
+2:	movzwl -1(%_ASM_AX),%edx
 	xor %eax,%eax
 	ret
 	CFI_ENDPROC
@@ -52,12 +53,12 @@ ENDPROC(__get_user_2)
 
 ENTRY(__get_user_4)
 	CFI_STARTPROC
-	add $3,%eax
+	add $3,%_ASM_AX
 	jc bad_get_user
-	GET_THREAD_INFO(%edx)
-	cmp TI_addr_limit(%edx),%eax
+	GET_THREAD_INFO(%_ASM_DX)
+	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user
-3:	mov -3(%eax),%edx
+3:	mov -3(%_ASM_AX),%edx
 	xor %eax,%eax
 	ret
 	CFI_ENDPROC
@@ -66,7 +67,7 @@ ENDPROC(__get_user_4)
 bad_get_user:
 	CFI_STARTPROC
 	xor %edx,%edx
-	mov $-14,%eax
+	mov $-14,%_ASM_AX
 	ret
 	CFI_ENDPROC
 END(bad_get_user)
diff --git a/arch/x86/lib/getuser_64.S b/arch/x86/lib/getuser_64.S
index 6134752a75f..63b0e5c1e58 100644
--- a/arch/x86/lib/getuser_64.S
+++ b/arch/x86/lib/getuser_64.S
@@ -13,14 +13,13 @@
 /*
  * __get_user_X
  *
- * Inputs:	%rcx contains the address.
+ * Inputs:	%rax contains the address.
  *		The register is modified, but all changes are undone
  *		before returning because the C code doesn't know about it.
  *
  * Outputs:	%rax is error code (0 or -EFAULT)
  *		%rdx contains zero-extended value
  * 
- * %r8 is destroyed.
  *
  * These functions should not modify any other registers,
  * as they get called from within inline assembly.
@@ -32,14 +31,15 @@
 #include <asm/errno.h>
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
+#include <asm/asm.h>
 
 	.text
 ENTRY(__get_user_1)
 	CFI_STARTPROC
-	GET_THREAD_INFO(%rdx)
-	cmp TI_addr_limit(%rdx),%rax
+	GET_THREAD_INFO(%_ASM_DX)
+	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user
-1:	movzb (%rax),%edx
+1:	movzb (%_ASM_AX),%edx
 	xor %eax,%eax
 	ret
 	CFI_ENDPROC
@@ -47,12 +47,12 @@ ENDPROC(__get_user_1)
 
 ENTRY(__get_user_2)
 	CFI_STARTPROC
-	add $1,%rax
+	add $1,%_ASM_AX
 	jc bad_get_user
-	GET_THREAD_INFO(%rdx)
-	cmp TI_addr_limit(%rdx),%rax
+	GET_THREAD_INFO(%_ASM_DX)
+	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user
-2:	movzwl -1(%rax),%edx
+2:	movzwl -1(%_ASM_AX),%edx
 	xor %eax,%eax
 	ret
 	CFI_ENDPROC
@@ -60,12 +60,12 @@ ENDPROC(__get_user_2)
 
 ENTRY(__get_user_4)
 	CFI_STARTPROC
-	add $3,%rax
+	add $3,%_ASM_AX
 	jc bad_get_user
-	GET_THREAD_INFO(%rdx)
-	cmp TI_addr_limit(%rdx),%rax
+	GET_THREAD_INFO(%_ASM_DX)
+	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user
-3:	mov -3(%rax),%edx
+3:	mov -3(%_ASM_AX),%edx
 	xor %eax,%eax
 	ret
 	CFI_ENDPROC
@@ -73,12 +73,12 @@ ENDPROC(__get_user_4)
 
 ENTRY(__get_user_8)
 	CFI_STARTPROC
-	add $7,%rax
+	add $7,%_ASM_AX
 	jc bad_get_user
-	GET_THREAD_INFO(%rdx)
-	cmp TI_addr_limit(%rdx),%rax
+	GET_THREAD_INFO(%_ASM_DX)
+	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
 	jae	bad_get_user
-4:	movq -7(%rax),%rdx
+4:	movq -7(%_ASM_AX),%_ASM_DX
 	xor %eax,%eax
 	ret
 	CFI_ENDPROC
@@ -87,7 +87,7 @@ ENDPROC(__get_user_8)
 bad_get_user:
 	CFI_STARTPROC
 	xor %edx,%edx
-	mov $(-EFAULT),%rax
+	mov $(-EFAULT),%_ASM_AX
 	ret
 	CFI_ENDPROC
 END(bad_get_user)
diff --git a/include/asm-x86/asm.h b/include/asm-x86/asm.h
index 70939820c55..435402e623e 100644
--- a/include/asm-x86/asm.h
+++ b/include/asm-x86/asm.h
@@ -14,6 +14,7 @@
 #endif
 
 #define __ASM_SIZE(inst)	__ASM_SEL(inst##l, inst##q)
+#define __ASM_REG(reg)		__ASM_SEL(e##reg, r##reg)
 
 #define _ASM_PTR	__ASM_SEL(.long, .quad)
 #define _ASM_ALIGN	__ASM_SEL(.balign 4, .balign 8)
@@ -24,6 +25,8 @@
 #define _ASM_ADD	__ASM_SIZE(add)
 #define _ASM_SUB	__ASM_SIZE(sub)
 #define _ASM_XADD	__ASM_SIZE(xadd)
+#define _ASM_AX		__ASM_REG(ax)
+#define _ASM_DX		__ASM_REG(dx)
 
 /* Exception table entry */
 # define _ASM_EXTABLE(from,to) \
-- 
cgit v1.2.3-70-g09d2


From 6c2d458680d49d939ffd4b4cdc84d9e004d65910 Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Tue, 24 Jun 2008 12:05:11 -0300
Subject: x86: merge getuser asm functions.

getuser_32.S and getuser_64.S are merged into getuser.S.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/lib/Makefile     |   2 +-
 arch/x86/lib/getuser.S    | 104 ++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/lib/getuser_32.S |  79 -----------------------------------
 arch/x86/lib/getuser_64.S | 100 --------------------------------------------
 include/asm-x86/asm.h     |   4 +-
 5 files changed, 108 insertions(+), 181 deletions(-)
 create mode 100644 arch/x86/lib/getuser.S
 delete mode 100644 arch/x86/lib/getuser_32.S
 delete mode 100644 arch/x86/lib/getuser_64.S

(limited to 'include/asm-x86')

diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 86960a6c41c..e92948203a5 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -5,7 +5,7 @@
 obj-$(CONFIG_SMP) := msr-on-cpu.o
 
 lib-y := delay.o
-lib-y += usercopy_$(BITS).o getuser_$(BITS).o putuser_$(BITS).o
+lib-y += usercopy_$(BITS).o getuser.o putuser_$(BITS).o
 lib-y += memcpy_$(BITS).o
 
 ifeq ($(CONFIG_X86_32),y)
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
new file mode 100644
index 00000000000..ad374003742
--- /dev/null
+++ b/arch/x86/lib/getuser.S
@@ -0,0 +1,104 @@
+/*
+ * __get_user functions.
+ *
+ * (C) Copyright 1998 Linus Torvalds
+ * (C) Copyright 2005 Andi Kleen
+ * (C) Copyright 2008 Glauber Costa
+ *
+ * These functions have a non-standard call interface
+ * to make them more efficient, especially as they
+ * return an error value in addition to the "real"
+ * return value.
+ */
+
+/*
+ * __get_user_X
+ *
+ * Inputs:	%[r|e]ax contains the address.
+ *		The register is modified, but all changes are undone
+ *		before returning because the C code doesn't know about it.
+ *
+ * Outputs:	%[r|e]ax is error code (0 or -EFAULT)
+ *		%[r|e]dx contains zero-extended value
+ *
+ *
+ * These functions should not modify any other registers,
+ * as they get called from within inline assembly.
+ */
+
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/page.h>
+#include <asm/errno.h>
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+#include <asm/asm.h>
+
+	.text
+ENTRY(__get_user_1)
+	CFI_STARTPROC
+	GET_THREAD_INFO(%_ASM_DX)
+	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
+	jae bad_get_user
+1:	movzb (%_ASM_AX),%edx
+	xor %eax,%eax
+	ret
+	CFI_ENDPROC
+ENDPROC(__get_user_1)
+
+ENTRY(__get_user_2)
+	CFI_STARTPROC
+	add $1,%_ASM_AX
+	jc bad_get_user
+	GET_THREAD_INFO(%_ASM_DX)
+	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
+	jae bad_get_user
+2:	movzwl -1(%_ASM_AX),%edx
+	xor %eax,%eax
+	ret
+	CFI_ENDPROC
+ENDPROC(__get_user_2)
+
+ENTRY(__get_user_4)
+	CFI_STARTPROC
+	add $3,%_ASM_AX
+	jc bad_get_user
+	GET_THREAD_INFO(%_ASM_DX)
+	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
+	jae bad_get_user
+3:	mov -3(%_ASM_AX),%edx
+	xor %eax,%eax
+	ret
+	CFI_ENDPROC
+ENDPROC(__get_user_4)
+
+#ifdef CONFIG_X86_64
+ENTRY(__get_user_8)
+	CFI_STARTPROC
+	add $7,%_ASM_AX
+	jc bad_get_user
+	GET_THREAD_INFO(%_ASM_DX)
+	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
+	jae	bad_get_user
+4:	movq -7(%_ASM_AX),%_ASM_DX
+	xor %eax,%eax
+	ret
+	CFI_ENDPROC
+ENDPROC(__get_user_8)
+#endif
+
+bad_get_user:
+	CFI_STARTPROC
+	xor %edx,%edx
+	mov $(-EFAULT),%_ASM_AX
+	ret
+	CFI_ENDPROC
+END(bad_get_user)
+
+.section __ex_table,"a"
+	_ASM_PTR 1b,bad_get_user
+	_ASM_PTR 2b,bad_get_user
+	_ASM_PTR 3b,bad_get_user
+#ifdef CONFIG_X86_64
+	_ASM_PTR 4b,bad_get_user
+#endif
diff --git a/arch/x86/lib/getuser_32.S b/arch/x86/lib/getuser_32.S
deleted file mode 100644
index 2bb0a183e06..00000000000
--- a/arch/x86/lib/getuser_32.S
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * __get_user functions.
- *
- * (C) Copyright 1998 Linus Torvalds
- *
- * These functions have a non-standard call interface
- * to make them more efficient, especially as they
- * return an error value in addition to the "real"
- * return value.
- */
-#include <linux/linkage.h>
-#include <asm/dwarf2.h>
-#include <asm/thread_info.h>
-#include <asm/asm.h>
-
-
-/*
- * __get_user_X
- *
- * Inputs:	%eax contains the address
- *
- * Outputs:	%eax is error code (0 or -EFAULT)
- *		%edx contains zero-extended value
- *
- * These functions should not modify any other registers,
- * as they get called from within inline assembly.
- */
-
-.text
-ENTRY(__get_user_1)
-	CFI_STARTPROC
-	GET_THREAD_INFO(%_ASM_DX)
-	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
-	jae bad_get_user
-1:	movzb (%_ASM_AX),%edx
-	xor %eax,%eax
-	ret
-	CFI_ENDPROC
-ENDPROC(__get_user_1)
-
-ENTRY(__get_user_2)
-	CFI_STARTPROC
-	add $1,%_ASM_AX
-	jc bad_get_user
-	GET_THREAD_INFO(%_ASM_DX)
-	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
-	jae bad_get_user
-2:	movzwl -1(%_ASM_AX),%edx
-	xor %eax,%eax
-	ret
-	CFI_ENDPROC
-ENDPROC(__get_user_2)
-
-ENTRY(__get_user_4)
-	CFI_STARTPROC
-	add $3,%_ASM_AX
-	jc bad_get_user
-	GET_THREAD_INFO(%_ASM_DX)
-	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
-	jae bad_get_user
-3:	mov -3(%_ASM_AX),%edx
-	xor %eax,%eax
-	ret
-	CFI_ENDPROC
-ENDPROC(__get_user_4)
-
-bad_get_user:
-	CFI_STARTPROC
-	xor %edx,%edx
-	mov $-14,%_ASM_AX
-	ret
-	CFI_ENDPROC
-END(bad_get_user)
-
-.section __ex_table,"a"
-	_ASM_PTR 1b,bad_get_user
-	_ASM_PTR 2b,bad_get_user
-	_ASM_PTR 3b,bad_get_user
-.previous
diff --git a/arch/x86/lib/getuser_64.S b/arch/x86/lib/getuser_64.S
deleted file mode 100644
index e33388419b7..00000000000
--- a/arch/x86/lib/getuser_64.S
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * __get_user functions.
- *
- * (C) Copyright 1998 Linus Torvalds
- * (C) Copyright 2005 Andi Kleen
- *
- * These functions have a non-standard call interface
- * to make them more efficient, especially as they
- * return an error value in addition to the "real"
- * return value.
- */
-
-/*
- * __get_user_X
- *
- * Inputs:	%rax contains the address.
- *		The register is modified, but all changes are undone
- *		before returning because the C code doesn't know about it.
- *
- * Outputs:	%rax is error code (0 or -EFAULT)
- *		%rdx contains zero-extended value
- * 
- *
- * These functions should not modify any other registers,
- * as they get called from within inline assembly.
- */
-
-#include <linux/linkage.h>
-#include <asm/dwarf2.h>
-#include <asm/page.h>
-#include <asm/errno.h>
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-#include <asm/asm.h>
-
-	.text
-ENTRY(__get_user_1)
-	CFI_STARTPROC
-	GET_THREAD_INFO(%_ASM_DX)
-	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
-	jae bad_get_user
-1:	movzb (%_ASM_AX),%edx
-	xor %eax,%eax
-	ret
-	CFI_ENDPROC
-ENDPROC(__get_user_1)
-
-ENTRY(__get_user_2)
-	CFI_STARTPROC
-	add $1,%_ASM_AX
-	jc bad_get_user
-	GET_THREAD_INFO(%_ASM_DX)
-	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
-	jae bad_get_user
-2:	movzwl -1(%_ASM_AX),%edx
-	xor %eax,%eax
-	ret
-	CFI_ENDPROC
-ENDPROC(__get_user_2)
-
-ENTRY(__get_user_4)
-	CFI_STARTPROC
-	add $3,%_ASM_AX
-	jc bad_get_user
-	GET_THREAD_INFO(%_ASM_DX)
-	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
-	jae bad_get_user
-3:	mov -3(%_ASM_AX),%edx
-	xor %eax,%eax
-	ret
-	CFI_ENDPROC
-ENDPROC(__get_user_4)
-
-ENTRY(__get_user_8)
-	CFI_STARTPROC
-	add $7,%_ASM_AX
-	jc bad_get_user
-	GET_THREAD_INFO(%_ASM_DX)
-	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
-	jae	bad_get_user
-4:	movq -7(%_ASM_AX),%_ASM_DX
-	xor %eax,%eax
-	ret
-	CFI_ENDPROC
-ENDPROC(__get_user_8)
-
-bad_get_user:
-	CFI_STARTPROC
-	xor %edx,%edx
-	mov $(-EFAULT),%_ASM_AX
-	ret
-	CFI_ENDPROC
-END(bad_get_user)
-
-.section __ex_table,"a"
-	_ASM_PTR 1b,bad_get_user
-	_ASM_PTR 2b,bad_get_user
-	_ASM_PTR 3b,bad_get_user
-	_ASM_PTR 4b,bad_get_user
-.previous
diff --git a/include/asm-x86/asm.h b/include/asm-x86/asm.h
index 435402e623e..57750a95685 100644
--- a/include/asm-x86/asm.h
+++ b/include/asm-x86/asm.h
@@ -3,8 +3,10 @@
 
 #ifdef __ASSEMBLY__
 # define __ASM_FORM(x)	x
+# define __ASM_EX_SEC	.section __ex_table
 #else
 # define __ASM_FORM(x)	" " #x " "
+# define __ASM_EX_SEC	" .section __ex_table,\"a\"\n"
 #endif
 
 #ifdef CONFIG_X86_32
@@ -30,7 +32,7 @@
 
 /* Exception table entry */
 # define _ASM_EXTABLE(from,to) \
-	" .section __ex_table,\"a\"\n" \
+	__ASM_EX_SEC	\
 	_ASM_ALIGN "\n" \
 	_ASM_PTR #from "," #to "\n" \
 	" .previous\n"
-- 
cgit v1.2.3-70-g09d2


From 268cf048c890d10bd3a86bd87922ed8a722d502f Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Tue, 24 Jun 2008 12:40:55 -0300
Subject: x86: don't save ebx in putuser_32.S.

Clobber it in the inline asm macros, and let the compiler do this for us.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/lib/putuser_32.S    | 13 ++-----------
 include/asm-x86/uaccess_32.h | 10 +++++-----
 2 files changed, 7 insertions(+), 16 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/lib/putuser_32.S b/arch/x86/lib/putuser_32.S
index f58fba109d1..5b2a926f0e2 100644
--- a/arch/x86/lib/putuser_32.S
+++ b/arch/x86/lib/putuser_32.S
@@ -26,14 +26,8 @@
  */
 
 #define ENTER	CFI_STARTPROC ; \
-		pushl %ebx ; \
-		CFI_ADJUST_CFA_OFFSET 4 ; \
-		CFI_REL_OFFSET ebx, 0 ; \
 		GET_THREAD_INFO(%ebx)
-#define EXIT	popl %ebx ; \
-		CFI_ADJUST_CFA_OFFSET -4 ; \
-		CFI_RESTORE ebx ; \
-		ret ; \
+#define EXIT	ret ; \
 		CFI_ENDPROC
 
 .text
@@ -81,10 +75,7 @@ ENTRY(__put_user_8)
 ENDPROC(__put_user_8)
 
 bad_put_user:
-	CFI_STARTPROC simple
-	CFI_DEF_CFA esp, 2*4
-	CFI_OFFSET eip, -1*4
-	CFI_OFFSET ebx, -2*4
+	CFI_STARTPROC
 	movl $-14,%eax
 	EXIT
 END(bad_put_user)
diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h
index 8e7595c1f34..0ecfe47ad60 100644
--- a/include/asm-x86/uaccess_32.h
+++ b/include/asm-x86/uaccess_32.h
@@ -188,23 +188,23 @@ extern void __put_user_8(void);
 
 #define __put_user_1(x, ptr)					\
 	asm volatile("call __put_user_1" : "=a" (__ret_pu)	\
-		     : "0" ((typeof(*(ptr)))(x)), "c" (ptr))
+		     : "0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
 
 #define __put_user_2(x, ptr)					\
 	asm volatile("call __put_user_2" : "=a" (__ret_pu)	\
-		     : "0" ((typeof(*(ptr)))(x)), "c" (ptr))
+		     : "0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
 
 #define __put_user_4(x, ptr)					\
 	asm volatile("call __put_user_4" : "=a" (__ret_pu)	\
-		     : "0" ((typeof(*(ptr)))(x)), "c" (ptr))
+		     : "0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
 
 #define __put_user_8(x, ptr)					\
 	asm volatile("call __put_user_8" : "=a" (__ret_pu)	\
-		     : "A" ((typeof(*(ptr)))(x)), "c" (ptr))
+		     : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
 
 #define __put_user_X(x, ptr)					\
 	asm volatile("call __put_user_X" : "=a" (__ret_pu)	\
-		     : "c" (ptr))
+		     : "c" (ptr): "ebx")
 
 /**
  * put_user: - Write a simple value into user space.
-- 
cgit v1.2.3-70-g09d2


From 70706e432ee5618abf59381101d8dea7b8d97a7d Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Tue, 24 Jun 2008 15:02:31 -0300
Subject: x86: user put_user_x instead of all variants.

Follow the pattern, and define a single put_user_x, instead
of defining macros for all available sizes. Exception is
put_user_8, since the "A" constraint does not give us enough
power to specify which register (a or d) to use in the 32-bit
common case.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/uaccess_32.h | 25 +++++++------------------
 1 file changed, 7 insertions(+), 18 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h
index 0ecfe47ad60..f8abc12a77c 100644
--- a/include/asm-x86/uaccess_32.h
+++ b/include/asm-x86/uaccess_32.h
@@ -186,25 +186,14 @@ extern void __put_user_2(void);
 extern void __put_user_4(void);
 extern void __put_user_8(void);
 
-#define __put_user_1(x, ptr)					\
-	asm volatile("call __put_user_1" : "=a" (__ret_pu)	\
-		     : "0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
-
-#define __put_user_2(x, ptr)					\
-	asm volatile("call __put_user_2" : "=a" (__ret_pu)	\
-		     : "0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
-
-#define __put_user_4(x, ptr)					\
-	asm volatile("call __put_user_4" : "=a" (__ret_pu)	\
-		     : "0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
+#define __put_user_x(size, x, ptr)				\
+	asm volatile("call __put_user_" #size : "=a" (__ret_pu)	\
+		     :"0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
 
 #define __put_user_8(x, ptr)					\
 	asm volatile("call __put_user_8" : "=a" (__ret_pu)	\
 		     : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
 
-#define __put_user_X(x, ptr)					\
-	asm volatile("call __put_user_X" : "=a" (__ret_pu)	\
-		     : "c" (ptr): "ebx")
 
 /**
  * put_user: - Write a simple value into user space.
@@ -232,19 +221,19 @@ extern void __put_user_8(void);
 	__pu_val = x;						\
 	switch (sizeof(*(ptr))) {				\
 	case 1:							\
-		__put_user_1(__pu_val, ptr);			\
+		__put_user_x(1, __pu_val, ptr);			\
 		break;						\
 	case 2:							\
-		__put_user_2(__pu_val, ptr);			\
+		__put_user_x(2, __pu_val, ptr);			\
 		break;						\
 	case 4:							\
-		__put_user_4(__pu_val, ptr);			\
+		__put_user_x(4, __pu_val, ptr);			\
 		break;						\
 	case 8:							\
 		__put_user_8(__pu_val, ptr);			\
 		break;						\
 	default:						\
-		__put_user_X(__pu_val, ptr);			\
+		__put_user_x(X, __pu_val, ptr);			\
 		break;						\
 	}							\
 	__ret_pu;						\
-- 
cgit v1.2.3-70-g09d2


From 770546b99fb99e71a3aa4181980d42664f9c18bd Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Tue, 24 Jun 2008 15:03:40 -0300
Subject: x86: clobber rbx in putuser_64.S.

Instead of clobbering r8, clobber rbx, which is the i386 way.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/lib/putuser_64.S    | 18 +++++++++---------
 include/asm-x86/uaccess_64.h |  2 +-
 2 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/lib/putuser_64.S b/arch/x86/lib/putuser_64.S
index 940796fa0d9..07028851064 100644
--- a/arch/x86/lib/putuser_64.S
+++ b/arch/x86/lib/putuser_64.S
@@ -18,7 +18,7 @@
  *
  * Outputs:	%rax is error code (0 or -EFAULT)
  *
- * %r8 is destroyed.
+ * %rbx is destroyed.
  *
  * These functions should not modify any other registers,
  * as they get called from within inline assembly.
@@ -34,8 +34,8 @@
 	.text
 ENTRY(__put_user_1)
 	CFI_STARTPROC
-	GET_THREAD_INFO(%r8)
-	cmpq TI_addr_limit(%r8),%rcx
+	GET_THREAD_INFO(%rbx)
+	cmpq TI_addr_limit(%rbx),%rcx
 	jae bad_put_user
 1:	movb %dl,(%rcx)
 	xorl %eax,%eax
@@ -45,10 +45,10 @@ ENDPROC(__put_user_1)
 
 ENTRY(__put_user_2)
 	CFI_STARTPROC
-	GET_THREAD_INFO(%r8)
+	GET_THREAD_INFO(%rbx)
 	addq $1,%rcx
 	jc 20f
-	cmpq TI_addr_limit(%r8),%rcx
+	cmpq TI_addr_limit(%rbx),%rcx
 	jae 20f
 	decq %rcx
 2:	movw %dx,(%rcx)
@@ -61,10 +61,10 @@ ENDPROC(__put_user_2)
 
 ENTRY(__put_user_4)
 	CFI_STARTPROC
-	GET_THREAD_INFO(%r8)
+	GET_THREAD_INFO(%rbx)
 	addq $3,%rcx
 	jc 30f
-	cmpq TI_addr_limit(%r8),%rcx
+	cmpq TI_addr_limit(%rbx),%rcx
 	jae 30f
 	subq $3,%rcx
 3:	movl %edx,(%rcx)
@@ -77,10 +77,10 @@ ENDPROC(__put_user_4)
 
 ENTRY(__put_user_8)
 	CFI_STARTPROC
-	GET_THREAD_INFO(%r8)
+	GET_THREAD_INFO(%rbx)
 	addq $7,%rcx
 	jc 40f
-	cmpq TI_addr_limit(%r8),%rcx
+	cmpq TI_addr_limit(%rbx),%rcx
 	jae 40f
 	subq $7,%rcx
 4:	movq %rdx,(%rcx)
diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index a2d49078e19..21fda9ebee1 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h
@@ -131,7 +131,7 @@ extern void __put_user_bad(void);
 	asm volatile("call __put_user_" #size				\
 		     :"=a" (ret)					\
 		     :"c" (ptr),"d" (x)					\
-		     :"r8")
+		     :"ebx")
 
 #define put_user(x, ptr)						\
 	__put_user_check((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
-- 
cgit v1.2.3-70-g09d2


From 0ada3164031162b4e1b7ff6b36ba8cc80ff7fe96 Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Tue, 24 Jun 2008 16:44:39 -0300
Subject: x86: pass argument to putuser_64 functions in ax register.

This is consistent with i386 usage.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/lib/putuser_64.S    | 8 ++++----
 include/asm-x86/uaccess_64.h | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/lib/putuser_64.S b/arch/x86/lib/putuser_64.S
index 07028851064..ce5fcd5d8c1 100644
--- a/arch/x86/lib/putuser_64.S
+++ b/arch/x86/lib/putuser_64.S
@@ -37,7 +37,7 @@ ENTRY(__put_user_1)
 	GET_THREAD_INFO(%rbx)
 	cmpq TI_addr_limit(%rbx),%rcx
 	jae bad_put_user
-1:	movb %dl,(%rcx)
+1:	movb %al,(%rcx)
 	xorl %eax,%eax
 	ret
 	CFI_ENDPROC
@@ -51,7 +51,7 @@ ENTRY(__put_user_2)
 	cmpq TI_addr_limit(%rbx),%rcx
 	jae 20f
 	decq %rcx
-2:	movw %dx,(%rcx)
+2:	movw %ax,(%rcx)
 	xorl %eax,%eax
 	ret
 20:	decq %rcx
@@ -67,7 +67,7 @@ ENTRY(__put_user_4)
 	cmpq TI_addr_limit(%rbx),%rcx
 	jae 30f
 	subq $3,%rcx
-3:	movl %edx,(%rcx)
+3:	movl %eax,(%rcx)
 	xorl %eax,%eax
 	ret
 30:	subq $3,%rcx
@@ -83,7 +83,7 @@ ENTRY(__put_user_8)
 	cmpq TI_addr_limit(%rbx),%rcx
 	jae 40f
 	subq $7,%rcx
-4:	movq %rdx,(%rcx)
+4:	movq %rax,(%rcx)
 	xorl %eax,%eax
 	ret
 40:	subq $7,%rcx
diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index 21fda9ebee1..f822a36cc1c 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h
@@ -130,7 +130,7 @@ extern void __put_user_bad(void);
 #define __put_user_x(size, ret, x, ptr)					\
 	asm volatile("call __put_user_" #size				\
 		     :"=a" (ret)					\
-		     :"c" (ptr),"d" (x)					\
+		     :"c" (ptr),"a" (x)					\
 		     :"ebx")
 
 #define put_user(x, ptr)						\
-- 
cgit v1.2.3-70-g09d2


From 2528de431ddb200653d1dc6ca90074bad9520f09 Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Tue, 24 Jun 2008 17:36:31 -0300
Subject: x86: use macros from asm.h.

In putuser_32.S and putuser_64.S, replace things like .quad, .long,
and explicit references to [r|e]ax for the apropriate macros
in asm/asm.h.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/lib/putuser_32.S | 43 ++++++++++++++++++++++---------------------
 arch/x86/lib/putuser_64.S | 41 +++++++++++++++++++++--------------------
 include/asm-x86/asm.h     |  2 ++
 3 files changed, 45 insertions(+), 41 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/lib/putuser_32.S b/arch/x86/lib/putuser_32.S
index b67a37cab1b..e7eda34feb3 100644
--- a/arch/x86/lib/putuser_32.S
+++ b/arch/x86/lib/putuser_32.S
@@ -11,6 +11,7 @@
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
 #include <asm/thread_info.h>
+#include <asm/asm.h>
 
 
 /*
@@ -26,50 +27,50 @@
  */
 
 #define ENTER	CFI_STARTPROC ; \
-		GET_THREAD_INFO(%ebx)
+		GET_THREAD_INFO(%_ASM_BX)
 #define EXIT	ret ; \
 		CFI_ENDPROC
 
 .text
 ENTRY(__put_user_1)
 	ENTER
-	cmp TI_addr_limit(%ebx),%ecx
+	cmp TI_addr_limit(%_ASM_BX),%_ASM_CX
 	jae bad_put_user
-1:	movb %al,(%ecx)
+1:	movb %al,(%_ASM_CX)
 	xor %eax,%eax
 	EXIT
 ENDPROC(__put_user_1)
 
 ENTRY(__put_user_2)
 	ENTER
-	mov TI_addr_limit(%ebx),%ebx
-	sub $1,%ebx
-	cmp %ebx,%ecx
+	mov TI_addr_limit(%_ASM_BX),%_ASM_BX
+	sub $1,%_ASM_BX
+	cmp %_ASM_BX,%_ASM_CX
 	jae bad_put_user
-2:	movw %ax,(%ecx)
+2:	movw %ax,(%_ASM_CX)
 	xor %eax,%eax
 	EXIT
 ENDPROC(__put_user_2)
 
 ENTRY(__put_user_4)
 	ENTER
-	mov TI_addr_limit(%ebx),%ebx
-	sub $3,%ebx
-	cmp %ebx,%ecx
+	mov TI_addr_limit(%_ASM_BX),%_ASM_BX
+	sub $3,%_ASM_BX
+	cmp %_ASM_BX,%_ASM_CX
 	jae bad_put_user
-3:	movl %eax,(%ecx)
+3:	movl %eax,(%_ASM_CX)
 	xor %eax,%eax
 	EXIT
 ENDPROC(__put_user_4)
 
 ENTRY(__put_user_8)
 	ENTER
-	mov TI_addr_limit(%ebx),%ebx
-	sub $7,%ebx
-	cmp %ebx,%ecx
+	mov TI_addr_limit(%_ASM_BX),%_ASM_BX
+	sub $7,%_ASM_BX
+	cmp %_ASM_BX,%_ASM_CX
 	jae bad_put_user
-4:	movl %eax,(%ecx)
-5:	movl %edx,4(%ecx)
+4:	movl %_ASM_AX,(%_ASM_CX)
+5:	movl %edx,4(%_ASM_CX)
 	xor %eax,%eax
 	EXIT
 ENDPROC(__put_user_8)
@@ -81,9 +82,9 @@ bad_put_user:
 END(bad_put_user)
 
 .section __ex_table,"a"
-	.long 1b,bad_put_user
-	.long 2b,bad_put_user
-	.long 3b,bad_put_user
-	.long 4b,bad_put_user
-	.long 5b,bad_put_user
+	_ASM_PTR 1b,bad_put_user
+	_ASM_PTR 2b,bad_put_user
+	_ASM_PTR 3b,bad_put_user
+	_ASM_PTR 4b,bad_put_user
+	_ASM_PTR 5b,bad_put_user
 .previous
diff --git a/arch/x86/lib/putuser_64.S b/arch/x86/lib/putuser_64.S
index c18fc0f5256..d496cc8e730 100644
--- a/arch/x86/lib/putuser_64.S
+++ b/arch/x86/lib/putuser_64.S
@@ -30,64 +30,65 @@
 #include <asm/errno.h>
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
+#include <asm/asm.h>
 
 #define ENTER   CFI_STARTPROC ; \
-                GET_THREAD_INFO(%rbx)
+                GET_THREAD_INFO(%_ASM_BX)
 #define EXIT    ret ; \
                 CFI_ENDPROC
 
 	.text
 ENTRY(__put_user_1)
 	ENTER
-	cmp TI_addr_limit(%rbx),%rcx
+	cmp TI_addr_limit(%_ASM_BX),%_ASM_CX
 	jae bad_put_user
-1:	movb %al,(%rcx)
+1:	movb %al,(%_ASM_CX)
 	xor %eax,%eax
 	EXIT
 ENDPROC(__put_user_1)
 
 ENTRY(__put_user_2)
 	ENTER
-	mov TI_addr_limit(%rbx),%rbx
-	sub  $1, %rbx
-	cmp %rbx ,%rcx
+	mov TI_addr_limit(%_ASM_BX),%_ASM_BX
+	sub  $1, %_ASM_BX
+	cmp %_ASM_BX ,%_ASM_CX
 	jae  bad_put_user
-2:	movw %ax,(%rcx)
+2:	movw %ax,(%_ASM_CX)
 	xor %eax,%eax
 	EXIT
 ENDPROC(__put_user_2)
 
 ENTRY(__put_user_4)
 	ENTER
-	mov TI_addr_limit(%rbx),%rbx
-	sub  $3, %rbx
-	cmp  %rbx, %rcx
+	mov TI_addr_limit(%_ASM_BX),%_ASM_BX
+	sub  $3, %_ASM_BX
+	cmp  %_ASM_BX, %_ASM_CX
 	jae bad_put_user
-3:	movl %eax,(%rcx)
+3:	movl %eax,(%_ASM_CX)
 	xor %eax,%eax
 	EXIT
 ENDPROC(__put_user_4)
 
 ENTRY(__put_user_8)
 	ENTER
-	mov TI_addr_limit(%rbx),%rbx
-	sub  $7, %rbx
-	cmp  %rbx, %rcx
+	mov TI_addr_limit(%_ASM_BX),%_ASM_BX
+	sub  $7, %_ASM_BX
+	cmp  %_ASM_BX, %_ASM_CX
 	jae bad_put_user
-4:	movq %rax,(%rcx)
+4:	movq %_ASM_AX,(%_ASM_CX)
 	xor %eax,%eax
 	EXIT
 ENDPROC(__put_user_8)
 
 bad_put_user:
 	CFI_STARTPROC
-	mov $(-EFAULT),%rax
+	mov $(-EFAULT),%eax
 	EXIT
 END(bad_put_user)
 
 .section __ex_table,"a"
-	.quad 1b,bad_put_user
-	.quad 2b,bad_put_user
-	.quad 3b,bad_put_user
-	.quad 4b,bad_put_user
+	_ASM_PTR 1b,bad_put_user
+	_ASM_PTR 2b,bad_put_user
+	_ASM_PTR 3b,bad_put_user
+	_ASM_PTR 4b,bad_put_user
 .previous
diff --git a/include/asm-x86/asm.h b/include/asm-x86/asm.h
index 57750a95685..97220321f39 100644
--- a/include/asm-x86/asm.h
+++ b/include/asm-x86/asm.h
@@ -28,6 +28,8 @@
 #define _ASM_SUB	__ASM_SIZE(sub)
 #define _ASM_XADD	__ASM_SIZE(xadd)
 #define _ASM_AX		__ASM_REG(ax)
+#define _ASM_BX		__ASM_REG(bx)
+#define _ASM_CX		__ASM_REG(cx)
 #define _ASM_DX		__ASM_REG(dx)
 
 /* Exception table entry */
-- 
cgit v1.2.3-70-g09d2


From 53938a68a2f971058bc53aaa8c70f9f24f684cdc Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Wed, 25 Jun 2008 10:14:13 -0300
Subject: x86: commonize __range_not_ok.

For i386, __range_not_ok is a better name than __range_ok, since
it returns 0 when it is in fact okay. Other than that,
both versions does not need the word size specifiers, and we remove them.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/uaccess_32.h | 6 +++---
 include/asm-x86/uaccess_64.h | 3 +--
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h
index f8abc12a77c..8d3f02d3562 100644
--- a/include/asm-x86/uaccess_32.h
+++ b/include/asm-x86/uaccess_32.h
@@ -56,11 +56,11 @@ extern struct movsl_mask {
  *
  * This needs 33-bit arithmetic. We have a carry...
  */
-#define __range_ok(addr, size)						\
+#define __range_not_ok(addr, size)					\
 ({									\
 	unsigned long flag, roksum;					\
 	__chk_user_ptr(addr);						\
-	asm("addl %3,%1 ; sbbl %0,%0; cmpl %1,%4; sbbl $0,%0"		\
+	asm("add %3,%1 ; sbb %0,%0; cmp %1,%4; sbb $0,%0"		\
 	    :"=&r" (flag), "=r" (roksum)				\
 	    :"1" (addr), "g" ((int)(size)),				\
 	    "rm" (current_thread_info()->addr_limit.seg));		\
@@ -86,7 +86,7 @@ extern struct movsl_mask {
  * checks that the pointer is in the user space range - after calling
  * this function, memory access functions may still return -EFAULT.
  */
-#define access_ok(type, addr, size) (likely(__range_ok(addr, size) == 0))
+#define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0))
 
 /*
  * The exception table consists of pairs of addresses: the first is the
diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index f822a36cc1c..012cba3972d 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h
@@ -41,8 +41,7 @@
 ({									\
 	unsigned long flag, roksum;					\
 	__chk_user_ptr(addr);						\
-	asm("# range_ok\n\r"						\
-	    "addq %3,%1 ; sbbq %0,%0 ; cmpq %1,%4 ; sbbq $0,%0"		\
+	asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0"		\
 	    : "=&r" (flag), "=r" (roksum)				\
 	    : "1" (addr), "g" ((long)(size)),				\
 	      "g" (current_thread_info()->addr_limit.seg));		\
-- 
cgit v1.2.3-70-g09d2


From c28b95d9bb7da0c3be22826c56a05899e21e5ece Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Mon, 30 Jun 2008 17:07:51 -0300
Subject: x86: change asm constraint.

Our integration efforts broke a build with this function being used
with i386. Reason is "g" can put the operand in an imm32, which according
to The Book (tm), is invalid as the second operand.

This is actually a bug
in x86_64 too, since the x86_64 instruction set reference does not list
it as valid.

We probably didn't trigger this before due to the ammount of
registers available for 64-bit platforms. But that's just my guess.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/uaccess_64.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index 012cba3972d..dc6dde05838 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h
@@ -44,7 +44,7 @@
 	asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0"		\
 	    : "=&r" (flag), "=r" (roksum)				\
 	    : "1" (addr), "g" ((long)(size)),				\
-	      "g" (current_thread_info()->addr_limit.seg));		\
+	      "rm" (current_thread_info()->addr_limit.seg));		\
 	flag;								\
 })
 
-- 
cgit v1.2.3-70-g09d2


From 8b0a8aaf05325a1a96f53b45708f77599da35161 Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Mon, 30 Jun 2008 17:34:39 -0300
Subject: x86: introduce likely in macro.

Put the likely hint in access_ok. Just for
bisectability.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/uaccess_64.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index dc6dde05838..0077dbe9359 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h
@@ -48,7 +48,7 @@
 	flag;								\
 })
 
-#define access_ok(type, addr, size) (__range_not_ok(addr, size) == 0)
+#define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0))
 
 /*
  * The exception table consists of pairs of addresses: the first is the
-- 
cgit v1.2.3-70-g09d2


From 1dc186e82c1ab476ef83080adca43a70969b01cd Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Mon, 30 Jun 2008 17:37:08 -0300
Subject: x86: use long instead of int.

Do not refer to the processor word-size with int, as it won't
work with x86_64. Use long instead.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/uaccess_32.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h
index 8d3f02d3562..eed8d9422b4 100644
--- a/include/asm-x86/uaccess_32.h
+++ b/include/asm-x86/uaccess_32.h
@@ -62,7 +62,7 @@ extern struct movsl_mask {
 	__chk_user_ptr(addr);						\
 	asm("add %3,%1 ; sbb %0,%0; cmp %1,%4; sbb $0,%0"		\
 	    :"=&r" (flag), "=r" (roksum)				\
-	    :"1" (addr), "g" ((int)(size)),				\
+	    :"1" (addr), "g" ((long)(size)),				\
 	    "rm" (current_thread_info()->addr_limit.seg));		\
 	flag;								\
 })
-- 
cgit v1.2.3-70-g09d2


From be9d06bfd48934fbd56ccb7476eabccfa31b4afe Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Fri, 13 Jun 2008 21:01:46 -0300
Subject: x86: use something common for both architectures.

Using explicit hexa (0xFFFFFFUL) introduces an unnecessary difference
between i386 and x86_64 because of the size of their long. Use -1UL instead.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/uaccess_32.h | 2 +-
 include/asm-x86/uaccess_64.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h
index eed8d9422b4..2676b48ac0f 100644
--- a/include/asm-x86/uaccess_32.h
+++ b/include/asm-x86/uaccess_32.h
@@ -25,7 +25,7 @@
 #define MAKE_MM_SEG(s)	((mm_segment_t) { (s) })
 
 
-#define KERNEL_DS	MAKE_MM_SEG(0xFFFFFFFFUL)
+#define KERNEL_DS	MAKE_MM_SEG(-1UL)
 #define USER_DS		MAKE_MM_SEG(PAGE_OFFSET)
 
 #define get_ds()	(KERNEL_DS)
diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index 0077dbe9359..3a81775136c 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h
@@ -22,7 +22,7 @@
 
 #define MAKE_MM_SEG(s)	((mm_segment_t) { (s) })
 
-#define KERNEL_DS	MAKE_MM_SEG(0xFFFFFFFFFFFFFFFFUL)
+#define KERNEL_DS	MAKE_MM_SEG(-1UL)
 #define USER_DS		MAKE_MM_SEG(PAGE_OFFSET)
 
 #define get_ds()	(KERNEL_DS)
-- 
cgit v1.2.3-70-g09d2


From ca23386216b9d4fc3bb211101205077d2b2916ae Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Fri, 13 Jun 2008 14:39:25 -0300
Subject: x86: merge common parts of uaccess.

Common parts of uaccess_32.h and uaccess_64.h
are put in uaccess.h. Bits in uaccess_32.h and
uaccess_64.h that come to this file are equal
except for comments and whitespaces differences.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/uaccess.h    | 124 +++++++++++++++++++++++++++++++++++++++++++
 include/asm-x86/uaccess_32.h | 110 --------------------------------------
 include/asm-x86/uaccess_64.h |  83 -----------------------------
 3 files changed, 124 insertions(+), 193 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h
index 9fefd2947e7..2fc30c2a8a9 100644
--- a/include/asm-x86/uaccess.h
+++ b/include/asm-x86/uaccess.h
@@ -1,5 +1,129 @@
+#ifndef _ASM_UACCES_H_
+#define _ASM_UACCES_H_
+/*
+ * User space memory access functions
+ */
+#include <linux/errno.h>
+#include <linux/compiler.h>
+#include <linux/thread_info.h>
+#include <linux/prefetch.h>
+#include <linux/string.h>
+#include <asm/asm.h>
+#include <asm/page.h>
+
+#define VERIFY_READ 0
+#define VERIFY_WRITE 1
+
+/*
+ * The fs value determines whether argument validity checking should be
+ * performed or not.  If get_fs() == USER_DS, checking is performed, with
+ * get_fs() == KERNEL_DS, checking is bypassed.
+ *
+ * For historical reasons, these macros are grossly misnamed.
+ */
+
+#define MAKE_MM_SEG(s)	((mm_segment_t) { (s) })
+
+#define KERNEL_DS	MAKE_MM_SEG(-1UL)
+#define USER_DS		MAKE_MM_SEG(PAGE_OFFSET)
+
+#define get_ds()	(KERNEL_DS)
+#define get_fs()	(current_thread_info()->addr_limit)
+#define set_fs(x)	(current_thread_info()->addr_limit = (x))
+
+#define segment_eq(a, b)	((a).seg == (b).seg)
+
+/*
+ * Test whether a block of memory is a valid user space address.
+ * Returns 0 if the range is valid, nonzero otherwise.
+ *
+ * This is equivalent to the following test:
+ * (u33)addr + (u33)size >= (u33)current->addr_limit.seg (u65 for x86_64)
+ *
+ * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry...
+ */
+
+#define __range_not_ok(addr, size)					\
+({									\
+	unsigned long flag, roksum;					\
+	__chk_user_ptr(addr);						\
+	asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0"		\
+	    : "=&r" (flag), "=r" (roksum)				\
+	    : "1" (addr), "g" ((long)(size)),				\
+	      "rm" (current_thread_info()->addr_limit.seg));		\
+	flag;								\
+})
+
+/**
+ * access_ok: - Checks if a user space pointer is valid
+ * @type: Type of access: %VERIFY_READ or %VERIFY_WRITE.  Note that
+ *        %VERIFY_WRITE is a superset of %VERIFY_READ - if it is safe
+ *        to write to a block, it is always safe to read from it.
+ * @addr: User space pointer to start of block to check
+ * @size: Size of block to check
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * Checks if a pointer to a block of memory in user space is valid.
+ *
+ * Returns true (nonzero) if the memory block may be valid, false (zero)
+ * if it is definitely invalid.
+ *
+ * Note that, depending on architecture, this function probably just
+ * checks that the pointer is in the user space range - after calling
+ * this function, memory access functions may still return -EFAULT.
+ */
+#define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0))
+
+/*
+ * The exception table consists of pairs of addresses: the first is the
+ * address of an instruction that is allowed to fault, and the second is
+ * the address at which the program should continue.  No registers are
+ * modified, so it is entirely up to the continuation code to figure out
+ * what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path.  This means when everything is well,
+ * we don't even have to jump over them.  Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+
+struct exception_table_entry {
+	unsigned long insn, fixup;
+};
+
+extern int fixup_exception(struct pt_regs *regs);
+
+/*
+ * These are the main single-value transfer routines.  They automatically
+ * use the right size if we just have the right pointer type.
+ *
+ * This gets kind of ugly. We want to return _two_ values in "get_user()"
+ * and yet we don't want to do any pointers, because that is too much
+ * of a performance impact. Thus we have a few rather ugly macros here,
+ * and hide all the ugliness from the user.
+ *
+ * The "__xxx" versions of the user access functions are versions that
+ * do not verify the address space, that must have been done previously
+ * with a separate "access_ok()" call (this is used when we do multiple
+ * accesses to the same area of user memory).
+ */
+
+extern int __get_user_1(void);
+extern int __get_user_2(void);
+extern int __get_user_4(void);
+extern int __get_user_8(void);
+extern int __get_user_bad(void);
+
+#define __get_user_x(size, ret, x, ptr)		      \
+	asm volatile("call __get_user_" #size	      \
+		     : "=a" (ret),"=d" (x)	      \
+		     : "0" (ptr))		      \
+
 #ifdef CONFIG_X86_32
 # include "uaccess_32.h"
 #else
 # include "uaccess_64.h"
 #endif
+
+#endif
diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h
index 2676b48ac0f..92ad19e7098 100644
--- a/include/asm-x86/uaccess_32.h
+++ b/include/asm-x86/uaccess_32.h
@@ -11,29 +11,6 @@
 #include <asm/asm.h>
 #include <asm/page.h>
 
-#define VERIFY_READ 0
-#define VERIFY_WRITE 1
-
-/*
- * The fs value determines whether argument validity checking should be
- * performed or not.  If get_fs() == USER_DS, checking is performed, with
- * get_fs() == KERNEL_DS, checking is bypassed.
- *
- * For historical reasons, these macros are grossly misnamed.
- */
-
-#define MAKE_MM_SEG(s)	((mm_segment_t) { (s) })
-
-
-#define KERNEL_DS	MAKE_MM_SEG(-1UL)
-#define USER_DS		MAKE_MM_SEG(PAGE_OFFSET)
-
-#define get_ds()	(KERNEL_DS)
-#define get_fs()	(current_thread_info()->addr_limit)
-#define set_fs(x)	(current_thread_info()->addr_limit = (x))
-
-#define segment_eq(a, b)	((a).seg == (b).seg)
-
 /*
  * movsl can be slow when source and dest are not both 8-byte aligned
  */
@@ -47,91 +24,6 @@ extern struct movsl_mask {
 	((unsigned long __force)(addr) <		\
 	 (current_thread_info()->addr_limit.seg))
 
-/*
- * Test whether a block of memory is a valid user space address.
- * Returns 0 if the range is valid, nonzero otherwise.
- *
- * This is equivalent to the following test:
- * (u33)addr + (u33)size >= (u33)current->addr_limit.seg
- *
- * This needs 33-bit arithmetic. We have a carry...
- */
-#define __range_not_ok(addr, size)					\
-({									\
-	unsigned long flag, roksum;					\
-	__chk_user_ptr(addr);						\
-	asm("add %3,%1 ; sbb %0,%0; cmp %1,%4; sbb $0,%0"		\
-	    :"=&r" (flag), "=r" (roksum)				\
-	    :"1" (addr), "g" ((long)(size)),				\
-	    "rm" (current_thread_info()->addr_limit.seg));		\
-	flag;								\
-})
-
-/**
- * access_ok: - Checks if a user space pointer is valid
- * @type: Type of access: %VERIFY_READ or %VERIFY_WRITE.  Note that
- *        %VERIFY_WRITE is a superset of %VERIFY_READ - if it is safe
- *        to write to a block, it is always safe to read from it.
- * @addr: User space pointer to start of block to check
- * @size: Size of block to check
- *
- * Context: User context only.  This function may sleep.
- *
- * Checks if a pointer to a block of memory in user space is valid.
- *
- * Returns true (nonzero) if the memory block may be valid, false (zero)
- * if it is definitely invalid.
- *
- * Note that, depending on architecture, this function probably just
- * checks that the pointer is in the user space range - after calling
- * this function, memory access functions may still return -EFAULT.
- */
-#define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0))
-
-/*
- * The exception table consists of pairs of addresses: the first is the
- * address of an instruction that is allowed to fault, and the second is
- * the address at which the program should continue.  No registers are
- * modified, so it is entirely up to the continuation code to figure out
- * what to do.
- *
- * All the routines below use bits of fixup code that are out of line
- * with the main instruction path.  This means when everything is well,
- * we don't even have to jump over them.  Further, they do not intrude
- * on our cache or tlb entries.
- */
-
-struct exception_table_entry {
-	unsigned long insn, fixup;
-};
-
-extern int fixup_exception(struct pt_regs *regs);
-
-/*
- * These are the main single-value transfer routines.  They automatically
- * use the right size if we just have the right pointer type.
- *
- * This gets kind of ugly. We want to return _two_ values in "get_user()"
- * and yet we don't want to do any pointers, because that is too much
- * of a performance impact. Thus we have a few rather ugly macros here,
- * and hide all the ugliness from the user.
- *
- * The "__xxx" versions of the user access functions are versions that
- * do not verify the address space, that must have been done previously
- * with a separate "access_ok()" call (this is used when we do multiple
- * accesses to the same area of user memory).
- */
-
-extern void __get_user_1(void);
-extern void __get_user_2(void);
-extern void __get_user_4(void);
-
-#define __get_user_x(size, ret, x, ptr)	      \
-	asm volatile("call __get_user_" #size \
-		     :"=a" (ret),"=d" (x)     \
-		     :"0" (ptr))
-
-
 /* Careful: we have to cast the result to the type of the pointer
  * for sign reasons */
 
@@ -386,8 +278,6 @@ struct __large_struct { unsigned long buf[100]; };
 	__gu_err;							\
 })
 
-extern long __get_user_bad(void);
-
 #define __get_user_size(x, ptr, size, retval, errret)			\
 do {									\
 	retval = 0;							\
diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index 3a81775136c..243dbb467f3 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h
@@ -9,88 +9,11 @@
 #include <linux/prefetch.h>
 #include <asm/page.h>
 
-#define VERIFY_READ 0
-#define VERIFY_WRITE 1
-
-/*
- * The fs value determines whether argument validity checking should be
- * performed or not.  If get_fs() == USER_DS, checking is performed, with
- * get_fs() == KERNEL_DS, checking is bypassed.
- *
- * For historical reasons, these macros are grossly misnamed.
- */
-
-#define MAKE_MM_SEG(s)	((mm_segment_t) { (s) })
-
-#define KERNEL_DS	MAKE_MM_SEG(-1UL)
-#define USER_DS		MAKE_MM_SEG(PAGE_OFFSET)
-
-#define get_ds()	(KERNEL_DS)
-#define get_fs()	(current_thread_info()->addr_limit)
-#define set_fs(x)	(current_thread_info()->addr_limit = (x))
-
-#define segment_eq(a, b)	((a).seg == (b).seg)
-
 #define __addr_ok(addr) (!((unsigned long)(addr) &			\
 			   (current_thread_info()->addr_limit.seg)))
 
-/*
- * Uhhuh, this needs 65-bit arithmetic. We have a carry..
- */
-#define __range_not_ok(addr, size)					\
-({									\
-	unsigned long flag, roksum;					\
-	__chk_user_ptr(addr);						\
-	asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0"		\
-	    : "=&r" (flag), "=r" (roksum)				\
-	    : "1" (addr), "g" ((long)(size)),				\
-	      "rm" (current_thread_info()->addr_limit.seg));		\
-	flag;								\
-})
-
-#define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0))
-
-/*
- * The exception table consists of pairs of addresses: the first is the
- * address of an instruction that is allowed to fault, and the second is
- * the address at which the program should continue.  No registers are
- * modified, so it is entirely up to the continuation code to figure out
- * what to do.
- *
- * All the routines below use bits of fixup code that are out of line
- * with the main instruction path.  This means when everything is well,
- * we don't even have to jump over them.  Further, they do not intrude
- * on our cache or tlb entries.
- */
-
-struct exception_table_entry {
-	unsigned long insn, fixup;
-};
-
-extern int fixup_exception(struct pt_regs *regs);
-
 #define ARCH_HAS_SEARCH_EXTABLE
 
-/*
- * These are the main single-value transfer routines.  They automatically
- * use the right size if we just have the right pointer type.
- *
- * This gets kind of ugly. We want to return _two_ values in "get_user()"
- * and yet we don't want to do any pointers, because that is too much
- * of a performance impact. Thus we have a few rather ugly macros here,
- * and hide all the ugliness from the user.
- *
- * The "__xxx" versions of the user access functions are versions that
- * do not verify the address space, that must have been done previously
- * with a separate "access_ok()" call (this is used when we do multiple
- * accesses to the same area of user memory).
- */
-
-#define __get_user_x(size, ret, x, ptr)		      \
-	asm volatile("call __get_user_" #size	      \
-		     : "=a" (ret),"=d" (x)	      \
-		     : "0" (ptr))		      \
-
 /* Careful: we have to cast the result to the type of the pointer
  * for sign reasons */
 
@@ -226,12 +149,6 @@ struct __large_struct { unsigned long buf[100]; };
 	__gu_err;						\
 })
 
-extern int __get_user_1(void);
-extern int __get_user_2(void);
-extern int __get_user_4(void);
-extern int __get_user_8(void);
-extern int __get_user_bad(void);
-
 #define __get_user_size(x, ptr, size, retval)				\
 do {									\
 	retval = 0;							\
-- 
cgit v1.2.3-70-g09d2


From 865e5b76505cdade261773bde32f785b3ce579f1 Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Wed, 25 Jun 2008 11:05:11 -0300
Subject: x86: merge getuser.

Merge versions of getuser from uaccess_32.h and uaccess_64.h into
uaccess.h. There is a part which is 64-bit only (for now), and for
that, we use a __get_user_8 macro.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/uaccess.h    | 55 ++++++++++++++++++++++++++++++++++++++++++++
 include/asm-x86/uaccess_32.h | 43 ----------------------------------
 include/asm-x86/uaccess_64.h | 29 -----------------------
 3 files changed, 55 insertions(+), 72 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h
index 2fc30c2a8a9..8cb580c0b41 100644
--- a/include/asm-x86/uaccess.h
+++ b/include/asm-x86/uaccess.h
@@ -120,6 +120,61 @@ extern int __get_user_bad(void);
 		     : "=a" (ret),"=d" (x)	      \
 		     : "0" (ptr))		      \
 
+/* Careful: we have to cast the result to the type of the pointer
+ * for sign reasons */
+
+/**
+ * get_user: - Get a simple variable from user space.
+ * @x:   Variable to store result.
+ * @ptr: Source address, in user space.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * This macro copies a single simple variable from user space to kernel
+ * space.  It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and the result of
+ * dereferencing @ptr must be assignable to @x without a cast.
+ *
+ * Returns zero on success, or -EFAULT on error.
+ * On error, the variable @x is set to zero.
+ */
+#ifdef CONFIG_X86_32
+#define __get_user_8(__ret_gu, __val_gu, ptr)				\
+		__get_user_x(X, __ret_gu, __val_gu, ptr)
+#else
+#define __get_user_8(__ret_gu, __val_gu, ptr)				\
+		__get_user_x(8, __ret_gu, __val_gu, ptr)
+#endif
+
+#define get_user(x, ptr)						\
+({									\
+	int __ret_gu;							\
+	unsigned long __val_gu;						\
+	__chk_user_ptr(ptr);						\
+	switch (sizeof(*(ptr))) {					\
+	case 1:								\
+		__get_user_x(1, __ret_gu, __val_gu, ptr);		\
+		break;							\
+	case 2:								\
+		__get_user_x(2, __ret_gu, __val_gu, ptr);		\
+		break;							\
+	case 4:								\
+		__get_user_x(4, __ret_gu, __val_gu, ptr);		\
+		break;							\
+	case 8:								\
+		__get_user_8(__ret_gu, __val_gu, ptr);			\
+		break;							\
+	default:							\
+		__get_user_x(X, __ret_gu, __val_gu, ptr);		\
+		break;							\
+	}								\
+	(x) = (__typeof__(*(ptr)))__val_gu;				\
+	__ret_gu;							\
+})
+
+
 #ifdef CONFIG_X86_32
 # include "uaccess_32.h"
 #else
diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h
index 92ad19e7098..3cc32369443 100644
--- a/include/asm-x86/uaccess_32.h
+++ b/include/asm-x86/uaccess_32.h
@@ -24,49 +24,6 @@ extern struct movsl_mask {
 	((unsigned long __force)(addr) <		\
 	 (current_thread_info()->addr_limit.seg))
 
-/* Careful: we have to cast the result to the type of the pointer
- * for sign reasons */
-
-/**
- * get_user: - Get a simple variable from user space.
- * @x:   Variable to store result.
- * @ptr: Source address, in user space.
- *
- * Context: User context only.  This function may sleep.
- *
- * This macro copies a single simple variable from user space to kernel
- * space.  It supports simple types like char and int, but not larger
- * data types like structures or arrays.
- *
- * @ptr must have pointer-to-simple-variable type, and the result of
- * dereferencing @ptr must be assignable to @x without a cast.
- *
- * Returns zero on success, or -EFAULT on error.
- * On error, the variable @x is set to zero.
- */
-#define get_user(x, ptr)						\
-({									\
-	int __ret_gu;							\
-	unsigned long __val_gu;						\
-	__chk_user_ptr(ptr);						\
-	switch (sizeof(*(ptr))) {					\
-	case 1:								\
-		__get_user_x(1, __ret_gu, __val_gu, ptr);		\
-		break;							\
-	case 2:								\
-		__get_user_x(2, __ret_gu, __val_gu, ptr);		\
-		break;							\
-	case 4:								\
-		__get_user_x(4, __ret_gu, __val_gu, ptr);		\
-		break;							\
-	default:							\
-		__get_user_x(X, __ret_gu, __val_gu, ptr);		\
-		break;							\
-	}								\
-	(x) = (__typeof__(*(ptr)))__val_gu;				\
-	__ret_gu;							\
-})
-
 extern void __put_user_bad(void);
 
 /*
diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index 243dbb467f3..4a44b906e4c 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h
@@ -14,35 +14,6 @@
 
 #define ARCH_HAS_SEARCH_EXTABLE
 
-/* Careful: we have to cast the result to the type of the pointer
- * for sign reasons */
-
-#define get_user(x, ptr)						\
-({									\
-	unsigned long __val_gu;						\
-	int __ret_gu;							\
-	__chk_user_ptr(ptr);						\
-	switch (sizeof(*(ptr))) {					\
-	case 1:								\
-		__get_user_x(1, __ret_gu, __val_gu, ptr);		\
-		break;							\
-	case 2:								\
-		__get_user_x(2, __ret_gu, __val_gu, ptr);		\
-		break;							\
-	case 4:								\
-		__get_user_x(4, __ret_gu, __val_gu, ptr);		\
-		break;							\
-	case 8:								\
-		__get_user_x(8, __ret_gu, __val_gu, ptr);		\
-		break;							\
-	default:							\
-		__get_user_bad();					\
-		break;							\
-	}								\
-	(x) = (__force typeof(*(ptr)))__val_gu;				\
-	__ret_gu;							\
-})
-
 extern void __put_user_1(void);
 extern void __put_user_2(void);
 extern void __put_user_4(void);
-- 
cgit v1.2.3-70-g09d2


From 002ca1690c3d0a495e6aedd608281aeb01ce6385 Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Wed, 25 Jun 2008 11:08:51 -0300
Subject: x86: move __addr_ok to uaccess.h.

Take it out of uaccess_32.h. Since it seems that no users
of the x86_64 exists, we simply pick the i386 version.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/uaccess.h    | 4 ++++
 include/asm-x86/uaccess_32.h | 4 ----
 include/asm-x86/uaccess_64.h | 3 ---
 3 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h
index 8cb580c0b41..8a1e45fdc98 100644
--- a/include/asm-x86/uaccess.h
+++ b/include/asm-x86/uaccess.h
@@ -33,6 +33,10 @@
 
 #define segment_eq(a, b)	((a).seg == (b).seg)
 
+#define __addr_ok(addr)					\
+	((unsigned long __force)(addr) <		\
+	 (current_thread_info()->addr_limit.seg))
+
 /*
  * Test whether a block of memory is a valid user space address.
  * Returns 0 if the range is valid, nonzero otherwise.
diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h
index 3cc32369443..87b1aede9d4 100644
--- a/include/asm-x86/uaccess_32.h
+++ b/include/asm-x86/uaccess_32.h
@@ -20,10 +20,6 @@ extern struct movsl_mask {
 } ____cacheline_aligned_in_smp movsl_mask;
 #endif
 
-#define __addr_ok(addr)					\
-	((unsigned long __force)(addr) <		\
-	 (current_thread_info()->addr_limit.seg))
-
 extern void __put_user_bad(void);
 
 /*
diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index 4a44b906e4c..81308763a4c 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h
@@ -9,9 +9,6 @@
 #include <linux/prefetch.h>
 #include <asm/page.h>
 
-#define __addr_ok(addr) (!((unsigned long)(addr) &			\
-			   (current_thread_info()->addr_limit.seg)))
-
 #define ARCH_HAS_SEARCH_EXTABLE
 
 extern void __put_user_1(void);
-- 
cgit v1.2.3-70-g09d2


From 5e322163b19735fbef3e294c297d38e0d2ba8f7e Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Wed, 25 Jun 2008 11:35:06 -0300
Subject: x86: use k modifier for 4-byte access.

Do it in a separate patch for bisectability.
Goal is to have put_user_size integrated.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/uaccess_32.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h
index 87b1aede9d4..4c47a5ba65e 100644
--- a/include/asm-x86/uaccess_32.h
+++ b/include/asm-x86/uaccess_32.h
@@ -180,7 +180,7 @@ do {									\
 		__put_user_asm(x, ptr, retval, "w", "w", "ir", errret);	\
 		break;							\
 	case 4:								\
-		__put_user_asm(x, ptr, retval, "l", "",  "ir", errret);	\
+		__put_user_asm(x, ptr, retval, "l", "k",  "ir", errret);\
 		break;							\
 	case 8:								\
 		__put_user_u64((__typeof__(*ptr))(x), ptr, retval);	\
-- 
cgit v1.2.3-70-g09d2


From d42e6af613375be7a9a431628ecd742e87230554 Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Wed, 25 Jun 2008 12:00:44 -0300
Subject: x86: don't always use EFAULT on __put_user_size.

Let the user of the macro specify the desired return.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/uaccess_64.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index 81308763a4c..6532d638d5a 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h
@@ -37,7 +37,7 @@ extern void __put_user_bad(void);
 #define __put_user_nocheck(x, ptr, size)		\
 ({							\
 	int __pu_err;					\
-	__put_user_size((x), (ptr), (size), __pu_err);	\
+	__put_user_size((x), (ptr), (size), __pu_err, -EFAULT);	\
 	__pu_err;					\
 })
 
@@ -65,22 +65,22 @@ extern void __put_user_bad(void);
 	__pu_err;						\
 })
 
-#define __put_user_size(x, ptr, size, retval)				\
+#define __put_user_size(x, ptr, size, retval, errret)			\
 do {									\
 	retval = 0;							\
 	__chk_user_ptr(ptr);						\
 	switch (size) {							\
 	case 1:								\
-		__put_user_asm(x, ptr, retval, "b", "b", "iq", -EFAULT);\
+		__put_user_asm(x, ptr, retval, "b", "b", "iq", errret);\
 		break;							\
 	case 2:								\
-		__put_user_asm(x, ptr, retval, "w", "w", "ir", -EFAULT);\
+		__put_user_asm(x, ptr, retval, "w", "w", "ir", errret);\
 		break;							\
 	case 4:								\
-		__put_user_asm(x, ptr, retval, "l", "k", "ir", -EFAULT);\
+		__put_user_asm(x, ptr, retval, "l", "k", "ir", errret);\
 		break;							\
 	case 8:								\
-		__put_user_asm(x, ptr, retval, "q", "", "Zr", -EFAULT);	\
+		__put_user_asm(x, ptr, retval, "q", "", "Zr", errret);	\
 		break;							\
 	default:							\
 		__put_user_bad();					\
-- 
cgit v1.2.3-70-g09d2


From dc70ddf4098de043ac52f623c7573a11f2ae3d09 Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Wed, 25 Jun 2008 11:48:29 -0300
Subject: x86: merge __put_user_asm and its user.

Move both __put_user_asm and __put_user_size to
uaccess.h. i386 already had a special function for 64-bit access,
so for x86_64, we just define a macro with the same name.
Note that for X86_64, CONFIG_X86_WP_WORKS_OK will always
be defined, so the #else part will never be even compiled in.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/uaccess.h    | 84 ++++++++++++++++++++++++++++++++++++++++++++
 include/asm-x86/uaccess_32.h | 77 ----------------------------------------
 include/asm-x86/uaccess_64.h | 51 ---------------------------
 3 files changed, 84 insertions(+), 128 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h
index 8a1e45fdc98..bcda5d07592 100644
--- a/include/asm-x86/uaccess.h
+++ b/include/asm-x86/uaccess.h
@@ -178,6 +178,90 @@ extern int __get_user_bad(void);
 	__ret_gu;							\
 })
 
+#ifdef CONFIG_X86_32
+#define __put_user_u64(x, addr, err)					\
+	asm volatile("1:	movl %%eax,0(%2)\n"			\
+		     "2:	movl %%edx,4(%2)\n"			\
+		     "3:\n"						\
+		     ".section .fixup,\"ax\"\n"				\
+		     "4:	movl %3,%0\n"				\
+		     "	jmp 3b\n"					\
+		     ".previous\n"					\
+		     _ASM_EXTABLE(1b, 4b)				\
+		     _ASM_EXTABLE(2b, 4b)				\
+		     : "=r" (err)					\
+		     : "A" (x), "r" (addr), "i" (-EFAULT), "0" (err))
+#else
+#define __put_user_u64(x, ptr, retval) \
+	__put_user_asm(x, ptr, retval, "q", "", "Zr", -EFAULT)
+#endif
+
+#ifdef CONFIG_X86_WP_WORKS_OK
+
+#define __put_user_size(x, ptr, size, retval, errret)			\
+do {									\
+	retval = 0;							\
+	__chk_user_ptr(ptr);						\
+	switch (size) {							\
+	case 1:								\
+		__put_user_asm(x, ptr, retval, "b", "b", "iq", errret);	\
+		break;							\
+	case 2:								\
+		__put_user_asm(x, ptr, retval, "w", "w", "ir", errret);	\
+		break;							\
+	case 4:								\
+		__put_user_asm(x, ptr, retval, "l", "k",  "ir", errret);\
+		break;							\
+	case 8:								\
+		__put_user_u64((__typeof__(*ptr))(x), ptr, retval);	\
+		break;							\
+	default:							\
+		__put_user_bad();					\
+	}								\
+} while (0)
+
+#else
+
+#define __put_user_size(x, ptr, size, retval, errret)			\
+do {									\
+	__typeof__(*(ptr))__pus_tmp = x;				\
+	retval = 0;							\
+									\
+	if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp, size) != 0))	\
+		retval = errret;					\
+} while (0)
+
+#endif
+
+#define __put_user_nocheck(x, ptr, size)			\
+({								\
+	long __pu_err;						\
+	__put_user_size((x), (ptr), (size), __pu_err, -EFAULT);	\
+	__pu_err;						\
+})
+
+
+
+/* FIXME: this hack is definitely wrong -AK */
+struct __large_struct { unsigned long buf[100]; };
+#define __m(x) (*(struct __large_struct __user *)(x))
+
+/*
+ * Tell gcc we read from memory instead of writing: this is because
+ * we do not write to any memory gcc knows about, so there are no
+ * aliasing issues.
+ */
+#define __put_user_asm(x, addr, err, itype, rtype, ltype, errret)	\
+	asm volatile("1:	mov"itype" %"rtype"1,%2\n"		\
+		     "2:\n"						\
+		     ".section .fixup,\"ax\"\n"				\
+		     "3:	mov %3,%0\n"				\
+		     "	jmp 2b\n"					\
+		     ".previous\n"					\
+		     _ASM_EXTABLE(1b, 3b)				\
+		     : "=r"(err)					\
+		     : ltype(x), "m" (__m(addr)), "i" (errret), "0" (err))
+
 
 #ifdef CONFIG_X86_32
 # include "uaccess_32.h"
diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h
index 4c47a5ba65e..fab755781b9 100644
--- a/include/asm-x86/uaccess_32.h
+++ b/include/asm-x86/uaccess_32.h
@@ -145,83 +145,6 @@ extern void __put_user_8(void);
 #define __put_user(x, ptr)						\
 	__put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
 
-#define __put_user_nocheck(x, ptr, size)			\
-({								\
-	long __pu_err;						\
-	__put_user_size((x), (ptr), (size), __pu_err, -EFAULT);	\
-	__pu_err;						\
-})
-
-
-#define __put_user_u64(x, addr, err)					\
-	asm volatile("1:	movl %%eax,0(%2)\n"			\
-		     "2:	movl %%edx,4(%2)\n"			\
-		     "3:\n"						\
-		     ".section .fixup,\"ax\"\n"				\
-		     "4:	movl %3,%0\n"				\
-		     "	jmp 3b\n"					\
-		     ".previous\n"					\
-		     _ASM_EXTABLE(1b, 4b)				\
-		     _ASM_EXTABLE(2b, 4b)				\
-		     : "=r" (err)					\
-		     : "A" (x), "r" (addr), "i" (-EFAULT), "0" (err))
-
-#ifdef CONFIG_X86_WP_WORKS_OK
-
-#define __put_user_size(x, ptr, size, retval, errret)			\
-do {									\
-	retval = 0;							\
-	__chk_user_ptr(ptr);						\
-	switch (size) {							\
-	case 1:								\
-		__put_user_asm(x, ptr, retval, "b", "b", "iq", errret);	\
-		break;							\
-	case 2:								\
-		__put_user_asm(x, ptr, retval, "w", "w", "ir", errret);	\
-		break;							\
-	case 4:								\
-		__put_user_asm(x, ptr, retval, "l", "k",  "ir", errret);\
-		break;							\
-	case 8:								\
-		__put_user_u64((__typeof__(*ptr))(x), ptr, retval);	\
-		break;							\
-	default:							\
-		__put_user_bad();					\
-	}								\
-} while (0)
-
-#else
-
-#define __put_user_size(x, ptr, size, retval, errret)			\
-do {									\
-	__typeof__(*(ptr))__pus_tmp = x;				\
-	retval = 0;							\
-									\
-	if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp, size) != 0))	\
-		retval = errret;					\
-} while (0)
-
-#endif
-struct __large_struct { unsigned long buf[100]; };
-#define __m(x) (*(struct __large_struct __user *)(x))
-
-/*
- * Tell gcc we read from memory instead of writing: this is because
- * we do not write to any memory gcc knows about, so there are no
- * aliasing issues.
- */
-#define __put_user_asm(x, addr, err, itype, rtype, ltype, errret)	\
-	asm volatile("1:	mov"itype" %"rtype"1,%2\n"		\
-		     "2:\n"						\
-		     ".section .fixup,\"ax\"\n"				\
-		     "3:	movl %3,%0\n"				\
-		     "	jmp 2b\n"					\
-		     ".previous\n"					\
-		     _ASM_EXTABLE(1b, 3b)				\
-		     : "=r"(err)					\
-		     : ltype (x), "m" (__m(addr)), "i" (errret), "0" (err))
-
-
 #define __get_user_nocheck(x, ptr, size)				\
 ({									\
 	long __gu_err;							\
diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index 6532d638d5a..42c01aa70e7 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h
@@ -34,14 +34,6 @@ extern void __put_user_bad(void);
 #define __get_user_unaligned __get_user
 #define __put_user_unaligned __put_user
 
-#define __put_user_nocheck(x, ptr, size)		\
-({							\
-	int __pu_err;					\
-	__put_user_size((x), (ptr), (size), __pu_err, -EFAULT);	\
-	__pu_err;					\
-})
-
-
 #define __put_user_check(x, ptr, size)				\
 ({								\
 	int __pu_err;						\
@@ -65,49 +57,6 @@ extern void __put_user_bad(void);
 	__pu_err;						\
 })
 
-#define __put_user_size(x, ptr, size, retval, errret)			\
-do {									\
-	retval = 0;							\
-	__chk_user_ptr(ptr);						\
-	switch (size) {							\
-	case 1:								\
-		__put_user_asm(x, ptr, retval, "b", "b", "iq", errret);\
-		break;							\
-	case 2:								\
-		__put_user_asm(x, ptr, retval, "w", "w", "ir", errret);\
-		break;							\
-	case 4:								\
-		__put_user_asm(x, ptr, retval, "l", "k", "ir", errret);\
-		break;							\
-	case 8:								\
-		__put_user_asm(x, ptr, retval, "q", "", "Zr", errret);	\
-		break;							\
-	default:							\
-		__put_user_bad();					\
-	}								\
-} while (0)
-
-/* FIXME: this hack is definitely wrong -AK */
-struct __large_struct { unsigned long buf[100]; };
-#define __m(x) (*(struct __large_struct __user *)(x))
-
-/*
- * Tell gcc we read from memory instead of writing: this is because
- * we do not write to any memory gcc knows about, so there are no
- * aliasing issues.
- */
-#define __put_user_asm(x, addr, err, itype, rtype, ltype, errno)	\
-	asm volatile("1:	mov"itype" %"rtype"1,%2\n"		\
-		     "2:\n"						\
-		     ".section .fixup, \"ax\"\n"			\
-		     "3:	mov %3,%0\n"				\
-		     "	jmp 2b\n"					\
-		     ".previous\n"					\
-		     _ASM_EXTABLE(1b, 3b)				\
-		     : "=r"(err)					\
-		     : ltype (x), "m" (__m(addr)), "i" (errno), "0" (err))
-
-
 #define __get_user_nocheck(x, ptr, size)			\
 ({								\
 	int __gu_err;						\
-- 
cgit v1.2.3-70-g09d2


From 6fd4725d75c5049641640ce23fa5896501dfbd42 Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Wed, 25 Jun 2008 11:57:33 -0300
Subject: x86: don't always use EFAULT on __get_user_size.

Let the user of the macro specify the desired return.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/uaccess_64.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index 42c01aa70e7..e0875d7ae89 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h
@@ -61,27 +61,27 @@ extern void __put_user_bad(void);
 ({								\
 	int __gu_err;						\
 	unsigned long __gu_val;					\
-	__get_user_size(__gu_val, (ptr), (size), __gu_err);	\
+	__get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT);\
 	(x) = (__force typeof(*(ptr)))__gu_val;			\
 	__gu_err;						\
 })
 
-#define __get_user_size(x, ptr, size, retval)				\
+#define __get_user_size(x, ptr, size, retval, errret)			\
 do {									\
 	retval = 0;							\
 	__chk_user_ptr(ptr);						\
 	switch (size) {							\
 	case 1:								\
-		__get_user_asm(x, ptr, retval, "b", "b", "=q", -EFAULT);\
+		__get_user_asm(x, ptr, retval, "b", "b", "=q", errret);\
 		break;							\
 	case 2:								\
-		__get_user_asm(x, ptr, retval, "w", "w", "=r", -EFAULT);\
+		__get_user_asm(x, ptr, retval, "w", "w", "=r", errret);\
 		break;							\
 	case 4:								\
-		__get_user_asm(x, ptr, retval, "l", "k", "=r", -EFAULT);\
+		__get_user_asm(x, ptr, retval, "l", "k", "=r", errret);\
 		break;							\
 	case 8:								\
-		__get_user_asm(x, ptr, retval, "q", "", "=r", -EFAULT);	\
+		__get_user_asm(x, ptr, retval, "q", "", "=r", errret);	\
 		break;							\
 	default:							\
 		(x) = __get_user_bad();					\
-- 
cgit v1.2.3-70-g09d2


From 3f168221167ca7a844fde3bf58e1c7ca0bf9741e Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Wed, 25 Jun 2008 12:48:47 -0300
Subject: x86: merge __get_user_asm and its users.

Move __get_user_asm and __get_user_size and __get_user_nocheck
to uaccess.h. This requires us to define a macro at __get_user_size
for the 64-bit access case.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/uaccess.h    | 50 +++++++++++++++++++++++++++++++++++++++++++-
 include/asm-x86/uaccess_32.h | 41 ------------------------------------
 include/asm-x86/uaccess_64.h | 43 -------------------------------------
 3 files changed, 49 insertions(+), 85 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h
index bcda5d07592..4353b2267a0 100644
--- a/include/asm-x86/uaccess.h
+++ b/include/asm-x86/uaccess.h
@@ -233,6 +233,47 @@ do {									\
 
 #endif
 
+#ifdef CONFIG_X86_32
+#define __get_user_asm_u64(x, ptr, retval, errret)	(x) = __get_user_bad()
+#else
+#define __get_user_asm_u64(x, ptr, retval, errret) \
+	 __get_user_asm(x, ptr, retval, "q", "", "=r", errret)
+#endif
+
+#define __get_user_size(x, ptr, size, retval, errret)			\
+do {									\
+	retval = 0;							\
+	__chk_user_ptr(ptr);						\
+	switch (size) {							\
+	case 1:								\
+		__get_user_asm(x, ptr, retval, "b", "b", "=q", errret);	\
+		break;							\
+	case 2:								\
+		__get_user_asm(x, ptr, retval, "w", "w", "=r", errret);	\
+		break;							\
+	case 4:								\
+		__get_user_asm(x, ptr, retval, "l", "k", "=r", errret);	\
+		break;							\
+	case 8:								\
+		__get_user_asm_u64(x, ptr, retval, errret);		\
+		break;							\
+	default:							\
+		(x) = __get_user_bad();					\
+	}								\
+} while (0)
+
+#define __get_user_asm(x, addr, err, itype, rtype, ltype, errret)	\
+	asm volatile("1:	mov"itype" %2,%"rtype"1\n"		\
+		     "2:\n"						\
+		     ".section .fixup,\"ax\"\n"				\
+		     "3:	mov %3,%0\n"				\
+		     "	xor"itype" %"rtype"1,%"rtype"1\n"		\
+		     "	jmp 2b\n"					\
+		     ".previous\n"					\
+		     _ASM_EXTABLE(1b, 3b)				\
+		     : "=r" (err), ltype(x)				\
+		     : "m" (__m(addr)), "i" (errret), "0" (err))
+
 #define __put_user_nocheck(x, ptr, size)			\
 ({								\
 	long __pu_err;						\
@@ -240,7 +281,14 @@ do {									\
 	__pu_err;						\
 })
 
-
+#define __get_user_nocheck(x, ptr, size)				\
+({									\
+	long __gu_err;							\
+	unsigned long __gu_val;						\
+	__get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT);	\
+	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
+	__gu_err;							\
+})
 
 /* FIXME: this hack is definitely wrong -AK */
 struct __large_struct { unsigned long buf[100]; };
diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h
index fab755781b9..ebfe6b23ba5 100644
--- a/include/asm-x86/uaccess_32.h
+++ b/include/asm-x86/uaccess_32.h
@@ -145,47 +145,6 @@ extern void __put_user_8(void);
 #define __put_user(x, ptr)						\
 	__put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
 
-#define __get_user_nocheck(x, ptr, size)				\
-({									\
-	long __gu_err;							\
-	unsigned long __gu_val;						\
-	__get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT);	\
-	(x) = (__typeof__(*(ptr)))__gu_val;				\
-	__gu_err;							\
-})
-
-#define __get_user_size(x, ptr, size, retval, errret)			\
-do {									\
-	retval = 0;							\
-	__chk_user_ptr(ptr);						\
-	switch (size) {							\
-	case 1:								\
-		__get_user_asm(x, ptr, retval, "b", "b", "=q", errret);	\
-		break;							\
-	case 2:								\
-		__get_user_asm(x, ptr, retval, "w", "w", "=r", errret);	\
-		break;							\
-	case 4:								\
-		__get_user_asm(x, ptr, retval, "l", "", "=r", errret);	\
-		break;							\
-	default:							\
-		(x) = __get_user_bad();					\
-	}								\
-} while (0)
-
-#define __get_user_asm(x, addr, err, itype, rtype, ltype, errret)	\
-	asm volatile("1:	mov"itype" %2,%"rtype"1\n"		\
-		     "2:\n"						\
-		     ".section .fixup,\"ax\"\n"				\
-		     "3:	movl %3,%0\n"				\
-		     "	xor"itype" %"rtype"1,%"rtype"1\n"		\
-		     "	jmp 2b\n"					\
-		     ".previous\n"					\
-		     _ASM_EXTABLE(1b, 3b)				\
-		     : "=r" (err), ltype (x)				\
-		     : "m" (__m(addr)), "i" (errret), "0" (err))
-
-
 unsigned long __must_check __copy_to_user_ll
 		(void __user *to, const void *from, unsigned long n);
 unsigned long __must_check __copy_from_user_ll
diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index e0875d7ae89..42a9769687e 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h
@@ -57,49 +57,6 @@ extern void __put_user_bad(void);
 	__pu_err;						\
 })
 
-#define __get_user_nocheck(x, ptr, size)			\
-({								\
-	int __gu_err;						\
-	unsigned long __gu_val;					\
-	__get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT);\
-	(x) = (__force typeof(*(ptr)))__gu_val;			\
-	__gu_err;						\
-})
-
-#define __get_user_size(x, ptr, size, retval, errret)			\
-do {									\
-	retval = 0;							\
-	__chk_user_ptr(ptr);						\
-	switch (size) {							\
-	case 1:								\
-		__get_user_asm(x, ptr, retval, "b", "b", "=q", errret);\
-		break;							\
-	case 2:								\
-		__get_user_asm(x, ptr, retval, "w", "w", "=r", errret);\
-		break;							\
-	case 4:								\
-		__get_user_asm(x, ptr, retval, "l", "k", "=r", errret);\
-		break;							\
-	case 8:								\
-		__get_user_asm(x, ptr, retval, "q", "", "=r", errret);	\
-		break;							\
-	default:							\
-		(x) = __get_user_bad();					\
-	}								\
-} while (0)
-
-#define __get_user_asm(x, addr, err, itype, rtype, ltype, errno)	\
-	asm volatile("1:	mov"itype" %2,%"rtype"1\n"		\
-		     "2:\n"						\
-		     ".section .fixup, \"ax\"\n"			\
-		     "3:	mov %3,%0\n"				\
-		     "	xor"itype" %"rtype"1,%"rtype"1\n"		\
-		     "	jmp 2b\n"					\
-		     ".previous\n"					\
-		     _ASM_EXTABLE(1b, 3b)				\
-		     : "=r" (err), ltype (x)				\
-		     : "m" (__m(addr)), "i"(errno), "0"(err))
-
 /*
  * Copy To/From Userspace
  */
-- 
cgit v1.2.3-70-g09d2


From 64a978c1e3b2c0e2b663caf54719319919c5c17f Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Wed, 25 Jun 2008 12:59:37 -0300
Subject: x86: be more explicit in __put_user_x.

For both __put_user_x and __put_user_8 macros, pass the error
variable explicitly.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/uaccess_32.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h
index ebfe6b23ba5..2c9067324d2 100644
--- a/include/asm-x86/uaccess_32.h
+++ b/include/asm-x86/uaccess_32.h
@@ -31,11 +31,11 @@ extern void __put_user_2(void);
 extern void __put_user_4(void);
 extern void __put_user_8(void);
 
-#define __put_user_x(size, x, ptr)				\
+#define __put_user_x(size, x, ptr, __ret_pu)			\
 	asm volatile("call __put_user_" #size : "=a" (__ret_pu)	\
 		     :"0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
 
-#define __put_user_8(x, ptr)					\
+#define __put_user_8(x, ptr, __ret_pu)				\
 	asm volatile("call __put_user_8" : "=a" (__ret_pu)	\
 		     : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
 
@@ -66,19 +66,19 @@ extern void __put_user_8(void);
 	__pu_val = x;						\
 	switch (sizeof(*(ptr))) {				\
 	case 1:							\
-		__put_user_x(1, __pu_val, ptr);			\
+		__put_user_x(1, __pu_val, ptr, __ret_pu);	\
 		break;						\
 	case 2:							\
-		__put_user_x(2, __pu_val, ptr);			\
+		__put_user_x(2, __pu_val, ptr, __ret_pu);	\
 		break;						\
 	case 4:							\
-		__put_user_x(4, __pu_val, ptr);			\
+		__put_user_x(4, __pu_val, ptr, __ret_pu);	\
 		break;						\
 	case 8:							\
-		__put_user_8(__pu_val, ptr);			\
+		__put_user_8(__pu_val, ptr, __ret_pu);		\
 		break;						\
 	default:						\
-		__put_user_x(X, __pu_val, ptr);			\
+		__put_user_x(X, __pu_val, ptr, __ret_pu);	\
 		break;						\
 	}							\
 	__ret_pu;						\
-- 
cgit v1.2.3-70-g09d2


From ec840956d269e2beefe3a1c0fd7c1c70a199087f Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Wed, 25 Jun 2008 13:00:48 -0300
Subject: x86: turn __put_user_check directly into put_user.

We also check user pointer in x86_64 put_user, the way i386 does.

In a separate patch for bisecting purposes.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/uaccess_64.h | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index 42a9769687e..9139854d37b 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h
@@ -23,9 +23,6 @@ extern void __put_user_bad(void);
 		     :"c" (ptr),"a" (x)					\
 		     :"ebx")
 
-#define put_user(x, ptr)						\
-	__put_user_check((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
-
 #define __get_user(x, ptr)						\
 	__get_user_nocheck((x), (ptr), sizeof(*(ptr)))
 #define __put_user(x, ptr)						\
@@ -34,11 +31,12 @@ extern void __put_user_bad(void);
 #define __get_user_unaligned __get_user
 #define __put_user_unaligned __put_user
 
-#define __put_user_check(x, ptr, size)				\
+#define put_user(x, ptr)					\
 ({								\
 	int __pu_err;						\
 	typeof(*(ptr)) __user *__pu_addr = (ptr);		\
-	switch (size) {						\
+	__chk_user_ptr(ptr);					\
+	switch (sizeof(*(ptr))) {				\
 	case 1:							\
 		__put_user_x(1, __pu_err, x, __pu_addr);	\
 		break;						\
-- 
cgit v1.2.3-70-g09d2


From e30a44fdbf11c7ca3a0096d71790f176a4a09e03 Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Wed, 25 Jun 2008 13:17:43 -0300
Subject: x86: merge put_user.

Move both versions, which are highly similar, to uaccess.h.
Note that, for x86_64, X86_WP_WORKS_OK is always defined.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/uaccess.h    | 74 +++++++++++++++++++++++++++++++++++++++++
 include/asm-x86/uaccess_32.h | 79 --------------------------------------------
 include/asm-x86/uaccess_64.h | 36 --------------------
 3 files changed, 74 insertions(+), 115 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h
index 4353b2267a0..3a9092af5af 100644
--- a/include/asm-x86/uaccess.h
+++ b/include/asm-x86/uaccess.h
@@ -178,6 +178,12 @@ extern int __get_user_bad(void);
 	__ret_gu;							\
 })
 
+#define __put_user_x(size, x, ptr, __ret_pu)			\
+	asm volatile("call __put_user_" #size : "=a" (__ret_pu)	\
+		     :"0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
+
+
+
 #ifdef CONFIG_X86_32
 #define __put_user_u64(x, addr, err)					\
 	asm volatile("1:	movl %%eax,0(%2)\n"			\
@@ -191,13 +197,71 @@ extern int __get_user_bad(void);
 		     _ASM_EXTABLE(2b, 4b)				\
 		     : "=r" (err)					\
 		     : "A" (x), "r" (addr), "i" (-EFAULT), "0" (err))
+
+#define __put_user_x8(x, ptr, __ret_pu)				\
+	asm volatile("call __put_user_8" : "=a" (__ret_pu)	\
+		     : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
 #else
 #define __put_user_u64(x, ptr, retval) \
 	__put_user_asm(x, ptr, retval, "q", "", "Zr", -EFAULT)
+#define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu)
 #endif
 
+extern void __put_user_bad(void);
+
+/*
+ * Strange magic calling convention: pointer in %ecx,
+ * value in %eax(:%edx), return value in %eax. clobbers %rbx
+ */
+extern void __put_user_1(void);
+extern void __put_user_2(void);
+extern void __put_user_4(void);
+extern void __put_user_8(void);
+
 #ifdef CONFIG_X86_WP_WORKS_OK
 
+/**
+ * put_user: - Write a simple value into user space.
+ * @x:   Value to copy to user space.
+ * @ptr: Destination address, in user space.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * This macro copies a single simple value from kernel space to user
+ * space.  It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and @x must be assignable
+ * to the result of dereferencing @ptr.
+ *
+ * Returns zero on success, or -EFAULT on error.
+ */
+#define put_user(x, ptr)					\
+({								\
+	int __ret_pu;						\
+	__typeof__(*(ptr)) __pu_val;				\
+	__chk_user_ptr(ptr);					\
+	__pu_val = x;						\
+	switch (sizeof(*(ptr))) {				\
+	case 1:							\
+		__put_user_x(1, __pu_val, ptr, __ret_pu);	\
+		break;						\
+	case 2:							\
+		__put_user_x(2, __pu_val, ptr, __ret_pu);	\
+		break;						\
+	case 4:							\
+		__put_user_x(4, __pu_val, ptr, __ret_pu);	\
+		break;						\
+	case 8:							\
+		__put_user_x8(__pu_val, ptr, __ret_pu);		\
+		break;						\
+	default:						\
+		__put_user_x(X, __pu_val, ptr, __ret_pu);	\
+		break;						\
+	}							\
+	__ret_pu;						\
+})
+
 #define __put_user_size(x, ptr, size, retval, errret)			\
 do {									\
 	retval = 0;							\
@@ -231,6 +295,16 @@ do {									\
 		retval = errret;					\
 } while (0)
 
+#define put_user(x, ptr)					\
+({								\
+	int __ret_pu;						\
+	__typeof__(*(ptr))__pus_tmp = x;			\
+	__ret_pu = 0;						\
+	if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp,		\
+				       sizeof(*(ptr))) != 0))	\
+		__ret_pu = -EFAULT;				\
+	__ret_pu;						\
+})
 #endif
 
 #ifdef CONFIG_X86_32
diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h
index 2c9067324d2..e5c0437f509 100644
--- a/include/asm-x86/uaccess_32.h
+++ b/include/asm-x86/uaccess_32.h
@@ -20,85 +20,6 @@ extern struct movsl_mask {
 } ____cacheline_aligned_in_smp movsl_mask;
 #endif
 
-extern void __put_user_bad(void);
-
-/*
- * Strange magic calling convention: pointer in %ecx,
- * value in %eax(:%edx), return value in %eax, no clobbers.
- */
-extern void __put_user_1(void);
-extern void __put_user_2(void);
-extern void __put_user_4(void);
-extern void __put_user_8(void);
-
-#define __put_user_x(size, x, ptr, __ret_pu)			\
-	asm volatile("call __put_user_" #size : "=a" (__ret_pu)	\
-		     :"0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
-
-#define __put_user_8(x, ptr, __ret_pu)				\
-	asm volatile("call __put_user_8" : "=a" (__ret_pu)	\
-		     : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
-
-
-/**
- * put_user: - Write a simple value into user space.
- * @x:   Value to copy to user space.
- * @ptr: Destination address, in user space.
- *
- * Context: User context only.  This function may sleep.
- *
- * This macro copies a single simple value from kernel space to user
- * space.  It supports simple types like char and int, but not larger
- * data types like structures or arrays.
- *
- * @ptr must have pointer-to-simple-variable type, and @x must be assignable
- * to the result of dereferencing @ptr.
- *
- * Returns zero on success, or -EFAULT on error.
- */
-#ifdef CONFIG_X86_WP_WORKS_OK
-
-#define put_user(x, ptr)					\
-({								\
-	int __ret_pu;						\
-	__typeof__(*(ptr)) __pu_val;				\
-	__chk_user_ptr(ptr);					\
-	__pu_val = x;						\
-	switch (sizeof(*(ptr))) {				\
-	case 1:							\
-		__put_user_x(1, __pu_val, ptr, __ret_pu);	\
-		break;						\
-	case 2:							\
-		__put_user_x(2, __pu_val, ptr, __ret_pu);	\
-		break;						\
-	case 4:							\
-		__put_user_x(4, __pu_val, ptr, __ret_pu);	\
-		break;						\
-	case 8:							\
-		__put_user_8(__pu_val, ptr, __ret_pu);		\
-		break;						\
-	default:						\
-		__put_user_x(X, __pu_val, ptr, __ret_pu);	\
-		break;						\
-	}							\
-	__ret_pu;						\
-})
-
-#else
-#define put_user(x, ptr)					\
-({								\
-	int __ret_pu;						\
-	__typeof__(*(ptr))__pus_tmp = x;			\
-	__ret_pu = 0;						\
-	if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp,		\
-				       sizeof(*(ptr))) != 0))	\
-		__ret_pu = -EFAULT;				\
-	__ret_pu;						\
-})
-
-
-#endif
-
 /**
  * __get_user: - Get a simple variable from user space, with less checking.
  * @x:   Variable to store result.
diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index 9139854d37b..2e75a5d1a17 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h
@@ -11,18 +11,6 @@
 
 #define ARCH_HAS_SEARCH_EXTABLE
 
-extern void __put_user_1(void);
-extern void __put_user_2(void);
-extern void __put_user_4(void);
-extern void __put_user_8(void);
-extern void __put_user_bad(void);
-
-#define __put_user_x(size, ret, x, ptr)					\
-	asm volatile("call __put_user_" #size				\
-		     :"=a" (ret)					\
-		     :"c" (ptr),"a" (x)					\
-		     :"ebx")
-
 #define __get_user(x, ptr)						\
 	__get_user_nocheck((x), (ptr), sizeof(*(ptr)))
 #define __put_user(x, ptr)						\
@@ -31,30 +19,6 @@ extern void __put_user_bad(void);
 #define __get_user_unaligned __get_user
 #define __put_user_unaligned __put_user
 
-#define put_user(x, ptr)					\
-({								\
-	int __pu_err;						\
-	typeof(*(ptr)) __user *__pu_addr = (ptr);		\
-	__chk_user_ptr(ptr);					\
-	switch (sizeof(*(ptr))) {				\
-	case 1:							\
-		__put_user_x(1, __pu_err, x, __pu_addr);	\
-		break;						\
-	case 2:							\
-		__put_user_x(2, __pu_err, x, __pu_addr);	\
-		break;						\
-	case 4:							\
-		__put_user_x(4, __pu_err, x, __pu_addr);	\
-		break;						\
-	case 8:							\
-		__put_user_x(8, __pu_err, x, __pu_addr);	\
-		break;						\
-	default:						\
-		__put_user_bad();				\
-	}							\
-	__pu_err;						\
-})
-
 /*
  * Copy To/From Userspace
  */
-- 
cgit v1.2.3-70-g09d2


From 8cb834e99f44bd56409b794504ae2b170675fc92 Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Wed, 25 Jun 2008 14:43:30 -0300
Subject: x86: move __get_user and __put_user into uaccess.h.

We also carry the unaligned version with us. Only x86_64 uses
it, but there's no problem in defining it.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/uaccess.h    | 47 ++++++++++++++++++++++++++++++++++++++++++++
 include/asm-x86/uaccess_32.h | 46 -------------------------------------------
 include/asm-x86/uaccess_64.h |  8 --------
 3 files changed, 47 insertions(+), 54 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h
index 3a9092af5af..7c7b46af636 100644
--- a/include/asm-x86/uaccess.h
+++ b/include/asm-x86/uaccess.h
@@ -383,7 +383,54 @@ struct __large_struct { unsigned long buf[100]; };
 		     _ASM_EXTABLE(1b, 3b)				\
 		     : "=r"(err)					\
 		     : ltype(x), "m" (__m(addr)), "i" (errret), "0" (err))
+/**
+ * __get_user: - Get a simple variable from user space, with less checking.
+ * @x:   Variable to store result.
+ * @ptr: Source address, in user space.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * This macro copies a single simple variable from user space to kernel
+ * space.  It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and the result of
+ * dereferencing @ptr must be assignable to @x without a cast.
+ *
+ * Caller must check the pointer with access_ok() before calling this
+ * function.
+ *
+ * Returns zero on success, or -EFAULT on error.
+ * On error, the variable @x is set to zero.
+ */
+
+#define __get_user(x, ptr)						\
+	__get_user_nocheck((x), (ptr), sizeof(*(ptr)))
+/**
+ * __put_user: - Write a simple value into user space, with less checking.
+ * @x:   Value to copy to user space.
+ * @ptr: Destination address, in user space.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * This macro copies a single simple value from kernel space to user
+ * space.  It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and @x must be assignable
+ * to the result of dereferencing @ptr.
+ *
+ * Caller must check the pointer with access_ok() before calling this
+ * function.
+ *
+ * Returns zero on success, or -EFAULT on error.
+ */
+
+#define __put_user(x, ptr)						\
+	__put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
 
+#define __get_user_unaligned __get_user
+#define __put_user_unaligned __put_user
 
 #ifdef CONFIG_X86_32
 # include "uaccess_32.h"
diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h
index e5c0437f509..d3b5bf88ea8 100644
--- a/include/asm-x86/uaccess_32.h
+++ b/include/asm-x86/uaccess_32.h
@@ -20,52 +20,6 @@ extern struct movsl_mask {
 } ____cacheline_aligned_in_smp movsl_mask;
 #endif
 
-/**
- * __get_user: - Get a simple variable from user space, with less checking.
- * @x:   Variable to store result.
- * @ptr: Source address, in user space.
- *
- * Context: User context only.  This function may sleep.
- *
- * This macro copies a single simple variable from user space to kernel
- * space.  It supports simple types like char and int, but not larger
- * data types like structures or arrays.
- *
- * @ptr must have pointer-to-simple-variable type, and the result of
- * dereferencing @ptr must be assignable to @x without a cast.
- *
- * Caller must check the pointer with access_ok() before calling this
- * function.
- *
- * Returns zero on success, or -EFAULT on error.
- * On error, the variable @x is set to zero.
- */
-#define __get_user(x, ptr)				\
-	__get_user_nocheck((x), (ptr), sizeof(*(ptr)))
-
-
-/**
- * __put_user: - Write a simple value into user space, with less checking.
- * @x:   Value to copy to user space.
- * @ptr: Destination address, in user space.
- *
- * Context: User context only.  This function may sleep.
- *
- * This macro copies a single simple value from kernel space to user
- * space.  It supports simple types like char and int, but not larger
- * data types like structures or arrays.
- *
- * @ptr must have pointer-to-simple-variable type, and @x must be assignable
- * to the result of dereferencing @ptr.
- *
- * Caller must check the pointer with access_ok() before calling this
- * function.
- *
- * Returns zero on success, or -EFAULT on error.
- */
-#define __put_user(x, ptr)						\
-	__put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
-
 unsigned long __must_check __copy_to_user_ll
 		(void __user *to, const void *from, unsigned long n);
 unsigned long __must_check __copy_from_user_ll
diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index 2e75a5d1a17..b5bacd61368 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h
@@ -11,14 +11,6 @@
 
 #define ARCH_HAS_SEARCH_EXTABLE
 
-#define __get_user(x, ptr)						\
-	__get_user_nocheck((x), (ptr), sizeof(*(ptr)))
-#define __put_user(x, ptr)						\
-	__put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
-
-#define __get_user_unaligned __get_user
-#define __put_user_unaligned __put_user
-
 /*
  * Copy To/From Userspace
  */
-- 
cgit v1.2.3-70-g09d2


From 8bc7de0c5dc0a5d3bcdc04bac6de0c799f91c5e4 Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Wed, 25 Jun 2008 14:53:41 -0300
Subject: x86: put movsl_mask into uaccess.h.

x86_64 does not need it, but it won't have X86_INTEL_USERCOPY
defined either.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/uaccess.h    | 9 +++++++++
 include/asm-x86/uaccess_32.h | 9 ---------
 2 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h
index 7c7b46af636..0c4ab788d01 100644
--- a/include/asm-x86/uaccess.h
+++ b/include/asm-x86/uaccess.h
@@ -432,6 +432,15 @@ struct __large_struct { unsigned long buf[100]; };
 #define __get_user_unaligned __get_user
 #define __put_user_unaligned __put_user
 
+/*
+ * movsl can be slow when source and dest are not both 8-byte aligned
+ */
+#ifdef CONFIG_X86_INTEL_USERCOPY
+extern struct movsl_mask {
+	int mask;
+} ____cacheline_aligned_in_smp movsl_mask;
+#endif
+
 #ifdef CONFIG_X86_32
 # include "uaccess_32.h"
 #else
diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h
index d3b5bf88ea8..3467749c6be 100644
--- a/include/asm-x86/uaccess_32.h
+++ b/include/asm-x86/uaccess_32.h
@@ -11,15 +11,6 @@
 #include <asm/asm.h>
 #include <asm/page.h>
 
-/*
- * movsl can be slow when source and dest are not both 8-byte aligned
- */
-#ifdef CONFIG_X86_INTEL_USERCOPY
-extern struct movsl_mask {
-	int mask;
-} ____cacheline_aligned_in_smp movsl_mask;
-#endif
-
 unsigned long __must_check __copy_to_user_ll
 		(void __user *to, const void *from, unsigned long n);
 unsigned long __must_check __copy_from_user_ll
-- 
cgit v1.2.3-70-g09d2


From 22cac1670786108ccd4caa0656c39fa4ba69fa7d Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Wed, 25 Jun 2008 14:56:53 -0300
Subject: x86: define architectural characteristics in uaccess.h.

Remove them from the arch-specific file.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/uaccess.h    | 3 +++
 include/asm-x86/uaccess_32.h | 2 --
 include/asm-x86/uaccess_64.h | 3 ---
 3 files changed, 3 insertions(+), 5 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h
index 0c4ab788d01..f6fa4d841bb 100644
--- a/include/asm-x86/uaccess.h
+++ b/include/asm-x86/uaccess.h
@@ -441,9 +441,12 @@ extern struct movsl_mask {
 } ____cacheline_aligned_in_smp movsl_mask;
 #endif
 
+#define ARCH_HAS_NOCACHE_UACCESS 1
+
 #ifdef CONFIG_X86_32
 # include "uaccess_32.h"
 #else
+# define ARCH_HAS_SEARCH_EXTABLE
 # include "uaccess_64.h"
 #endif
 
diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h
index 3467749c6be..6fdef39a0bc 100644
--- a/include/asm-x86/uaccess_32.h
+++ b/include/asm-x86/uaccess_32.h
@@ -156,8 +156,6 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
 	return __copy_from_user_ll(to, from, n);
 }
 
-#define ARCH_HAS_NOCACHE_UACCESS
-
 static __always_inline unsigned long __copy_from_user_nocache(void *to,
 				const void __user *from, unsigned long n)
 {
diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index b5bacd61368..4e3ec004e14 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h
@@ -9,8 +9,6 @@
 #include <linux/prefetch.h>
 #include <asm/page.h>
 
-#define ARCH_HAS_SEARCH_EXTABLE
-
 /*
  * Copy To/From Userspace
  */
@@ -180,7 +178,6 @@ __copy_to_user_inatomic(void __user *dst, const void *src, unsigned size)
 	return copy_user_generic((__force void *)dst, src, size);
 }
 
-#define ARCH_HAS_NOCACHE_UACCESS 1
 extern long __copy_user_nocache(void *dst, const void __user *src,
 				unsigned size, int zerorest);
 
-- 
cgit v1.2.3-70-g09d2


From 2dc807b37b7b8c7df445513ad2b415df4ebcaf6d Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 8 Jul 2008 18:56:38 -0700
Subject: x86: make max_pfn cover acpi table below 4g

When system have 4g less ram installed, and acpi table sit
near end of ram, make max_pfn cover them too,
so 64bit kernel don't need to mess up fixmap.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: "Suresh Siddha" <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/e820.c  | 18 ++++++++++++------
 arch/x86/kernel/setup.c | 13 +++----------
 include/asm-x86/e820.h  |  2 +-
 3 files changed, 16 insertions(+), 17 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index e07d4019e26..2e08619a9c5 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1056,12 +1056,20 @@ unsigned long __initdata end_user_pfn = MAX_ARCH_PFN;
 /*
  * Find the highest page frame number we have available
  */
-unsigned long __init e820_end_of_ram(void)
+unsigned long __init e820_end(void)
 {
-	unsigned long last_pfn;
+	int i;
+	unsigned long last_pfn = 0;
 	unsigned long max_arch_pfn = MAX_ARCH_PFN;
 
-	last_pfn = find_max_pfn_with_active_regions();
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+		unsigned long end_pfn;
+
+		end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT;
+		if (end_pfn > last_pfn)
+			last_pfn = end_pfn;
+	}
 
 	if (last_pfn > max_arch_pfn)
 		last_pfn = max_arch_pfn;
@@ -1192,9 +1200,7 @@ static int __init parse_memmap_opt(char *p)
 		 * the real mem size before original memory map is
 		 * reset.
 		 */
-		e820_register_active_regions(0, 0, -1UL);
-		saved_max_pfn = e820_end_of_ram();
-		remove_all_active_ranges();
+		saved_max_pfn = e820_end();
 #endif
 		e820.nr_map = 0;
 		userdef = 1;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index bea8ae77d05..a7c3471ea17 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -709,22 +709,18 @@ void __init setup_arch(char **cmdline_p)
 	early_gart_iommu_check();
 #endif
 
-	e820_register_active_regions(0, 0, -1UL);
 	/*
 	 * partially used pages are not usable - thus
 	 * we are rounding upwards:
 	 */
-	max_pfn = e820_end_of_ram();
+	max_pfn = e820_end();
 
 	/* preallocate 4k for mptable mpc */
 	early_reserve_e820_mpc_new();
 	/* update e820 for memory not covered by WB MTRRs */
 	mtrr_bp_init();
-	if (mtrr_trim_uncached_memory(max_pfn)) {
-		remove_all_active_ranges();
-		e820_register_active_regions(0, 0, -1UL);
-		max_pfn = e820_end_of_ram();
-	}
+	if (mtrr_trim_uncached_memory(max_pfn))
+		max_pfn = e820_end();
 
 #ifdef CONFIG_X86_32
 	/* max_low_pfn get updated here */
@@ -767,9 +763,6 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	acpi_boot_table_init();
 
-	/* Remove active ranges so rediscovery with NUMA-awareness happens */
-	remove_all_active_ranges();
-
 #ifdef CONFIG_ACPI_NUMA
 	/*
 	 * Parse SRAT to discover nodes.
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index a20d0a7f589..78c03d7bf44 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -99,7 +99,7 @@ extern void free_early(u64 start, u64 end);
 extern void early_res_to_bootmem(u64 start, u64 end);
 extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
 
-extern unsigned long e820_end_of_ram(void);
+extern unsigned long e820_end(void);
 extern int e820_find_active_region(const struct e820entry *ei,
 				  unsigned long start_pfn,
 				  unsigned long last_pfn,
-- 
cgit v1.2.3-70-g09d2


From 8cbfd4f44d5c6cb1ba9e44f493c2f2a2c9b769d7 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Wed, 9 Jul 2008 16:29:41 +0900
Subject: x86, iommu: replace CONFIG_IOMMU with CONFIG_GART_IOMMU in iommu.h

Hmm, looks like it would be nice to have more cleanups of iommu.h and
gart.h.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/iommu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/iommu.h b/include/asm-x86/iommu.h
index 07862fdd23c..cb7e9891bfd 100644
--- a/include/asm-x86/iommu.h
+++ b/include/asm-x86/iommu.h
@@ -5,7 +5,7 @@ extern void pci_iommu_shutdown(void);
 extern void no_iommu_init(void);
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
-#ifdef CONFIG_IOMMU
+#ifdef CONFIG_GART_IOMMU
 extern void gart_iommu_init(void);
 extern void gart_iommu_shutdown(void);
 extern void __init gart_parse_options(char *);
-- 
cgit v1.2.3-70-g09d2


From e2079c43861f71b2deb78ee20e247ad954fdd67e Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Tue, 8 Jul 2008 16:12:26 +0200
Subject: x86: fix C1E && nx6325 stability problem

The problems are that, with the ACPI vs timer overring issue _fixed_,
after using the box for some time (between several seconds and 1 hour, at
random) processes get very high CPU loads (once I've got X using 107% of
the CPU, for example) and the system becomes unresponsive, as though there
were interrupts lost or something similar.

Andreas Herrman reproduced similar problems:

> Ok, now I've reproduced the stability problem.
> - Using tip/master,
> - reverting e38502eb8aa82314d5ab0eba45f50e6790dadd88 and
> - applying your patch from this posting
>   http://marc.info/?l=linux-kernel&m=121539354224562&w=4
>
> Starting X, firefox, gimp, tuxpaint and doing some drawing in tuxpaint
> results in a slow system. Drawing is almost not possible anymore --
> Selections of new colors, cursors etc. is performed with huge delay
> if it's performed at all.
>
> BTW, the code sets up timer IRQ as Virtual Wire IRQ:
>
> Jul  8 14:57:58 kodscha IO-APIC (apicid-pin) 2-22, 2-23 not connected.
> Jul  8 14:57:58 kodscha ..TIMER: vector=0x30 apic1=0 pin1=2 apic2=-1 pin2=-1
> Jul  8 14:57:58 kodscha ...trying to set up timer as Virtual Wire IRQ... works.
>
> and both INT0 and INT2 of IOAPIC are masked:
>
> Jul  8 14:57:58 kodscha NR Dst Mask Trig IRR Pol Stat Dmod Deli Vect:
> Jul  8 14:57:58 kodscha 00 000 1    0    0   0   0    0    0    00
> Jul  8 14:57:58 kodscha 01 003 0    0    0   0   0    1    1    31
> Jul  8 14:57:58 kodscha 02 003 1    0    0   0   0    0    0    30
>
> I've also seen strange CPU utilization -- with syslog-ng:
>
> top - 15:33:06 up 35 min,  4 users,  load average: 1.70, 0.68, 0.37
> Tasks:  64 total,   4 running,  60 sleeping,   0 stopped,   0 zombie
> Cpu0  :  0.0%us,100.0%sy,  0.0%ni,  0.0%id,  0.0%wa,  0.0%hi,  0.0%si,  0.0%st
> Cpu1  :  6.4%us, 87.2%sy,  0.0%ni,  5.8%id,  0.0%wa,  0.6%hi,  0.0%si,  0.0%st
> Mem:    895384k total,   283568k used,   611816k free,    35492k buffers
> Swap:  1959920k total,        0k used,  1959920k free,   163044k cached
>
>   PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND
>  4632 root      20   0 17216  800  580 S  104  0.1   0:34.22 syslog-ng
> 28505 root      20   0  205m  11m 4024 S    6  1.3   0:21.16 X
> 28518 root      20   0 56292 5652 4492 S    1  0.6   0:01.80 fluxbox
>     1 root      20   0  3724  608  508 S    0  0.1   0:00.36 init
>
> So far I have no clue why C1E-idle in conjunction with virtual wire
> mode causes this strange behaviour.
>
> ... and I start to think about the root cause of all this.
>
> I've performed similar tests under X with the IRQ0/INT0 configuration and
> I did not see above symptoms.

So lets fall back to the IRQ0/INT0 configuration on this box.

This basically restores the dont-use-the-lapic-timer exception mechanism
that was unconditional on this box prior commit 8750bf5 ("x86: add C1E
aware idle function").

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/acpi/boot.c  | 43 +++++++++++++++++++++++++++++++++++++------
 arch/x86/kernel/io_apic_32.c | 10 ++++++++++
 arch/x86/kernel/io_apic_64.c | 10 ++++++++++
 include/asm-x86/genapic_32.h |  1 +
 include/asm-x86/genapic_64.h |  2 ++
 5 files changed, 60 insertions(+), 6 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 5c0107602b6..e1f01394b68 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1373,8 +1373,6 @@ static void __init acpi_process_madt(void)
 	return;
 }
 
-#ifdef __i386__
-
 static int __init disable_acpi_irq(const struct dmi_system_id *d)
 {
 	if (!acpi_force) {
@@ -1435,6 +1433,17 @@ dmi_disable_irq0_through_ioapic(const struct dmi_system_id *d)
 	return 0;
 }
 
+/*
+ * Force ignoring BIOS IRQ0 pin2 override
+ */
+static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
+{
+	pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", d->ident);
+	acpi_skip_timer_override = 1;
+	force_mask_ioapic_irq_2();
+	return 0;
+}
+
 /*
  * If your system is blacklisted here, but you find that acpi=force
  * works for you, please contact acpi-devel@sourceforge.net
@@ -1628,11 +1637,35 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
 		     DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"),
 		     },
 	 },
+	/*
+	 * HP laptops which use a DSDT reporting as HP/SB400/10000,
+	 * which includes some code which overrides all temperature
+	 * trip points to 16C if the INTIN2 input of the I/O APIC
+	 * is enabled.  This input is incorrectly designated the
+	 * ISA IRQ 0 via an interrupt source override even though
+	 * it is wired to the output of the master 8259A and INTIN0
+	 * is not connected at all.  Force ignoring BIOS IRQ0 pin2
+	 * override in that cases.
+	 */
+	{
+	 .callback = dmi_ignore_irq0_timer_override,
+	 .ident = "HP NX6125 laptop",
+	 .matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6125"),
+		     },
+	 },
+	{
+	 .callback = dmi_ignore_irq0_timer_override,
+	 .ident = "HP NX6325 laptop",
+	 .matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"),
+		     },
+	 },
 	{}
 };
 
-#endif				/* __i386__ */
-
 /*
  * acpi_boot_table_init() and acpi_boot_init()
  *  called from setup_arch(), always.
@@ -1660,9 +1693,7 @@ int __init acpi_boot_table_init(void)
 {
 	int error;
 
-#ifdef __i386__
 	dmi_check_system(acpi_dmi_table);
-#endif
 
 	/*
 	 * If acpi_disabled, bail out
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 337ec3438a8..6b220b9dcbb 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -59,6 +59,13 @@ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 static DEFINE_SPINLOCK(ioapic_lock);
 static DEFINE_SPINLOCK(vector_lock);
 
+static bool mask_ioapic_irq_2 __initdata;
+
+void __init force_mask_ioapic_irq_2(void)
+{
+	mask_ioapic_irq_2 = true;
+}
+
 int timer_through_8259 __initdata;
 
 /*
@@ -2172,6 +2179,9 @@ static inline void __init check_timer(void)
 	printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
 		vector, apic1, pin1, apic2, pin2);
 
+	if (mask_ioapic_irq_2)
+		mask_IO_APIC_irq(2);
+
 	/*
 	 * Some BIOS writers are clueless and report the ExtINTA
 	 * I/O APIC input from the cascaded 8259A as the timer
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 2b4c40bc12c..0494cdb270c 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -94,6 +94,13 @@ static int no_timer_check;
 
 static int disable_timer_pin_1 __initdata;
 
+static bool mask_ioapic_irq_2 __initdata;
+
+void __init force_mask_ioapic_irq_2(void)
+{
+	mask_ioapic_irq_2 = true;
+}
+
 int timer_through_8259 __initdata;
 
 /* Where if anywhere is the i8259 connect in external int mode */
@@ -1698,6 +1705,9 @@ static inline void __init check_timer(void)
 	apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
 		cfg->vector, apic1, pin1, apic2, pin2);
 
+	if (mask_ioapic_irq_2)
+		mask_IO_APIC_irq(2);
+
 	/*
 	 * Some BIOS writers are clueless and report the ExtINTA
 	 * I/O APIC input from the cascaded 8259A as the timer
diff --git a/include/asm-x86/genapic_32.h b/include/asm-x86/genapic_32.h
index b02ea6e17de..8d4c8bdb906 100644
--- a/include/asm-x86/genapic_32.h
+++ b/include/asm-x86/genapic_32.h
@@ -119,5 +119,6 @@ enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
 #define is_uv_system()			0
 #define uv_wakeup_secondary(a, b)	1
 
+extern void force_mask_ioapic_irq_2(void);
 
 #endif
diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h
index 0f8504627c4..082ad020e41 100644
--- a/include/asm-x86/genapic_64.h
+++ b/include/asm-x86/genapic_64.h
@@ -46,4 +46,6 @@ extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
 
 extern void setup_apic_routing(void);
 
+extern void force_mask_ioapic_irq_2(void);
+
 #endif
-- 
cgit v1.2.3-70-g09d2


From 1129585a08baf58582c0da91e572cb29e3179acf Mon Sep 17 00:00:00 2001
From: Vitaly Mayatskikh <v.mayatskih@gmail.com>
Date: Wed, 2 Jul 2008 15:48:21 +0200
Subject: x86: introduce copy_user_handle_tail() routine

Introduce generic C routine for handling necessary tail operations after
protection fault in copy_*_user on x86.

Signed-off-by: Vitaly Mayatskikh <v.mayatskih@gmail.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/lib/usercopy_64.c   | 23 +++++++++++++++++++++++
 include/asm-x86/uaccess_64.h |  3 +++
 2 files changed, 26 insertions(+)

(limited to 'include/asm-x86')

diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 0c89d1bb028..f4df6e7c718 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -158,3 +158,26 @@ unsigned long copy_in_user(void __user *to, const void __user *from, unsigned le
 }
 EXPORT_SYMBOL(copy_in_user);
 
+/*
+ * Try to copy last bytes and clear the rest if needed.
+ * Since protection fault in copy_from/to_user is not a normal situation,
+ * it is not necessary to optimize tail handling.
+ */
+unsigned long
+copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest)
+{
+	char c;
+	unsigned zero_len;
+
+	for (; len; --len) {
+		if (__get_user_nocheck(c, from++, sizeof(char)))
+			break;
+		if (__put_user_nocheck(c, to++, sizeof(char)))
+			break;
+	}
+
+	for (c = 0, zero_len = len; zerorest && zero_len; --zero_len)
+		if (__put_user_nocheck(c, to++, sizeof(char)))
+			break;
+	return len;
+}
diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index 4e3ec004e14..515d4dce96b 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h
@@ -195,4 +195,7 @@ static inline int __copy_from_user_inatomic_nocache(void *dst,
 	return __copy_user_nocache(dst, src, size, 0);
 }
 
+unsigned long
+copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest);
+
 #endif /* __X86_64_UACCESS_H */
-- 
cgit v1.2.3-70-g09d2


From 4fb3dc2729c22ed1b023475fe28b720460251de1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 9 Jul 2008 13:07:23 +0200
Subject: x86: build fix for "x86: fix C1E && nx6325 stability problem"

fix:

 arch/x86/kernel/built-in.o: In function `dmi_ignore_irq0_timer_override':
 boot.c:(.init.text+0x3ea4): undefined reference to `force_mask_ioapic_irq_2'

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/genapic_32.h | 4 ++++
 include/asm-x86/genapic_64.h | 4 ++++
 2 files changed, 8 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/genapic_32.h b/include/asm-x86/genapic_32.h
index 8d4c8bdb906..33a73f5ed22 100644
--- a/include/asm-x86/genapic_32.h
+++ b/include/asm-x86/genapic_32.h
@@ -119,6 +119,10 @@ enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
 #define is_uv_system()			0
 #define uv_wakeup_secondary(a, b)	1
 
+#ifdef CONFIG_X86_IO_APIC
 extern void force_mask_ioapic_irq_2(void);
+#else
+static inline void force_mask_ioapic_irq_2(void) { }
+#endif
 
 #endif
diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h
index 082ad020e41..647e4e5c258 100644
--- a/include/asm-x86/genapic_64.h
+++ b/include/asm-x86/genapic_64.h
@@ -46,6 +46,10 @@ extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
 
 extern void setup_apic_routing(void);
 
+#ifdef CONFIG_X86_IO_APIC
 extern void force_mask_ioapic_irq_2(void);
+#else
+static inline void force_mask_ioapic_irq_2(void) { }
+#endif
 
 #endif
-- 
cgit v1.2.3-70-g09d2


From f34fa82b19581affffb14f8ad9bdad9b5ab4daf5 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Wed, 9 Jul 2008 20:16:36 -0700
Subject: x86, acpi: merge __acpi_map_table

and let 64-bit to fall back to use fixmap too.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/acpi/boot.c | 23 ++++++-----------------
 include/asm-x86/fixmap_64.h |  5 +++++
 2 files changed, 11 insertions(+), 17 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index bf7b4f7f60e..a31a579a47c 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -109,21 +109,6 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
  */
 enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC;
 
-#ifdef	CONFIG_X86_64
-
-/* rely on all ACPI tables being in the direct mapping */
-char *__init __acpi_map_table(unsigned long phys_addr, unsigned long size)
-{
-	if (!phys_addr || !size)
-		return NULL;
-
-	if (phys_addr+size <= (max_pfn_mapped << PAGE_SHIFT) + PAGE_SIZE)
-		return __va(phys_addr);
-
-	return NULL;
-}
-
-#else
 
 /*
  * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
@@ -142,11 +127,15 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
 	unsigned long base, offset, mapped_size;
 	int idx;
 
-	if (phys + size < 8 * 1024 * 1024)
+	if (!phys || !size)
+		return NULL;
+
+	if (phys+size <= (max_pfn_mapped << PAGE_SHIFT))
 		return __va(phys);
 
 	offset = phys & (PAGE_SIZE - 1);
 	mapped_size = PAGE_SIZE - offset;
+	clear_fixmap(FIX_ACPI_END);
 	set_fixmap(FIX_ACPI_END, phys);
 	base = fix_to_virt(FIX_ACPI_END);
 
@@ -158,13 +147,13 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
 		if (--idx < FIX_ACPI_BEGIN)
 			return NULL;	/* cannot handle this */
 		phys += PAGE_SIZE;
+		clear_fixmap(idx);
 		set_fixmap(idx, phys);
 		mapped_size += PAGE_SIZE;
 	}
 
 	return ((unsigned char *)base + offset);
 }
-#endif
 
 #ifdef CONFIG_PCI_MMCONFIG
 /* The physical address of the MMCONFIG aperture.  Set from ACPI tables. */
diff --git a/include/asm-x86/fixmap_64.h b/include/asm-x86/fixmap_64.h
index 1a0b61eb02f..6a4789d57e6 100644
--- a/include/asm-x86/fixmap_64.h
+++ b/include/asm-x86/fixmap_64.h
@@ -12,6 +12,7 @@
 #define _ASM_FIXMAP_64_H
 
 #include <linux/kernel.h>
+#include <asm/acpi.h>
 #include <asm/apicdef.h>
 #include <asm/page.h>
 #include <asm/vsyscall.h>
@@ -49,6 +50,10 @@ enum fixed_addresses {
 #ifdef CONFIG_PARAVIRT
 	FIX_PARAVIRT_BOOTMAP,
 #endif
+#ifdef CONFIG_ACPI
+	FIX_ACPI_BEGIN,
+	FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
+#endif
 #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
 	FIX_OHCI1394_BASE,
 #endif
-- 
cgit v1.2.3-70-g09d2


From e93be88d7ec9d183c1cca51eabe040c051cdb687 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Thu, 10 Jul 2008 08:27:49 +0900
Subject: x86: clean up iommu.h and gart.h

This remove lots of duplications in iommu.h and gart.h.

The end result of this patch is:

- iommu.h is a header file for everyone related with IOMMUs.

- gart.h is the private header file. Only pci-gart_64.c and its friends
  include it.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: fujita.tomonori@lab.ntt.co.jp
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/gart.h  | 34 ++++------------------------------
 include/asm-x86/iommu.h | 29 +++++++++++++++++------------
 2 files changed, 21 insertions(+), 42 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/gart.h b/include/asm-x86/gart.h
index 417f76ea677..33b9aeeb35a 100644
--- a/include/asm-x86/gart.h
+++ b/include/asm-x86/gart.h
@@ -1,40 +1,14 @@
-#ifndef _ASM_X8664_IOMMU_H
-#define _ASM_X8664_IOMMU_H 1
+#ifndef _ASM_X8664_GART_H
+#define _ASM_X8664_GART_H 1
 
 #include <asm/e820.h>
+#include <asm/iommu.h>
 
-extern void pci_iommu_shutdown(void);
-extern void no_iommu_init(void);
-extern int force_iommu, no_iommu;
-extern int iommu_detected;
-extern int agp_amd64_init(void);
-#ifdef CONFIG_GART_IOMMU
-extern void gart_iommu_init(void);
-extern void gart_iommu_shutdown(void);
-extern void __init gart_parse_options(char *);
-extern void early_gart_iommu_check(void);
-extern void gart_iommu_hole_init(void);
 extern void set_up_gart_resume(u32, u32);
+
 extern int fallback_aper_order;
 extern int fallback_aper_force;
-extern int gart_iommu_aperture;
-extern int gart_iommu_aperture_allowed;
-extern int gart_iommu_aperture_disabled;
 extern int fix_aperture;
-#else
-#define gart_iommu_aperture		0
-#define gart_iommu_aperture_allowed	0
-#define gart_iommu_aperture_disabled	1
-
-static inline void early_gart_iommu_check(void)
-{
-}
-
-static inline void gart_iommu_shutdown(void)
-{
-}
-
-#endif
 
 /* PTE bits. */
 #define GPTE_VALID	1
diff --git a/include/asm-x86/iommu.h b/include/asm-x86/iommu.h
index cb7e9891bfd..068c9a40aa5 100644
--- a/include/asm-x86/iommu.h
+++ b/include/asm-x86/iommu.h
@@ -1,29 +1,34 @@
-#ifndef _ASM_X8664_GART_H
-#define _ASM_X8664_GART_H 1
+#ifndef _ASM_X8664_IOMMU_H
+#define _ASM_X8664_IOMMU_H 1
 
 extern void pci_iommu_shutdown(void);
 extern void no_iommu_init(void);
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
+
 #ifdef CONFIG_GART_IOMMU
+extern int gart_iommu_aperture;
+extern int gart_iommu_aperture_allowed;
+extern int gart_iommu_aperture_disabled;
+
+extern void early_gart_iommu_check(void);
 extern void gart_iommu_init(void);
 extern void gart_iommu_shutdown(void);
 extern void __init gart_parse_options(char *);
-extern void iommu_hole_init(void);
-extern int fallback_aper_order;
-extern int fallback_aper_force;
-extern int iommu_aperture;
-extern int iommu_aperture_allowed;
-extern int iommu_aperture_disabled;
-extern int fix_aperture;
+extern void gart_iommu_hole_init(void);
+
 #else
-#define iommu_aperture 0
-#define iommu_aperture_allowed 0
+#define gart_iommu_aperture            0
+#define gart_iommu_aperture_allowed    0
+#define gart_iommu_aperture_disabled   1
 
-static inline void gart_iommu_shutdown(void)
+static inline void early_gart_iommu_check(void)
 {
 }
 
+static inline void gart_iommu_shutdown(void)
+{
+}
 #endif
 
 #endif
-- 
cgit v1.2.3-70-g09d2


From ca201c8230de336c3684aa3f3422d0c3f02bcef9 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 10 Jul 2008 12:33:20 +0200
Subject: x86, visws: fix generic-ipi build

fix:

 arch/x86/kernel/built-in.o: In function `smp_intr_init':
 (.init.text+0x49e2): undefined reference to `call_function_single_interrupt'

Caused by include/asm-x86/mach-visws/entry_arch.h getting out of sync
with the include/asm-x86/mach-default/entry_arch.h file it derives from.

Copy the default file over - next step will be to simply include the default
file.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/mach-visws/entry_arch.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/mach-visws/entry_arch.h b/include/asm-x86/mach-visws/entry_arch.h
index b183fa6d83d..9283b60a1dd 100644
--- a/include/asm-x86/mach-visws/entry_arch.h
+++ b/include/asm-x86/mach-visws/entry_arch.h
@@ -1,3 +1,9 @@
+/*
+ * This file is designed to contain the BUILD_INTERRUPT specifications for
+ * all of the extra named interrupt vectors used by the architecture.
+ * Usually this is the Inter Process Interrupts (IPIs)
+ */
+
 /*
  * The following vectors are part of the Linux architecture, there
  * is no hardware IRQ pin equivalent for them, they are triggered
@@ -7,6 +13,7 @@
 BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
 BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
 BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
+BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
 #endif
 
 /*
@@ -20,4 +27,9 @@ BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
 BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
 BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
 BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
+
+#ifdef CONFIG_X86_MCE_P4THERMAL
+BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
+#endif
+
 #endif
-- 
cgit v1.2.3-70-g09d2


From 42a2f217a5e324ed5f2373ab1b7a0a15187c4d6c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 10 Jul 2008 12:35:46 +0200
Subject: x86, visws: use mach-default/entry_arch.h

mach-default/entry_arch.h is exactly the same file as
mach-visws/entry_arch.h, so include the first from the second,
so that updates to the generic one get picked up by VISWS as well.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/mach-visws/entry_arch.h | 34 ++-------------------------------
 1 file changed, 2 insertions(+), 32 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/mach-visws/entry_arch.h b/include/asm-x86/mach-visws/entry_arch.h
index 9283b60a1dd..86be554342d 100644
--- a/include/asm-x86/mach-visws/entry_arch.h
+++ b/include/asm-x86/mach-visws/entry_arch.h
@@ -1,35 +1,5 @@
 /*
- * This file is designed to contain the BUILD_INTERRUPT specifications for
- * all of the extra named interrupt vectors used by the architecture.
- * Usually this is the Inter Process Interrupts (IPIs)
+ * VISWS uses the standard Linux entry points:
  */
 
-/*
- * The following vectors are part of the Linux architecture, there
- * is no hardware IRQ pin equivalent for them, they are triggered
- * through the ICC by us (IPIs)
- */
-#ifdef CONFIG_X86_SMP
-BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
-BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
-BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
-BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
-#endif
-
-/*
- * every pentium local APIC has two 'local interrupts', with a
- * soft-definable vector attached to both interrupts, one of
- * which is a timer interrupt, the other one is error counter
- * overflow. Linux uses the local APIC timer interrupt to get
- * a much simpler SMP time architecture:
- */
-#ifdef CONFIG_X86_LOCAL_APIC
-BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
-BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
-BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
-
-#ifdef CONFIG_X86_MCE_P4THERMAL
-BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
-#endif
-
-#endif
+#include "../mach-default/entry_arch.h"
-- 
cgit v1.2.3-70-g09d2


From 3b33553badcde952adcf3b3ba5faae38d7d85071 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 10 Jul 2008 17:30:40 +0200
Subject: x86: add early quirk support

Add early quirks support.

In preparation of enabling the generic architecture to boot on a VISWS.

This will allow us to remove the VISWS subarch and all its complications.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/e820.c    | 11 +++++++++++
 arch/x86/kernel/mpparse.c | 20 ++++++++++++++++++--
 arch/x86/kernel/setup.c   |  1 +
 include/asm-x86/setup.h   | 17 +++++++++++++++++
 4 files changed, 47 insertions(+), 2 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 9836a079cfd..269d367d2ac 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1297,6 +1297,11 @@ void __init e820_reserve_resources(void)
 	}
 }
 
+/*
+ * Non-standard memory setup can be specified via this quirk:
+ */
+char * (*arch_memory_setup_quirk)(void);
+
 char *__init default_machine_specific_memory_setup(void)
 {
 	char *who = "BIOS-e820";
@@ -1337,6 +1342,12 @@ char *__init default_machine_specific_memory_setup(void)
 
 char *__init __attribute__((weak)) machine_specific_memory_setup(void)
 {
+	if (arch_memory_setup_quirk) {
+		char *who = arch_memory_setup_quirk();
+
+		if (who)
+			return who;
+	}
 	return default_machine_specific_memory_setup();
 }
 
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 8b6b1e05c30..3b25e49380c 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -725,13 +725,23 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
 
 static struct intel_mp_floating *mpf_found;
 
+/*
+ * Machine specific quirk for finding the SMP config before other setup
+ * activities destroy the table:
+ */
+int (*mach_get_smp_config_quirk)(unsigned int early);
+
 /*
  * Scan the memory blocks for an SMP configuration block.
  */
-static void __init __get_smp_config(unsigned early)
+static void __init __get_smp_config(unsigned int early)
 {
 	struct intel_mp_floating *mpf = mpf_found;
 
+	if (mach_get_smp_config_quirk) {
+		if (mach_get_smp_config_quirk(early))
+			return;
+	}
 	if (acpi_lapic && early)
 		return;
 	/*
@@ -889,10 +899,16 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
 	return 0;
 }
 
-static void __init __find_smp_config(unsigned reserve)
+int (*mach_find_smp_config_quirk)(unsigned int reserve);
+
+static void __init __find_smp_config(unsigned int reserve)
 {
 	unsigned int address;
 
+	if (mach_find_smp_config_quirk) {
+		if (mach_find_smp_config_quirk(reserve))
+			return;
+	}
 	/*
 	 * FIXME: Linux assumes you have 640K of base ram..
 	 * this continues the error...
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index a7c3471ea17..cb3db406247 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -596,6 +596,7 @@ void __init setup_arch(char **cmdline_p)
 {
 #ifdef CONFIG_X86_32
 	memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
+	visws_early_detect();
 	pre_setup_arch_hook();
 	early_cpu_init();
 #else
diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index 1d121c632d9..1ad7eae0d9b 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -8,6 +8,23 @@
 /* Interrupt control for vSMPowered x86_64 systems */
 void vsmp_init(void);
 
+#ifdef CONFIG_X86_VISWS
+extern void visws_early_detect(void);
+#else
+static inline void visws_early_detect(void) { }
+#endif
+
+/*
+ * Any setup quirks to be performed?
+ */
+extern int (*arch_time_init_quirk)(void);
+extern int (*arch_pre_intr_init_quirk)(void);
+extern int (*arch_intr_init_quirk)(void);
+extern int (*arch_trap_init_quirk)(void);
+extern char * (*arch_memory_setup_quirk)(void);
+extern int (*mach_get_smp_config_quirk)(unsigned int early);
+extern int (*mach_find_smp_config_quirk)(unsigned int reserve);
+
 #ifndef CONFIG_PARAVIRT
 #define paravirt_post_allocator_init()	do {} while (0)
 #endif
-- 
cgit v1.2.3-70-g09d2


From 5548ed1135842d1993a4ba699377a8a3c65dd568 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 10 Jul 2008 16:53:21 +0200
Subject: x86, VisWS: turn into generic arch, install proper PCI quirk

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mach-visws/setup_visws.c | 5 +++++
 arch/x86/pci/visws.c              | 7 ++++++-
 include/asm-x86/setup.h           | 2 ++
 3 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/mach-visws/setup_visws.c b/arch/x86/mach-visws/setup_visws.c
index 8401208eec5..bbc149f78a4 100644
--- a/arch/x86/mach-visws/setup_visws.c
+++ b/arch/x86/mach-visws/setup_visws.c
@@ -28,6 +28,11 @@
 char visws_board_type	= -1;
 char visws_board_rev	= -1;
 
+int is_visws_box(void)
+{
+	return visws_board_type >= 0;
+}
+
 static int __init visws_time_init_quirk(void)
 {
 	printk(KERN_INFO "Starting Cobalt Timer system clock\n");
diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c
index 2e022210a63..9b883890c0b 100644
--- a/arch/x86/pci/visws.c
+++ b/arch/x86/pci/visws.c
@@ -8,6 +8,7 @@
 #include <linux/pci.h>
 #include <linux/init.h>
 
+#include <asm/setup.h>
 #include "cobalt.h"
 #include "lithium.h"
 
@@ -107,7 +108,11 @@ static int __init pci_visws_init(void)
 
 static __init int pci_subsys_init(void)
 {
-	return -1;
+	if (!is_visws_box())
+		return -1;
+
+	pcibios_enable_irq = &pci_visws_enable_irq;
+	pcibios_disable_irq = &pci_visws_disable_irq;
 
 	pci_visws_init();
 	pcibios_init();
diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index 1ad7eae0d9b..90ab2225e71 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -10,8 +10,10 @@ void vsmp_init(void);
 
 #ifdef CONFIG_X86_VISWS
 extern void visws_early_detect(void);
+extern int is_visws_box(void);
 #else
 static inline void visws_early_detect(void) { }
+static inline int is_visws_box(void) { return 0; }
 #endif
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 808fc878d373c338f138529c284df5f86ab3cc55 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 10 Jul 2008 15:01:54 +0200
Subject: x86, VisWS: turn into generic arch, update
 asm-x86/mach-visws/mach_apic.h

update asm-x86/mach-visws/mach_apic.h to the generic version.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/mach-visws/mach_apic.h | 104 ++++++++++++++++++++++-----------
 1 file changed, 71 insertions(+), 33 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/mach-visws/mach_apic.h b/include/asm-x86/mach-visws/mach_apic.h
index a9ef33a8a99..0b2cde5e1b7 100644
--- a/include/asm-x86/mach-visws/mach_apic.h
+++ b/include/asm-x86/mach-visws/mach_apic.h
@@ -1,31 +1,40 @@
 #ifndef __ASM_MACH_APIC_H
 #define __ASM_MACH_APIC_H
 
+#ifdef CONFIG_X86_LOCAL_APIC
+
 #include <mach_apicdef.h>
 #include <asm/smp.h>
 
 #define APIC_DFR_VALUE	(APIC_DFR_FLAT)
 
-#define no_balance_irq (0)
-#define esr_disable (0)
-
-#define INT_DELIVERY_MODE dest_LowestPrio
-#define INT_DEST_MODE 1     /* logical delivery broadcast to all procs */
-
+static inline cpumask_t target_cpus(void)
+{ 
 #ifdef CONFIG_SMP
- #define TARGET_CPUS cpu_online_map
+	return cpu_online_map;
 #else
- #define TARGET_CPUS cpumask_of_cpu(0)
+	return cpumask_of_cpu(0);
 #endif
+} 
 
-#define check_apicid_used(bitmap, apicid)	physid_isset(apicid, bitmap)
-#define check_apicid_present(bit)		physid_isset(bit, phys_cpu_present_map)
-
-static inline int apic_id_registered(void)
-{
-	return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map);
-}
+#define NO_BALANCE_IRQ (0)
+#define esr_disable (0)
 
+#ifdef CONFIG_X86_64
+#include <asm/genapic.h>
+#define INT_DELIVERY_MODE (genapic->int_delivery_mode)
+#define INT_DEST_MODE (genapic->int_dest_mode)
+#define TARGET_CPUS	  (genapic->target_cpus())
+#define apic_id_registered (genapic->apic_id_registered)
+#define init_apic_ldr (genapic->init_apic_ldr)
+#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
+#define phys_pkg_id	(genapic->phys_pkg_id)
+#define vector_allocation_domain    (genapic->vector_allocation_domain)
+extern void setup_apic_routing(void);
+#else
+#define INT_DELIVERY_MODE dest_LowestPrio
+#define INT_DEST_MODE 1     /* logical delivery broadcast to all procs */
+#define TARGET_CPUS (target_cpus())
 /*
  * Set up the logical destination ID.
  *
@@ -43,15 +52,55 @@ static inline void init_apic_ldr(void)
 	apic_write_around(APIC_LDR, val);
 }
 
-static inline void summit_check(char *oem, char *productid) 
+static inline int apic_id_registered(void)
+{
+	return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map);
+}
+
+static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+{
+	return cpus_addr(cpumask)[0];
+}
+
+static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
 {
+	return cpuid_apic >> index_msb;
 }
 
 static inline void setup_apic_routing(void)
 {
+#ifdef CONFIG_X86_IO_APIC
+	printk("Enabling APIC mode:  %s.  Using %d I/O APICs\n",
+					"Flat", nr_ioapics);
+#endif
 }
 
 static inline int apicid_to_node(int logical_apicid)
+{
+#ifdef CONFIG_SMP
+	return apicid_2_node[hard_smp_processor_id()];
+#else
+	return 0;
+#endif
+}
+#endif
+
+static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
+{
+	return physid_isset(apicid, bitmap);
+}
+
+static inline unsigned long check_apicid_present(int bit)
+{
+	return physid_isset(bit, phys_cpu_present_map);
+}
+
+static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map)
+{
+	return phys_map;
+}
+
+static inline int multi_timer_check(int apic, int irq)
 {
 	return 0;
 }
@@ -64,40 +113,29 @@ static inline int cpu_to_logical_apicid(int cpu)
 
 static inline int cpu_present_to_apicid(int mps_cpu)
 {
-	if (mps_cpu < get_physical_broadcast())
-		return mps_cpu;
+	if (mps_cpu < NR_CPUS && cpu_present(mps_cpu))
+		return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
 	else
 		return BAD_APICID;
 }
 
-static inline physid_mask_t apicid_to_cpu_present(int apicid)
+static inline physid_mask_t apicid_to_cpu_present(int phys_apicid)
 {
-	return physid_mask_of_physid(apicid);
+	return physid_mask_of_physid(phys_apicid);
 }
 
-#define WAKE_SECONDARY_VIA_INIT
-
 static inline void setup_portio_remap(void)
 {
 }
 
-static inline void enable_apic_mode(void)
-{
-}
-
 static inline int check_phys_apicid_present(int boot_cpu_physical_apicid)
 {
 	return physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map);
 }
 
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
-{
-	return cpus_addr(cpumask)[0];
-}
-
-static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
+static inline void enable_apic_mode(void)
 {
-	return cpuid_apic >> index_msb;
 }
 
+#endif /* CONFIG_X86_LOCAL_APIC */
 #endif /* __ASM_MACH_APIC_H */
-- 
cgit v1.2.3-70-g09d2


From d8d1bc73f0ccb60f4d6056333b8fcb3140772c21 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 10 Jul 2008 15:04:13 +0200
Subject: x86, VisWS: turn into generic arch, eliminate
 include/asm-x86/mach-visws/mach_apic.h

now that include/asm-x86/mach-visws/mach_apic.h equals
to include/asm-x86/mach-default/mach_apic.h, simply start
using the generic one.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/mach-visws/mach_apic.h | 142 +--------------------------------
 1 file changed, 1 insertion(+), 141 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/mach-visws/mach_apic.h b/include/asm-x86/mach-visws/mach_apic.h
index 0b2cde5e1b7..6943e7a1d0e 100644
--- a/include/asm-x86/mach-visws/mach_apic.h
+++ b/include/asm-x86/mach-visws/mach_apic.h
@@ -1,141 +1 @@
-#ifndef __ASM_MACH_APIC_H
-#define __ASM_MACH_APIC_H
-
-#ifdef CONFIG_X86_LOCAL_APIC
-
-#include <mach_apicdef.h>
-#include <asm/smp.h>
-
-#define APIC_DFR_VALUE	(APIC_DFR_FLAT)
-
-static inline cpumask_t target_cpus(void)
-{ 
-#ifdef CONFIG_SMP
-	return cpu_online_map;
-#else
-	return cpumask_of_cpu(0);
-#endif
-} 
-
-#define NO_BALANCE_IRQ (0)
-#define esr_disable (0)
-
-#ifdef CONFIG_X86_64
-#include <asm/genapic.h>
-#define INT_DELIVERY_MODE (genapic->int_delivery_mode)
-#define INT_DEST_MODE (genapic->int_dest_mode)
-#define TARGET_CPUS	  (genapic->target_cpus())
-#define apic_id_registered (genapic->apic_id_registered)
-#define init_apic_ldr (genapic->init_apic_ldr)
-#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
-#define phys_pkg_id	(genapic->phys_pkg_id)
-#define vector_allocation_domain    (genapic->vector_allocation_domain)
-extern void setup_apic_routing(void);
-#else
-#define INT_DELIVERY_MODE dest_LowestPrio
-#define INT_DEST_MODE 1     /* logical delivery broadcast to all procs */
-#define TARGET_CPUS (target_cpus())
-/*
- * Set up the logical destination ID.
- *
- * Intel recommends to set DFR, LDR and TPR before enabling
- * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
- * document number 292116).  So here it goes...
- */
-static inline void init_apic_ldr(void)
-{
-	unsigned long val;
-
-	apic_write_around(APIC_DFR, APIC_DFR_VALUE);
-	val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
-	val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
-	apic_write_around(APIC_LDR, val);
-}
-
-static inline int apic_id_registered(void)
-{
-	return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map);
-}
-
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
-{
-	return cpus_addr(cpumask)[0];
-}
-
-static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
-{
-	return cpuid_apic >> index_msb;
-}
-
-static inline void setup_apic_routing(void)
-{
-#ifdef CONFIG_X86_IO_APIC
-	printk("Enabling APIC mode:  %s.  Using %d I/O APICs\n",
-					"Flat", nr_ioapics);
-#endif
-}
-
-static inline int apicid_to_node(int logical_apicid)
-{
-#ifdef CONFIG_SMP
-	return apicid_2_node[hard_smp_processor_id()];
-#else
-	return 0;
-#endif
-}
-#endif
-
-static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
-{
-	return physid_isset(apicid, bitmap);
-}
-
-static inline unsigned long check_apicid_present(int bit)
-{
-	return physid_isset(bit, phys_cpu_present_map);
-}
-
-static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map)
-{
-	return phys_map;
-}
-
-static inline int multi_timer_check(int apic, int irq)
-{
-	return 0;
-}
-
-/* Mapping from cpu number to logical apicid */
-static inline int cpu_to_logical_apicid(int cpu)
-{
-	return 1 << cpu;
-}
-
-static inline int cpu_present_to_apicid(int mps_cpu)
-{
-	if (mps_cpu < NR_CPUS && cpu_present(mps_cpu))
-		return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
-	else
-		return BAD_APICID;
-}
-
-static inline physid_mask_t apicid_to_cpu_present(int phys_apicid)
-{
-	return physid_mask_of_physid(phys_apicid);
-}
-
-static inline void setup_portio_remap(void)
-{
-}
-
-static inline int check_phys_apicid_present(int boot_cpu_physical_apicid)
-{
-	return physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map);
-}
-
-static inline void enable_apic_mode(void)
-{
-}
-
-#endif /* CONFIG_X86_LOCAL_APIC */
-#endif /* __ASM_MACH_APIC_H */
+#include "../mach-default/mach_apic.h"
-- 
cgit v1.2.3-70-g09d2


From 6a64b5da9bbe561fd189361812f5ed205bb55345 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 10 Jul 2008 15:09:04 +0200
Subject: x86, VisWS: turn into generic arch, enhance
 include/asm-x86/mach-default/smpboot_hooks.h

Allow the generic smpboot quirks code to be built with
ONFIG_X86_IO_APIC disabled. This way VISWS will be able
to use it as-is.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/mach-default/smpboot_hooks.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/mach-default/smpboot_hooks.h b/include/asm-x86/mach-default/smpboot_hooks.h
index b63c5218200..56d001b9dce 100644
--- a/include/asm-x86/mach-default/smpboot_hooks.h
+++ b/include/asm-x86/mach-default/smpboot_hooks.h
@@ -3,7 +3,9 @@
 
 static inline void smpboot_clear_io_apic_irqs(void)
 {
+#ifdef CONFIG_X86_IO_APIC
 	io_apic_irqs = 0;
+#endif
 }
 
 static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
@@ -35,6 +37,7 @@ static inline void smpboot_restore_warm_reset_vector(void)
 
 static inline void __init smpboot_setup_io_apic(void)
 {
+#ifdef CONFIG_X86_IO_APIC
 	/*
 	 * Here we can be sure that there is an IO-APIC in the system. Let's
 	 * go and set it up:
@@ -45,9 +48,12 @@ static inline void __init smpboot_setup_io_apic(void)
 		nr_ioapics = 0;
 		localise_nmi_watchdog();
 	}
+#endif
 }
 
 static inline void smpboot_clear_io_apic(void)
 {
+#ifdef CONFIG_X86_IO_APIC
 	nr_ioapics = 0;
+#endif
 }
-- 
cgit v1.2.3-70-g09d2


From 244a5e2ee0ba204d66dfe4273df91c6bc65a310a Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 10 Jul 2008 15:06:07 +0200
Subject: x86, VisWS: turn into generic arch, update
 include/asm-x86/mach-visws/smpboot_hooks.h

update include/asm-x86/mach-visws/smpboot_hooks.h to
include/asm-x86/mach-default/smpboot_hooks.h (the generic version).

this _should_ work, because VISWS sets skip_ioapic_setup, but it
should be tested on a real VISWS to make sure.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/mach-visws/smpboot_hooks.h | 45 +++++++++++++++++++++++++-----
 1 file changed, 38 insertions(+), 7 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/mach-visws/smpboot_hooks.h b/include/asm-x86/mach-visws/smpboot_hooks.h
index c9b83e395a2..56d001b9dce 100644
--- a/include/asm-x86/mach-visws/smpboot_hooks.h
+++ b/include/asm-x86/mach-visws/smpboot_hooks.h
@@ -1,3 +1,13 @@
+/* two abstractions specific to kernel/smpboot.c, mainly to cater to visws
+ * which needs to alter them. */
+
+static inline void smpboot_clear_io_apic_irqs(void)
+{
+#ifdef CONFIG_X86_IO_APIC
+	io_apic_irqs = 0;
+#endif
+}
+
 static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
 {
 	CMOS_WRITE(0xa, 0xf);
@@ -9,20 +19,41 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
 	Dprintk("3.\n");
 }
 
-/* for visws do nothing for any of these */
-
-static inline void smpboot_clear_io_apic_irqs(void)
-{
-}
-
 static inline void smpboot_restore_warm_reset_vector(void)
 {
+	/*
+	 * Install writable page 0 entry to set BIOS data area.
+	 */
+	local_flush_tlb();
+
+	/*
+	 * Paranoid:  Set warm reset code and vector here back
+	 * to default values.
+	 */
+	CMOS_WRITE(0, 0xf);
+
+	*((volatile long *) phys_to_virt(0x467)) = 0;
 }
 
-static inline void smpboot_setup_io_apic(void)
+static inline void __init smpboot_setup_io_apic(void)
 {
+#ifdef CONFIG_X86_IO_APIC
+	/*
+	 * Here we can be sure that there is an IO-APIC in the system. Let's
+	 * go and set it up:
+	 */
+	if (!skip_ioapic_setup && nr_ioapics)
+		setup_IO_APIC();
+	else {
+		nr_ioapics = 0;
+		localise_nmi_watchdog();
+	}
+#endif
 }
 
 static inline void smpboot_clear_io_apic(void)
 {
+#ifdef CONFIG_X86_IO_APIC
+	nr_ioapics = 0;
+#endif
 }
-- 
cgit v1.2.3-70-g09d2


From 6649ababd52014afaca62a12aed37d797f9f865c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 10 Jul 2008 15:10:29 +0200
Subject: x86, VisWS: turn into generic arch, eliminate
 include/asm-x86/mach-visws/smpboot_hooks.h

now that include/asm-x86/mach-visws/smpboot_hooks.h equals
to the default file in ../mach-default/smpboot_hooks.h, simply
include it instead of maintaining a copy.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/mach-visws/smpboot_hooks.h | 60 +-----------------------------
 1 file changed, 1 insertion(+), 59 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/mach-visws/smpboot_hooks.h b/include/asm-x86/mach-visws/smpboot_hooks.h
index 56d001b9dce..e4433ca8871 100644
--- a/include/asm-x86/mach-visws/smpboot_hooks.h
+++ b/include/asm-x86/mach-visws/smpboot_hooks.h
@@ -1,59 +1 @@
-/* two abstractions specific to kernel/smpboot.c, mainly to cater to visws
- * which needs to alter them. */
-
-static inline void smpboot_clear_io_apic_irqs(void)
-{
-#ifdef CONFIG_X86_IO_APIC
-	io_apic_irqs = 0;
-#endif
-}
-
-static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
-{
-	CMOS_WRITE(0xa, 0xf);
-	local_flush_tlb();
-	Dprintk("1.\n");
-	*((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
-	Dprintk("2.\n");
-	*((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
-	Dprintk("3.\n");
-}
-
-static inline void smpboot_restore_warm_reset_vector(void)
-{
-	/*
-	 * Install writable page 0 entry to set BIOS data area.
-	 */
-	local_flush_tlb();
-
-	/*
-	 * Paranoid:  Set warm reset code and vector here back
-	 * to default values.
-	 */
-	CMOS_WRITE(0, 0xf);
-
-	*((volatile long *) phys_to_virt(0x467)) = 0;
-}
-
-static inline void __init smpboot_setup_io_apic(void)
-{
-#ifdef CONFIG_X86_IO_APIC
-	/*
-	 * Here we can be sure that there is an IO-APIC in the system. Let's
-	 * go and set it up:
-	 */
-	if (!skip_ioapic_setup && nr_ioapics)
-		setup_IO_APIC();
-	else {
-		nr_ioapics = 0;
-		localise_nmi_watchdog();
-	}
-#endif
-}
-
-static inline void smpboot_clear_io_apic(void)
-{
-#ifdef CONFIG_X86_IO_APIC
-	nr_ioapics = 0;
-#endif
-}
+#include "../mach-default/smpboot_hooks.h"
-- 
cgit v1.2.3-70-g09d2


From b70d9c2473ffbe327f5d7364bfbf67e94c440af8 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 10 Jul 2008 15:12:05 +0200
Subject: x86, VisWS: turn into generic arch, update
 asm-x86/mach-visws/mach_apicdef.h

update asm-x86/mach-visws/mach_apicdef.h to the generic version.

This should work fine as VISWS has a standard local APIC and thus
its mach_apicdef.h copy is just an ancient version of the generic code.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/mach-visws/mach_apicdef.h | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/mach-visws/mach_apicdef.h b/include/asm-x86/mach-visws/mach_apicdef.h
index 826cfa97d77..e4b29ba37de 100644
--- a/include/asm-x86/mach-visws/mach_apicdef.h
+++ b/include/asm-x86/mach-visws/mach_apicdef.h
@@ -1,12 +1,24 @@
 #ifndef __ASM_MACH_APICDEF_H
 #define __ASM_MACH_APICDEF_H
 
-#define         APIC_ID_MASK            (0xF<<24)
+#include <asm/apic.h>
 
-static inline unsigned get_apic_id(unsigned long x)
+#ifdef CONFIG_X86_64
+#define	APIC_ID_MASK		(0xFFu<<24)
+#define GET_APIC_ID(x)          (((x)>>24)&0xFFu)
+#define	SET_APIC_ID(x)		(((x)<<24))
+#else
+#define		APIC_ID_MASK		(0xF<<24)
+static inline unsigned get_apic_id(unsigned long x) 
 {
-	        return (((x)>>24)&0xF);
-}
-#define         GET_APIC_ID(x)  get_apic_id(x)
+	unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
+	if (APIC_XAPIC(ver))
+		return (((x)>>24)&0xFF);
+	else
+		return (((x)>>24)&0xF);
+} 
+
+#define		GET_APIC_ID(x)	get_apic_id(x)
+#endif
 
 #endif
-- 
cgit v1.2.3-70-g09d2


From 18c413e27e1585358cedc22e450847e3240006ff Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 10 Jul 2008 15:13:41 +0200
Subject: x86, VisWS: turn into generic arch, eliminate
 asm-x86/mach-visws/mach_apicdef.h

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/mach-visws/mach_apicdef.h | 25 +------------------------
 1 file changed, 1 insertion(+), 24 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/mach-visws/mach_apicdef.h b/include/asm-x86/mach-visws/mach_apicdef.h
index e4b29ba37de..42711d152a9 100644
--- a/include/asm-x86/mach-visws/mach_apicdef.h
+++ b/include/asm-x86/mach-visws/mach_apicdef.h
@@ -1,24 +1 @@
-#ifndef __ASM_MACH_APICDEF_H
-#define __ASM_MACH_APICDEF_H
-
-#include <asm/apic.h>
-
-#ifdef CONFIG_X86_64
-#define	APIC_ID_MASK		(0xFFu<<24)
-#define GET_APIC_ID(x)          (((x)>>24)&0xFFu)
-#define	SET_APIC_ID(x)		(((x)<<24))
-#else
-#define		APIC_ID_MASK		(0xF<<24)
-static inline unsigned get_apic_id(unsigned long x) 
-{
-	unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
-	if (APIC_XAPIC(ver))
-		return (((x)>>24)&0xFF);
-	else
-		return (((x)>>24)&0xF);
-} 
-
-#define		GET_APIC_ID(x)	get_apic_id(x)
-#endif
-
-#endif
+#include "../mach-default/mach_apicdef.h"
-- 
cgit v1.2.3-70-g09d2


From b4b86416712d79a77cdc53756751b3b91fbb7a3d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 10 Jul 2008 15:25:21 +0200
Subject: x86, VisWS: turn into generic arch, create include/asm-x86/visws/

move the include/asm-x86/mach-visws/ VISWS specific hardware
details include files into include/asm-x86/visws, to be used from
generic code.

No code changed.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mach-visws/setup_visws.c    |   4 +-
 arch/x86/mach-visws/traps.c          |   6 +-
 arch/x86/mach-visws/visws_apic.c     |   3 +-
 arch/x86/pci/visws.c                 |   4 +-
 include/asm-x86/mach-visws/cobalt.h  | 125 -----------------------------------
 include/asm-x86/mach-visws/lithium.h |  53 ---------------
 include/asm-x86/mach-visws/piix4.h   | 107 ------------------------------
 include/asm-x86/visws/cobalt.h       | 125 +++++++++++++++++++++++++++++++++++
 include/asm-x86/visws/lithium.h      |  53 +++++++++++++++
 include/asm-x86/visws/piix4.h        | 107 ++++++++++++++++++++++++++++++
 10 files changed, 293 insertions(+), 294 deletions(-)
 delete mode 100644 include/asm-x86/mach-visws/cobalt.h
 delete mode 100644 include/asm-x86/mach-visws/lithium.h
 delete mode 100644 include/asm-x86/mach-visws/piix4.h
 create mode 100644 include/asm-x86/visws/cobalt.h
 create mode 100644 include/asm-x86/visws/lithium.h
 create mode 100644 include/asm-x86/visws/piix4.h

(limited to 'include/asm-x86')

diff --git a/arch/x86/mach-visws/setup_visws.c b/arch/x86/mach-visws/setup_visws.c
index bbc149f78a4..e95e9499c8c 100644
--- a/arch/x86/mach-visws/setup_visws.c
+++ b/arch/x86/mach-visws/setup_visws.c
@@ -8,6 +8,8 @@
 #include <linux/init.h>
 #include <linux/smp.h>
 
+#include <asm/visws/cobalt.h>
+#include <asm/visws/piix4.h>
 #include <asm/arch_hooks.h>
 #include <asm/fixmap.h>
 #include <asm/reboot.h>
@@ -18,8 +20,6 @@
 
 #include <mach_ipi.h>
 
-#include "cobalt.h"
-#include "piix4.h"
 #include "mach_apic.h"
 
 #include <linux/init.h>
diff --git a/arch/x86/mach-visws/traps.c b/arch/x86/mach-visws/traps.c
index 8a160ec147f..e5e6492c267 100644
--- a/arch/x86/mach-visws/traps.c
+++ b/arch/x86/mach-visws/traps.c
@@ -7,10 +7,10 @@
 #include <linux/pci_ids.h>
 
 #include <asm/io.h>
-#include <asm/arch_hooks.h>
 #include <asm/apic.h>
-#include "cobalt.h"
-#include "lithium.h"
+#include <asm/arch_hooks.h>
+#include <asm/visws/cobalt.h>
+#include <asm/visws/lithium.h>
 
 
 #define A01234 (LI_INTA_0 | LI_INTA_1 | LI_INTA_2 | LI_INTA_3 | LI_INTA_4)
diff --git a/arch/x86/mach-visws/visws_apic.c b/arch/x86/mach-visws/visws_apic.c
index d8b2cfd85d9..6c02c8d0993 100644
--- a/arch/x86/mach-visws/visws_apic.c
+++ b/arch/x86/mach-visws/visws_apic.c
@@ -22,8 +22,7 @@
 #include <asm/apic.h>
 #include <asm/i8259.h>
 #include <asm/irq_vectors.h>
-
-#include "cobalt.h"
+#include <asm/visws/cobalt.h>
 
 static DEFINE_SPINLOCK(cobalt_lock);
 
diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c
index 9b883890c0b..1a7bed492bb 100644
--- a/arch/x86/pci/visws.c
+++ b/arch/x86/pci/visws.c
@@ -9,8 +9,8 @@
 #include <linux/init.h>
 
 #include <asm/setup.h>
-#include "cobalt.h"
-#include "lithium.h"
+#include <asm/visws/cobalt.h>
+#include <asm/visws/lithium.h>
 
 #include "pci.h"
 
diff --git a/include/asm-x86/mach-visws/cobalt.h b/include/asm-x86/mach-visws/cobalt.h
deleted file mode 100644
index 995258831b7..00000000000
--- a/include/asm-x86/mach-visws/cobalt.h
+++ /dev/null
@@ -1,125 +0,0 @@
-#ifndef __I386_SGI_COBALT_H
-#define __I386_SGI_COBALT_H
-
-#include <asm/fixmap.h>
-
-/*
- * Cobalt SGI Visual Workstation system ASIC
- */ 
-
-#define CO_CPU_NUM_PHYS 0x1e00
-#define CO_CPU_TAB_PHYS (CO_CPU_NUM_PHYS + 2)
-
-#define CO_CPU_MAX 4
-
-#define	CO_CPU_PHYS		0xc2000000
-#define	CO_APIC_PHYS		0xc4000000
-
-/* see set_fixmap() and asm/fixmap.h */
-#define	CO_CPU_VADDR		(fix_to_virt(FIX_CO_CPU))
-#define	CO_APIC_VADDR		(fix_to_virt(FIX_CO_APIC))
-
-/* Cobalt CPU registers -- relative to CO_CPU_VADDR, use co_cpu_*() */
-#define	CO_CPU_REV		0x08
-#define	CO_CPU_CTRL		0x10
-#define	CO_CPU_STAT		0x20
-#define	CO_CPU_TIMEVAL		0x30
-
-/* CO_CPU_CTRL bits */
-#define	CO_CTRL_TIMERUN		0x04		/* 0 == disabled */
-#define	CO_CTRL_TIMEMASK	0x08		/* 0 == unmasked */
-
-/* CO_CPU_STATUS bits */
-#define	CO_STAT_TIMEINTR	0x02	/* (r) 1 == int pend, (w) 0 == clear */
-
-/* CO_CPU_TIMEVAL value */
-#define	CO_TIME_HZ		100000000	/* Cobalt core rate */
-
-/* Cobalt APIC registers -- relative to CO_APIC_VADDR, use co_apic_*() */
-#define	CO_APIC_HI(n)		(((n) * 0x10) + 4)
-#define	CO_APIC_LO(n)		((n) * 0x10)
-#define	CO_APIC_ID		0x0ffc
-
-/* CO_APIC_ID bits */
-#define	CO_APIC_ENABLE		0x00000100
-
-/* CO_APIC_LO bits */
-#define	CO_APIC_MASK		0x00010000	/* 0 = enabled */
-#define	CO_APIC_LEVEL		0x00008000	/* 0 = edge */
-
-/*
- * Where things are physically wired to Cobalt
- * #defines with no board _<type>_<rev>_ are common to all (thus far)
- */
-#define	CO_APIC_IDE0		4
-#define CO_APIC_IDE1		2		/* Only on 320 */
-
-#define	CO_APIC_8259		12		/* serial, floppy, par-l-l */
-
-/* Lithium PCI Bridge A -- "the one with 82557 Ethernet" */
-#define	CO_APIC_PCIA_BASE0	0 /* and 1 */	/* slot 0, line 0 */
-#define	CO_APIC_PCIA_BASE123	5 /* and 6 */	/* slot 0, line 1 */
-
-#define	CO_APIC_PIIX4_USB	7		/* this one is weird */
-
-/* Lithium PCI Bridge B -- "the one with PIIX4" */
-#define	CO_APIC_PCIB_BASE0	8 /* and 9-12 *//* slot 0, line 0 */
-#define	CO_APIC_PCIB_BASE123	13 /* 14.15 */	/* slot 0, line 1 */
-
-#define	CO_APIC_VIDOUT0		16
-#define	CO_APIC_VIDOUT1		17
-#define	CO_APIC_VIDIN0		18
-#define	CO_APIC_VIDIN1		19
-
-#define	CO_APIC_LI_AUDIO	22
-
-#define	CO_APIC_AS		24
-#define	CO_APIC_RE		25
-
-#define CO_APIC_CPU		28		/* Timer and Cache interrupt */
-#define	CO_APIC_NMI		29
-#define	CO_APIC_LAST		CO_APIC_NMI
-
-/*
- * This is how irqs are assigned on the Visual Workstation.
- * Legacy devices get irq's 1-15 (system clock is 0 and is CO_APIC_CPU).
- * All other devices (including PCI) go to Cobalt and are irq's 16 on up.
- */
-#define	CO_IRQ_APIC0	16			/* irq of apic entry 0 */
-#define	IS_CO_APIC(irq)	((irq) >= CO_IRQ_APIC0)
-#define	CO_IRQ(apic)	(CO_IRQ_APIC0 + (apic))	/* apic ent to irq */
-#define	CO_APIC(irq)	((irq) - CO_IRQ_APIC0)	/* irq to apic ent */
-#define CO_IRQ_IDE0	14			/* knowledge of... */
-#define CO_IRQ_IDE1	15			/* ... ide driver defaults! */
-#define	CO_IRQ_8259	CO_IRQ(CO_APIC_8259)
-
-#ifdef CONFIG_X86_VISWS_APIC
-static inline void co_cpu_write(unsigned long reg, unsigned long v)
-{
-	*((volatile unsigned long *)(CO_CPU_VADDR+reg))=v;
-}
-
-static inline unsigned long co_cpu_read(unsigned long reg)
-{
-	return *((volatile unsigned long *)(CO_CPU_VADDR+reg));
-}            
-             
-static inline void co_apic_write(unsigned long reg, unsigned long v)
-{
-	*((volatile unsigned long *)(CO_APIC_VADDR+reg))=v;
-}            
-             
-static inline unsigned long co_apic_read(unsigned long reg)
-{
-	return *((volatile unsigned long *)(CO_APIC_VADDR+reg));
-}
-#endif
-
-extern char visws_board_type;
-
-#define	VISWS_320	0
-#define	VISWS_540	1
-
-extern char visws_board_rev;
-
-#endif /* __I386_SGI_COBALT_H */
diff --git a/include/asm-x86/mach-visws/lithium.h b/include/asm-x86/mach-visws/lithium.h
deleted file mode 100644
index dfcd4f07ab8..00000000000
--- a/include/asm-x86/mach-visws/lithium.h
+++ /dev/null
@@ -1,53 +0,0 @@
-#ifndef __I386_SGI_LITHIUM_H
-#define __I386_SGI_LITHIUM_H
-
-#include <asm/fixmap.h>
-
-/*
- * Lithium is the SGI Visual Workstation I/O ASIC
- */
-
-#define	LI_PCI_A_PHYS		0xfc000000	/* Enet is dev 3 */
-#define	LI_PCI_B_PHYS		0xfd000000	/* PIIX4 is here */
-
-/* see set_fixmap() and asm/fixmap.h */
-#define LI_PCIA_VADDR   (fix_to_virt(FIX_LI_PCIA))
-#define LI_PCIB_VADDR   (fix_to_virt(FIX_LI_PCIB))
-
-/* Not a standard PCI? (not in linux/pci.h) */
-#define	LI_PCI_BUSNUM	0x44			/* lo8: primary, hi8: sub */
-#define LI_PCI_INTEN    0x46
-
-/* LI_PCI_INTENT bits */
-#define	LI_INTA_0	0x0001
-#define	LI_INTA_1	0x0002
-#define	LI_INTA_2	0x0004
-#define	LI_INTA_3	0x0008
-#define	LI_INTA_4	0x0010
-#define	LI_INTB		0x0020
-#define	LI_INTC		0x0040
-#define	LI_INTD		0x0080
-
-/* More special purpose macros... */
-static inline void li_pcia_write16(unsigned long reg, unsigned short v)
-{
-	*((volatile unsigned short *)(LI_PCIA_VADDR+reg))=v;
-}
-
-static inline unsigned short li_pcia_read16(unsigned long reg)
-{
-	 return *((volatile unsigned short *)(LI_PCIA_VADDR+reg));
-}
-
-static inline void li_pcib_write16(unsigned long reg, unsigned short v)
-{
-	*((volatile unsigned short *)(LI_PCIB_VADDR+reg))=v;
-}
-
-static inline unsigned short li_pcib_read16(unsigned long reg)
-{
-	return *((volatile unsigned short *)(LI_PCIB_VADDR+reg));
-}
-
-#endif
-
diff --git a/include/asm-x86/mach-visws/piix4.h b/include/asm-x86/mach-visws/piix4.h
deleted file mode 100644
index 83ea4f46e41..00000000000
--- a/include/asm-x86/mach-visws/piix4.h
+++ /dev/null
@@ -1,107 +0,0 @@
-#ifndef __I386_SGI_PIIX_H
-#define __I386_SGI_PIIX_H
-
-/*
- * PIIX4 as used on SGI Visual Workstations
- */
-
-#define	PIIX_PM_START		0x0F80
-
-#define	SIO_GPIO_START		0x0FC0
-
-#define	SIO_PM_START		0x0FC8
-
-#define	PMBASE			PIIX_PM_START
-#define	GPIREG0			(PMBASE+0x30)
-#define	GPIREG(x)		(GPIREG0+((x)/8))
-#define	GPIBIT(x)		(1 << ((x)%8))
-
-#define	PIIX_GPI_BD_ID1		18
-#define	PIIX_GPI_BD_ID2		19
-#define	PIIX_GPI_BD_ID3		20
-#define	PIIX_GPI_BD_ID4		21
-#define	PIIX_GPI_BD_REG		GPIREG(PIIX_GPI_BD_ID1)
-#define	PIIX_GPI_BD_MASK	(GPIBIT(PIIX_GPI_BD_ID1) | \
-				GPIBIT(PIIX_GPI_BD_ID2) | \
-				GPIBIT(PIIX_GPI_BD_ID3) | \
-				GPIBIT(PIIX_GPI_BD_ID4) )
-
-#define	PIIX_GPI_BD_SHIFT	(PIIX_GPI_BD_ID1 % 8)
-
-#define	SIO_INDEX		0x2e
-#define	SIO_DATA		0x2f
-
-#define	SIO_DEV_SEL		0x7
-#define	SIO_DEV_ENB		0x30
-#define	SIO_DEV_MSB		0x60
-#define	SIO_DEV_LSB		0x61
-
-#define	SIO_GP_DEV		0x7
-
-#define	SIO_GP_BASE		SIO_GPIO_START
-#define	SIO_GP_MSB		(SIO_GP_BASE>>8)
-#define	SIO_GP_LSB		(SIO_GP_BASE&0xff)
-
-#define	SIO_GP_DATA1		(SIO_GP_BASE+0)
-
-#define	SIO_PM_DEV		0x8
-
-#define	SIO_PM_BASE		SIO_PM_START
-#define	SIO_PM_MSB		(SIO_PM_BASE>>8)
-#define	SIO_PM_LSB		(SIO_PM_BASE&0xff)
-#define	SIO_PM_INDEX		(SIO_PM_BASE+0)
-#define	SIO_PM_DATA		(SIO_PM_BASE+1)
-
-#define	SIO_PM_FER2		0x1
-
-#define	SIO_PM_GP_EN		0x80
-
-
-
-/*
- * This is the dev/reg where generating a config cycle will
- * result in a PCI special cycle.
- */
-#define SPECIAL_DEV		0xff
-#define SPECIAL_REG		0x00
-
-/*
- * PIIX4 needs to see a special cycle with the following data
- * to be convinced the processor has gone into the stop grant
- * state.  PIIX4 insists on seeing this before it will power
- * down a system.
- */
-#define PIIX_SPECIAL_STOP		0x00120002
-
-#define PIIX4_RESET_PORT	0xcf9
-#define PIIX4_RESET_VAL		0x6
-
-#define PMSTS_PORT		0xf80	// 2 bytes	PM Status
-#define PMEN_PORT		0xf82	// 2 bytes	PM Enable
-#define	PMCNTRL_PORT		0xf84	// 2 bytes	PM Control
-
-#define PM_SUSPEND_ENABLE	0x2000	// start sequence to suspend state
-
-/*
- * PMSTS and PMEN I/O bit definitions.
- * (Bits are the same in both registers)
- */
-#define PM_STS_RSM		(1<<15)	// Resume Status
-#define PM_STS_PWRBTNOR		(1<<11)	// Power Button Override
-#define PM_STS_RTC		(1<<10)	// RTC status
-#define PM_STS_PWRBTN		(1<<8)	// Power Button Pressed?
-#define PM_STS_GBL		(1<<5)	// Global Status
-#define PM_STS_BM		(1<<4)	// Bus Master Status
-#define PM_STS_TMROF		(1<<0)	// Timer Overflow Status.
-
-/*
- * Stop clock GPI register
- */
-#define PIIX_GPIREG0			(0xf80 + 0x30)
-
-/*
- * Stop clock GPI bit in GPIREG0
- */
-#define	PIIX_GPI_STPCLK		0x4	// STPCLK signal routed back in
-
-#endif
diff --git a/include/asm-x86/visws/cobalt.h b/include/asm-x86/visws/cobalt.h
new file mode 100644
index 00000000000..995258831b7
--- /dev/null
+++ b/include/asm-x86/visws/cobalt.h
@@ -0,0 +1,125 @@
+#ifndef __I386_SGI_COBALT_H
+#define __I386_SGI_COBALT_H
+
+#include <asm/fixmap.h>
+
+/*
+ * Cobalt SGI Visual Workstation system ASIC
+ */ 
+
+#define CO_CPU_NUM_PHYS 0x1e00
+#define CO_CPU_TAB_PHYS (CO_CPU_NUM_PHYS + 2)
+
+#define CO_CPU_MAX 4
+
+#define	CO_CPU_PHYS		0xc2000000
+#define	CO_APIC_PHYS		0xc4000000
+
+/* see set_fixmap() and asm/fixmap.h */
+#define	CO_CPU_VADDR		(fix_to_virt(FIX_CO_CPU))
+#define	CO_APIC_VADDR		(fix_to_virt(FIX_CO_APIC))
+
+/* Cobalt CPU registers -- relative to CO_CPU_VADDR, use co_cpu_*() */
+#define	CO_CPU_REV		0x08
+#define	CO_CPU_CTRL		0x10
+#define	CO_CPU_STAT		0x20
+#define	CO_CPU_TIMEVAL		0x30
+
+/* CO_CPU_CTRL bits */
+#define	CO_CTRL_TIMERUN		0x04		/* 0 == disabled */
+#define	CO_CTRL_TIMEMASK	0x08		/* 0 == unmasked */
+
+/* CO_CPU_STATUS bits */
+#define	CO_STAT_TIMEINTR	0x02	/* (r) 1 == int pend, (w) 0 == clear */
+
+/* CO_CPU_TIMEVAL value */
+#define	CO_TIME_HZ		100000000	/* Cobalt core rate */
+
+/* Cobalt APIC registers -- relative to CO_APIC_VADDR, use co_apic_*() */
+#define	CO_APIC_HI(n)		(((n) * 0x10) + 4)
+#define	CO_APIC_LO(n)		((n) * 0x10)
+#define	CO_APIC_ID		0x0ffc
+
+/* CO_APIC_ID bits */
+#define	CO_APIC_ENABLE		0x00000100
+
+/* CO_APIC_LO bits */
+#define	CO_APIC_MASK		0x00010000	/* 0 = enabled */
+#define	CO_APIC_LEVEL		0x00008000	/* 0 = edge */
+
+/*
+ * Where things are physically wired to Cobalt
+ * #defines with no board _<type>_<rev>_ are common to all (thus far)
+ */
+#define	CO_APIC_IDE0		4
+#define CO_APIC_IDE1		2		/* Only on 320 */
+
+#define	CO_APIC_8259		12		/* serial, floppy, par-l-l */
+
+/* Lithium PCI Bridge A -- "the one with 82557 Ethernet" */
+#define	CO_APIC_PCIA_BASE0	0 /* and 1 */	/* slot 0, line 0 */
+#define	CO_APIC_PCIA_BASE123	5 /* and 6 */	/* slot 0, line 1 */
+
+#define	CO_APIC_PIIX4_USB	7		/* this one is weird */
+
+/* Lithium PCI Bridge B -- "the one with PIIX4" */
+#define	CO_APIC_PCIB_BASE0	8 /* and 9-12 *//* slot 0, line 0 */
+#define	CO_APIC_PCIB_BASE123	13 /* 14.15 */	/* slot 0, line 1 */
+
+#define	CO_APIC_VIDOUT0		16
+#define	CO_APIC_VIDOUT1		17
+#define	CO_APIC_VIDIN0		18
+#define	CO_APIC_VIDIN1		19
+
+#define	CO_APIC_LI_AUDIO	22
+
+#define	CO_APIC_AS		24
+#define	CO_APIC_RE		25
+
+#define CO_APIC_CPU		28		/* Timer and Cache interrupt */
+#define	CO_APIC_NMI		29
+#define	CO_APIC_LAST		CO_APIC_NMI
+
+/*
+ * This is how irqs are assigned on the Visual Workstation.
+ * Legacy devices get irq's 1-15 (system clock is 0 and is CO_APIC_CPU).
+ * All other devices (including PCI) go to Cobalt and are irq's 16 on up.
+ */
+#define	CO_IRQ_APIC0	16			/* irq of apic entry 0 */
+#define	IS_CO_APIC(irq)	((irq) >= CO_IRQ_APIC0)
+#define	CO_IRQ(apic)	(CO_IRQ_APIC0 + (apic))	/* apic ent to irq */
+#define	CO_APIC(irq)	((irq) - CO_IRQ_APIC0)	/* irq to apic ent */
+#define CO_IRQ_IDE0	14			/* knowledge of... */
+#define CO_IRQ_IDE1	15			/* ... ide driver defaults! */
+#define	CO_IRQ_8259	CO_IRQ(CO_APIC_8259)
+
+#ifdef CONFIG_X86_VISWS_APIC
+static inline void co_cpu_write(unsigned long reg, unsigned long v)
+{
+	*((volatile unsigned long *)(CO_CPU_VADDR+reg))=v;
+}
+
+static inline unsigned long co_cpu_read(unsigned long reg)
+{
+	return *((volatile unsigned long *)(CO_CPU_VADDR+reg));
+}            
+             
+static inline void co_apic_write(unsigned long reg, unsigned long v)
+{
+	*((volatile unsigned long *)(CO_APIC_VADDR+reg))=v;
+}            
+             
+static inline unsigned long co_apic_read(unsigned long reg)
+{
+	return *((volatile unsigned long *)(CO_APIC_VADDR+reg));
+}
+#endif
+
+extern char visws_board_type;
+
+#define	VISWS_320	0
+#define	VISWS_540	1
+
+extern char visws_board_rev;
+
+#endif /* __I386_SGI_COBALT_H */
diff --git a/include/asm-x86/visws/lithium.h b/include/asm-x86/visws/lithium.h
new file mode 100644
index 00000000000..dfcd4f07ab8
--- /dev/null
+++ b/include/asm-x86/visws/lithium.h
@@ -0,0 +1,53 @@
+#ifndef __I386_SGI_LITHIUM_H
+#define __I386_SGI_LITHIUM_H
+
+#include <asm/fixmap.h>
+
+/*
+ * Lithium is the SGI Visual Workstation I/O ASIC
+ */
+
+#define	LI_PCI_A_PHYS		0xfc000000	/* Enet is dev 3 */
+#define	LI_PCI_B_PHYS		0xfd000000	/* PIIX4 is here */
+
+/* see set_fixmap() and asm/fixmap.h */
+#define LI_PCIA_VADDR   (fix_to_virt(FIX_LI_PCIA))
+#define LI_PCIB_VADDR   (fix_to_virt(FIX_LI_PCIB))
+
+/* Not a standard PCI? (not in linux/pci.h) */
+#define	LI_PCI_BUSNUM	0x44			/* lo8: primary, hi8: sub */
+#define LI_PCI_INTEN    0x46
+
+/* LI_PCI_INTENT bits */
+#define	LI_INTA_0	0x0001
+#define	LI_INTA_1	0x0002
+#define	LI_INTA_2	0x0004
+#define	LI_INTA_3	0x0008
+#define	LI_INTA_4	0x0010
+#define	LI_INTB		0x0020
+#define	LI_INTC		0x0040
+#define	LI_INTD		0x0080
+
+/* More special purpose macros... */
+static inline void li_pcia_write16(unsigned long reg, unsigned short v)
+{
+	*((volatile unsigned short *)(LI_PCIA_VADDR+reg))=v;
+}
+
+static inline unsigned short li_pcia_read16(unsigned long reg)
+{
+	 return *((volatile unsigned short *)(LI_PCIA_VADDR+reg));
+}
+
+static inline void li_pcib_write16(unsigned long reg, unsigned short v)
+{
+	*((volatile unsigned short *)(LI_PCIB_VADDR+reg))=v;
+}
+
+static inline unsigned short li_pcib_read16(unsigned long reg)
+{
+	return *((volatile unsigned short *)(LI_PCIB_VADDR+reg));
+}
+
+#endif
+
diff --git a/include/asm-x86/visws/piix4.h b/include/asm-x86/visws/piix4.h
new file mode 100644
index 00000000000..83ea4f46e41
--- /dev/null
+++ b/include/asm-x86/visws/piix4.h
@@ -0,0 +1,107 @@
+#ifndef __I386_SGI_PIIX_H
+#define __I386_SGI_PIIX_H
+
+/*
+ * PIIX4 as used on SGI Visual Workstations
+ */
+
+#define	PIIX_PM_START		0x0F80
+
+#define	SIO_GPIO_START		0x0FC0
+
+#define	SIO_PM_START		0x0FC8
+
+#define	PMBASE			PIIX_PM_START
+#define	GPIREG0			(PMBASE+0x30)
+#define	GPIREG(x)		(GPIREG0+((x)/8))
+#define	GPIBIT(x)		(1 << ((x)%8))
+
+#define	PIIX_GPI_BD_ID1		18
+#define	PIIX_GPI_BD_ID2		19
+#define	PIIX_GPI_BD_ID3		20
+#define	PIIX_GPI_BD_ID4		21
+#define	PIIX_GPI_BD_REG		GPIREG(PIIX_GPI_BD_ID1)
+#define	PIIX_GPI_BD_MASK	(GPIBIT(PIIX_GPI_BD_ID1) | \
+				GPIBIT(PIIX_GPI_BD_ID2) | \
+				GPIBIT(PIIX_GPI_BD_ID3) | \
+				GPIBIT(PIIX_GPI_BD_ID4) )
+
+#define	PIIX_GPI_BD_SHIFT	(PIIX_GPI_BD_ID1 % 8)
+
+#define	SIO_INDEX		0x2e
+#define	SIO_DATA		0x2f
+
+#define	SIO_DEV_SEL		0x7
+#define	SIO_DEV_ENB		0x30
+#define	SIO_DEV_MSB		0x60
+#define	SIO_DEV_LSB		0x61
+
+#define	SIO_GP_DEV		0x7
+
+#define	SIO_GP_BASE		SIO_GPIO_START
+#define	SIO_GP_MSB		(SIO_GP_BASE>>8)
+#define	SIO_GP_LSB		(SIO_GP_BASE&0xff)
+
+#define	SIO_GP_DATA1		(SIO_GP_BASE+0)
+
+#define	SIO_PM_DEV		0x8
+
+#define	SIO_PM_BASE		SIO_PM_START
+#define	SIO_PM_MSB		(SIO_PM_BASE>>8)
+#define	SIO_PM_LSB		(SIO_PM_BASE&0xff)
+#define	SIO_PM_INDEX		(SIO_PM_BASE+0)
+#define	SIO_PM_DATA		(SIO_PM_BASE+1)
+
+#define	SIO_PM_FER2		0x1
+
+#define	SIO_PM_GP_EN		0x80
+
+
+
+/*
+ * This is the dev/reg where generating a config cycle will
+ * result in a PCI special cycle.
+ */
+#define SPECIAL_DEV		0xff
+#define SPECIAL_REG		0x00
+
+/*
+ * PIIX4 needs to see a special cycle with the following data
+ * to be convinced the processor has gone into the stop grant
+ * state.  PIIX4 insists on seeing this before it will power
+ * down a system.
+ */
+#define PIIX_SPECIAL_STOP		0x00120002
+
+#define PIIX4_RESET_PORT	0xcf9
+#define PIIX4_RESET_VAL		0x6
+
+#define PMSTS_PORT		0xf80	// 2 bytes	PM Status
+#define PMEN_PORT		0xf82	// 2 bytes	PM Enable
+#define	PMCNTRL_PORT		0xf84	// 2 bytes	PM Control
+
+#define PM_SUSPEND_ENABLE	0x2000	// start sequence to suspend state
+
+/*
+ * PMSTS and PMEN I/O bit definitions.
+ * (Bits are the same in both registers)
+ */
+#define PM_STS_RSM		(1<<15)	// Resume Status
+#define PM_STS_PWRBTNOR		(1<<11)	// Power Button Override
+#define PM_STS_RTC		(1<<10)	// RTC status
+#define PM_STS_PWRBTN		(1<<8)	// Power Button Pressed?
+#define PM_STS_GBL		(1<<5)	// Global Status
+#define PM_STS_BM		(1<<4)	// Bus Master Status
+#define PM_STS_TMROF		(1<<0)	// Timer Overflow Status.
+
+/*
+ * Stop clock GPI register
+ */
+#define PIIX_GPIREG0			(0xf80 + 0x30)
+
+/*
+ * Stop clock GPI bit in GPIREG0
+ */
+#define	PIIX_GPI_STPCLK		0x4	// STPCLK signal routed back in
+
+#endif
-- 
cgit v1.2.3-70-g09d2


From efd746b8892d1d40c43c3d518b3bde9e56238ce8 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 10 Jul 2008 15:31:35 +0200
Subject: x86, VisWS: turn into generic arch, move definitions

move the SGIVW definitions from setup_arch.h into its own header file.

preparation for turning VISWS into a generic PC architecture.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/video/sgivwfb.c                 | 3 +--
 include/asm-x86/mach-visws/setup_arch.h | 3 ---
 include/asm-x86/visws/sgivw.h           | 5 +++++
 3 files changed, 6 insertions(+), 5 deletions(-)
 create mode 100644 include/asm-x86/visws/sgivw.h

(limited to 'include/asm-x86')

diff --git a/drivers/video/sgivwfb.c b/drivers/video/sgivwfb.c
index 4fb16240c04..f5252c2552f 100644
--- a/drivers/video/sgivwfb.c
+++ b/drivers/video/sgivwfb.c
@@ -21,8 +21,7 @@
 
 #include <asm/io.h>
 #include <asm/mtrr.h>
-
-#include <setup_arch.h>
+#include <asm/visws/sgivw.h>
 
 #define INCLUDE_TIMING_TABLE_DATA
 #define DBE_REG_BASE par->regs
diff --git a/include/asm-x86/mach-visws/setup_arch.h b/include/asm-x86/mach-visws/setup_arch.h
index b8f5dd829db..ba70471ff8c 100644
--- a/include/asm-x86/mach-visws/setup_arch.h
+++ b/include/asm-x86/mach-visws/setup_arch.h
@@ -1,6 +1,3 @@
 /* Hook to call BIOS initialisation function */
 
-extern unsigned long sgivwfb_mem_phys;
-extern unsigned long sgivwfb_mem_size;
-
 /* no action for visws */
diff --git a/include/asm-x86/visws/sgivw.h b/include/asm-x86/visws/sgivw.h
new file mode 100644
index 00000000000..5fbf63e1003
--- /dev/null
+++ b/include/asm-x86/visws/sgivw.h
@@ -0,0 +1,5 @@
+/*
+ * Frame buffer position and size:
+ */
+extern unsigned long sgivwfb_mem_phys;
+extern unsigned long sgivwfb_mem_size;
-- 
cgit v1.2.3-70-g09d2


From 4191894b68a85bd8da935fb9ac4c882a1fa8651c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 10 Jul 2008 15:33:49 +0200
Subject: x86, VisWS: turn into generic arch, eliminate
 include/asm-x86/mach-visws/setup_arch.h

use the generic version of setup_arch.h - it's the same.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/mach-visws/setup_arch.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/mach-visws/setup_arch.h b/include/asm-x86/mach-visws/setup_arch.h
index ba70471ff8c..fa4766ca2d1 100644
--- a/include/asm-x86/mach-visws/setup_arch.h
+++ b/include/asm-x86/mach-visws/setup_arch.h
@@ -1,3 +1 @@
-/* Hook to call BIOS initialisation function */
-
-/* no action for visws */
+#include "../mach-default/setup_arch.h"
-- 
cgit v1.2.3-70-g09d2


From 8bfaba873f0cc81c1747d6787f2721926192a3dc Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 10 Jul 2008 15:42:50 +0200
Subject: x86, VisWS: turn into generic arch, add NR_IRQS quirk

NR_IRQS: let VISWS be just a sub-case of the generic code.

This can create a somewhat larger irq_desc[] array if NR_CPUS is high
but that should not worry VisWS which has 4 CPUs at most.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/irq_vectors.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h
index b58581e2e24..0ac864ef3cd 100644
--- a/include/asm-x86/irq_vectors.h
+++ b/include/asm-x86/irq_vectors.h
@@ -107,9 +107,9 @@
 #define LAST_VM86_IRQ		15
 #define invalid_vm86_irq(irq)	((irq) < 3 || (irq) > 15)
 
-#if !defined(CONFIG_X86_VISWS) && !defined(CONFIG_X86_VOYAGER)
+#if !defined(CONFIG_X86_VOYAGER)
 
-# if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_PARAVIRT)
+# if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_PARAVIRT) || defined(CONFIG_X86_VISWS)
 
 #  define NR_IRQS		224
 
-- 
cgit v1.2.3-70-g09d2


From f361a450bf1ad14e2b003217dbf3958638631265 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Thu, 10 Jul 2008 20:38:26 -0700
Subject: x86: introduce max_low_pfn_mapped for 64-bit

when more than 4g memory is installed, don't map the big hole below 4g.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/acpi/boot.c  |  2 +-
 arch/x86/kernel/cpu/amd_64.c | 10 +++++++---
 arch/x86/kernel/e820.c       | 23 ++++++++++++++++++++---
 arch/x86/kernel/efi.c        |  2 +-
 arch/x86/kernel/setup.c      | 22 ++++++++++++++++++----
 arch/x86/mm/init_32.c        |  1 +
 arch/x86/mm/init_64.c        |  1 +
 arch/x86/mm/pageattr.c       | 19 +++++++++++++++++--
 arch/x86/mm/pat.c            |  3 ++-
 arch/x86/pci/i386.c          |  4 +++-
 include/asm-x86/e820.h       |  3 ++-
 include/asm-x86/page.h       |  1 +
 12 files changed, 74 insertions(+), 17 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index a31a579a47c..9c981c4a364 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -130,7 +130,7 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
 	if (!phys || !size)
 		return NULL;
 
-	if (phys+size <= (max_pfn_mapped << PAGE_SHIFT))
+	if (phys+size <= (max_low_pfn_mapped << PAGE_SHIFT))
 		return __va(phys);
 
 	offset = phys & (PAGE_SIZE - 1);
diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c
index 958526d6a74..bd182b7616e 100644
--- a/arch/x86/kernel/cpu/amd_64.c
+++ b/arch/x86/kernel/cpu/amd_64.c
@@ -199,10 +199,14 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 		 * Don't do it for gbpages because there seems very little
 		 * benefit in doing so.
 		 */
-		if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) &&
-		    (tseg >> PMD_SHIFT) <
-			(max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT)))
+		if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
+		    if ((tseg>>PMD_SHIFT) <
+				(max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
+			((tseg>>PMD_SHIFT) <
+				(max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
+			 (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
 			set_memory_4k((unsigned long)__va(tseg), 1);
+		}
 	}
 }
 
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 3451e0b3f32..9f5002e0b35 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1056,7 +1056,7 @@ unsigned long __initdata end_user_pfn = MAX_ARCH_PFN;
 /*
  * Find the highest page frame number we have available
  */
-unsigned long __init e820_end(void)
+static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
 {
 	int i;
 	unsigned long last_pfn = 0;
@@ -1064,12 +1064,21 @@ unsigned long __init e820_end(void)
 
 	for (i = 0; i < e820.nr_map; i++) {
 		struct e820entry *ei = &e820.map[i];
+		unsigned long start_pfn;
 		unsigned long end_pfn;
 
-		if (ei->type != E820_RAM)
+		if (ei->type != type)
 			continue;
 
+		start_pfn = ei->addr >> PAGE_SHIFT;
 		end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT;
+
+		if (start_pfn >= limit_pfn)
+			continue;
+		if (end_pfn > limit_pfn) {
+			last_pfn = limit_pfn;
+			break;
+		}
 		if (end_pfn > last_pfn)
 			last_pfn = end_pfn;
 	}
@@ -1083,7 +1092,15 @@ unsigned long __init e820_end(void)
 			 last_pfn, max_arch_pfn);
 	return last_pfn;
 }
+unsigned long __init e820_end_of_ram_pfn(void)
+{
+	return e820_end_pfn(MAX_ARCH_PFN, E820_RAM);
+}
 
+unsigned long __init e820_end_of_low_ram_pfn(void)
+{
+	return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM);
+}
 /*
  * Finds an active region in the address range from start_pfn to last_pfn and
  * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
@@ -1206,7 +1223,7 @@ static int __init parse_memmap_opt(char *p)
 		 * the real mem size before original memory map is
 		 * reset.
 		 */
-		saved_max_pfn = e820_end();
+		saved_max_pfn = e820_end_of_ram_pfn();
 #endif
 		e820.nr_map = 0;
 		userdef = 1;
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 94382faeadb..06cc8d4254b 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -473,7 +473,7 @@ void __init efi_enter_virtual_mode(void)
 		size = md->num_pages << EFI_PAGE_SHIFT;
 		end = md->phys_addr + size;
 
-		if (PFN_UP(end) <= max_pfn_mapped)
+		if (PFN_UP(end) <= max_low_pfn_mapped)
 			va = __va(md->phys_addr);
 		else
 			va = efi_ioremap(md->phys_addr, size);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index a7c3471ea17..86fc2d62427 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -713,14 +713,14 @@ void __init setup_arch(char **cmdline_p)
 	 * partially used pages are not usable - thus
 	 * we are rounding upwards:
 	 */
-	max_pfn = e820_end();
+	max_pfn = e820_end_of_ram_pfn();
 
 	/* preallocate 4k for mptable mpc */
 	early_reserve_e820_mpc_new();
 	/* update e820 for memory not covered by WB MTRRs */
 	mtrr_bp_init();
 	if (mtrr_trim_uncached_memory(max_pfn))
-		max_pfn = e820_end();
+		max_pfn = e820_end_of_ram_pfn();
 
 #ifdef CONFIG_X86_32
 	/* max_low_pfn get updated here */
@@ -732,12 +732,26 @@ void __init setup_arch(char **cmdline_p)
 
 	/* How many end-of-memory variables you have, grandma! */
 	/* need this before calling reserve_initrd */
-	max_low_pfn = max_pfn;
+	if (max_pfn > (1UL<<(32 - PAGE_SHIFT)))
+		max_low_pfn = e820_end_of_low_ram_pfn();
+	else
+		max_low_pfn = max_pfn;
+
 	high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
 #endif
 
 	/* max_pfn_mapped is updated here */
-	max_pfn_mapped = init_memory_mapping(0, (max_low_pfn << PAGE_SHIFT));
+	max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
+	max_pfn_mapped = max_low_pfn_mapped;
+
+#ifdef CONFIG_X86_64
+	if (max_pfn > max_low_pfn) {
+		max_pfn_mapped = init_memory_mapping(1UL<<32,
+						     max_pfn<<PAGE_SHIFT);
+		/* can we preseve max_low_pfn ?*/
+		max_low_pfn = max_pfn;
+	}
+#endif
 
 	/*
 	 * NOTE: On x86-32, only from this point on, fixmaps are ready for use.
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index b5a0fd5f4c5..029e8cffca9 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -50,6 +50,7 @@
 
 unsigned int __VMALLOC_RESERVE = 128 << 20;
 
+unsigned long max_low_pfn_mapped;
 unsigned long max_pfn_mapped;
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 48548ef7ddf..122bcef222f 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -53,6 +53,7 @@
  * The direct mapping extends to max_pfn_mapped, so that we can directly access
  * apertures, ACPI and other tables without having to play with fixmaps.
  */
+unsigned long max_low_pfn_mapped;
 unsigned long max_pfn_mapped;
 
 static unsigned long dma_reserve __initdata;
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index afd40054d15..0389cb8f6b1 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -536,8 +536,14 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 		set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
 
 	if (address >= (unsigned long)__va(0) &&
+		address < (unsigned long)__va(max_low_pfn_mapped << PAGE_SHIFT))
+		split_page_count(level);
+
+#ifdef CONFIG_X86_64
+	if (address >= (unsigned long)__va(1UL<<32) &&
 		address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT))
 		split_page_count(level);
+#endif
 
 	/*
 	 * Install the new, split up pagetable. Important details here:
@@ -655,12 +661,21 @@ static int cpa_process_alias(struct cpa_data *cpa)
 	if (cpa->pfn > max_pfn_mapped)
 		return 0;
 
+#ifdef CONFIG_X86_64
+	if (cpa->pfn > max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT)))
+		return 0;
+#endif
 	/*
 	 * No need to redo, when the primary call touched the direct
 	 * mapping already:
 	 */
-	if (!within(cpa->vaddr, PAGE_OFFSET,
-		    PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) {
+	if (!(within(cpa->vaddr, PAGE_OFFSET,
+		    PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT))
+#ifdef CONFIG_X86_64
+		|| within(cpa->vaddr, PAGE_OFFSET + (1UL<<32),
+		    PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))
+#endif
+	)) {
 
 		alias_cpa = *cpa;
 		alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index a885a1019b8..749766c3c5c 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -449,7 +449,8 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 	if (retval < 0)
 		return 0;
 
-	if (pfn <= max_pfn_mapped &&
+	if (((pfn <= max_low_pfn_mapped) ||
+	     (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn <= max_pfn_mapped)) &&
 	    ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
 		free_memtype(offset, offset + size);
 		printk(KERN_INFO
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 6ccd7a108cd..5281e343dd9 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -334,7 +334,9 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 		flags = new_flags;
 	}
 
-	if (vma->vm_pgoff <= max_pfn_mapped &&
+	if (((vma->vm_pgoff <= max_low_pfn_mapped) ||
+	     (vma->vm_pgoff >= (1UL<<(32 - PAGE_SHIFT)) &&
+	      vma->vm_pgoff <= max_pfn_mapped)) &&
 	    ioremap_change_attr((unsigned long)__va(addr), len, flags)) {
 		free_memtype(addr, addr + len);
 		return -EINVAL;
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index 78c03d7bf44..33e793e991d 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -99,7 +99,8 @@ extern void free_early(u64 start, u64 end);
 extern void early_res_to_bootmem(u64 start, u64 end);
 extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
 
-extern unsigned long e820_end(void);
+extern unsigned long e820_end_of_ram_pfn(void);
+extern unsigned long e820_end_of_low_ram_pfn(void);
 extern int e820_find_active_region(const struct e820entry *ei,
 				  unsigned long start_pfn,
 				  unsigned long last_pfn,
diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index b52ed85f32f..28d7b4533b1 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -61,6 +61,7 @@ extern void map_devmem(unsigned long pfn, unsigned long size,
 extern void unmap_devmem(unsigned long pfn, unsigned long size,
 			 pgprot_t vma_prot);
 
+extern unsigned long max_low_pfn_mapped;
 extern unsigned long max_pfn_mapped;
 
 struct page;
-- 
cgit v1.2.3-70-g09d2


From 8d28aab59fe939be40efae870ced0b05caa259fb Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Thu, 10 Jul 2008 16:22:56 -0700
Subject: x86_64: add pseudo-features for 32-bit compat syscall

Add pseudo-feature bits to describe whether the CPU supports sysenter
and/or syscall from ia32-compat userspace.  This removes a hardcoded
test in vdso32-setup.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/centaur_64.c | 2 ++
 arch/x86/kernel/cpu/common_64.c  | 3 +++
 arch/x86/kernel/cpu/intel_64.c   | 2 ++
 include/asm-x86/cpufeature.h     | 4 ++--
 4 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c
index 13526fd5cce..2026d2119cd 100644
--- a/arch/x86/kernel/cpu/centaur_64.c
+++ b/arch/x86/kernel/cpu/centaur_64.c
@@ -10,6 +10,8 @@ static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
 {
 	if (c->x86 == 0x6 && c->x86_model >= 0xf)
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+
+	set_cpu_cap(c, X86_FEATURE_SYSENTER32);
 }
 
 static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index 75185023529..36537ab9e56 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -314,6 +314,9 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
 	if (c->extended_cpuid_level >= 0x80000007)
 		c->x86_power = cpuid_edx(0x80000007);
 
+	/* Assume all 64-bit CPUs support 32-bit syscall */
+	set_cpu_cap(c, X86_FEATURE_SYSCALL32);
+
 	if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
 	    cpu_devs[c->x86_vendor]->c_early_init)
 		cpu_devs[c->x86_vendor]->c_early_init(c);
diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c
index fcb1cc9d75c..02f773399e3 100644
--- a/arch/x86/kernel/cpu/intel_64.c
+++ b/arch/x86/kernel/cpu/intel_64.c
@@ -12,6 +12,8 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 	if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
 	    (c->x86 == 0x6 && c->x86_model >= 0x0e))
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+
+	set_cpu_cap(c, X86_FEATURE_SYSENTER32);
 }
 
 /*
diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index 84a56da397b..75ef959db32 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -74,8 +74,8 @@
 #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
 #define X86_FEATURE_PEBS	(3*32+12)  /* Precise-Event Based Sampling */
 #define X86_FEATURE_BTS		(3*32+13)  /* Branch Trace Store */
-/* 14 free */
-/* 15 free */
+#define X86_FEATURE_SYSCALL32	(3*32+14)  /* syscall in ia32 userspace */
+#define X86_FEATURE_SYSENTER32	(3*32+15)  /* sysenter in ia32 userspace */
 #define X86_FEATURE_REP_GOOD	(3*32+16) /* rep microcode works well on this CPU */
 #define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* Mfence synchronizes RDTSC */
 #define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */
-- 
cgit v1.2.3-70-g09d2


From d9fc3fd3fab186447b5d2e7db3c2ee149064cc7c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 11 Jul 2008 19:41:19 +0200
Subject: x86: fix savesegment() bug causing crashes on 64-bit

i spent a fair amount of time chasing a 64-bit bootup crash that manifested
itself as bootup segfaults:

  S10network[1825]: segfault at 7f3e2b5d16b8 ip 00000031108748c9 sp 00007fffb9c14c70 error 4 in libc-2.7.so[3110800000+14d000]

eventually causing init to die and panic the system:

  Kernel panic - not syncing: Attempted to kill init!
  Pid: 1, comm: init Not tainted 2.6.26-rc9-tip #13878

after a maratonic bisection session, the bad commit turned out to be:

| b7675791859075418199c7af86a116ea34eaf5bd is first bad commit
| commit b7675791859075418199c7af86a116ea34eaf5bd
| Author: Jeremy Fitzhardinge <jeremy@goop.org>
| Date:   Wed Jun 25 00:19:00 2008 -0400
|
|     x86: remove open-coded save/load segment operations
|
|     This removes a pile of buggy open-coded implementations of savesegment
|     and loadsegment.

after some more bisection of this patch itself, it turns out that what
makes the difference are the savesegment() changes to __switch_to().

Taking a look at this portion of arch/x86/kernel/process_64.o revealed
this crutial difference:

| good:    99c:       8c e0                   mov    %fs,%eax
|          99e:       89 45 cc                mov    %eax,-0x34(%rbp)
|
| bad:     99c:       8c 65 cc                mov    %fs,-0x34(%rbp)

which is due to:

|                 unsigned fsindex;
| -               asm volatile("movl %%fs,%0" : "=r" (fsindex));
| +               savesegment(fs, fsindex);

savesegment() is implemented as:

 #define savesegment(seg, value)                                \
          asm("mov %%" #seg ",%0":"=rm" (value) : : "memory")

note the "m" modifier - it allows GCC to generate the segment move
into a memory operand as well.

But regarding segment operands there's a subtle detail in the x86
instruction set: the above 16-bit moves are zero-extend, but only
if it goes to a register.

If it goes to a memory operand, -0x34(%rbp) in the above case, there's
no zero-extend to 32-bit and the instruction will only save 16 bits
instead of the intended 32-bit.

The other 16 bits is random data - which can cause problems when that
value is used later on.

The solution is to only allow segment operands to go to registers.
This fix allows my test-system to boot up without crashing.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/system.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h
index c4946c5964b..983ce37c491 100644
--- a/include/asm-x86/system.h
+++ b/include/asm-x86/system.h
@@ -160,7 +160,7 @@ extern void native_load_gs_index(unsigned);
  * Save a segment register away
  */
 #define savesegment(seg, value)				\
-	asm("mov %%" #seg ",%0":"=rm" (value) : : "memory")
+	asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
 
 static inline unsigned long get_limit(unsigned long segment)
 {
-- 
cgit v1.2.3-70-g09d2


From 70f1bba4c8ff88d7fd9e06f1b2cb2b3c58e8e5f9 Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Thu, 10 Jul 2008 15:14:57 -0300
Subject: x86: use ignore macro instead of hash comment

In dwarf_64.h header, use the "ignore" macro the way
i386 does.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/dwarf2_64.h | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/dwarf2_64.h b/include/asm-x86/dwarf2_64.h
index c950519a264..47776f461e5 100644
--- a/include/asm-x86/dwarf2_64.h
+++ b/include/asm-x86/dwarf2_64.h
@@ -35,21 +35,23 @@
 
 #else
 
-/* use assembler line comment character # to ignore the arguments. */
-#define CFI_STARTPROC	#
-#define CFI_ENDPROC	#
-#define CFI_DEF_CFA	#
-#define CFI_DEF_CFA_REGISTER	#
-#define CFI_DEF_CFA_OFFSET	#
-#define CFI_ADJUST_CFA_OFFSET	#
-#define CFI_OFFSET	#
-#define CFI_REL_OFFSET	#
-#define CFI_REGISTER	#
-#define CFI_RESTORE	#
-#define CFI_REMEMBER_STATE	#
-#define CFI_RESTORE_STATE	#
-#define CFI_UNDEFINED	#
-#define CFI_SIGNAL_FRAME	#
+.macro ignore a=0, b=0, c=0, d=0
+.endm
+
+#define CFI_STARTPROC	ignore
+#define CFI_ENDPROC	ignore
+#define CFI_DEF_CFA	ignore
+#define CFI_DEF_CFA_REGISTER	ignore
+#define CFI_DEF_CFA_OFFSET	ignore
+#define CFI_ADJUST_CFA_OFFSET	ignore
+#define CFI_OFFSET	ignore
+#define CFI_REL_OFFSET	ignore
+#define CFI_REGISTER	ignore
+#define CFI_RESTORE	ignore
+#define CFI_REMEMBER_STATE	ignore
+#define CFI_RESTORE_STATE	ignore
+#define CFI_UNDEFINED	ignore
+#define CFI_SIGNAL_FRAME	ignore
 
 #endif
 
-- 
cgit v1.2.3-70-g09d2


From d73a731abe2a77fb54c5de4f72036b60ce3a048e Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Thu, 10 Jul 2008 15:17:53 -0300
Subject: x86: use AS_CFI instead of UNWIND_INFO

In dwarf2_32.h, test for CONFIG_AS_CFI instead of
CONFIG_UNWIND_INFO. Turns out that searching for UNWIND_INFO
returns no match in any Kconfig or Makefile, so we're really
just throwing everything away regarding dwarf frames for i386.

The test that generates CONFIG_AS_CFI does not have anything
x86_64-specific, and right now, checking V=1 builds shows me
that the flags is there anyway, although unused.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/dwarf2_32.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/dwarf2_32.h b/include/asm-x86/dwarf2_32.h
index 6d66398a307..0bfe250894f 100644
--- a/include/asm-x86/dwarf2_32.h
+++ b/include/asm-x86/dwarf2_32.h
@@ -12,7 +12,7 @@
    away for older version.
  */
 
-#ifdef CONFIG_UNWIND_INFO
+#ifdef CONFIG_AS_CFI
 
 #define CFI_STARTPROC .cfi_startproc
 #define CFI_ENDPROC .cfi_endproc
-- 
cgit v1.2.3-70-g09d2


From 392a0fc96bd059b38564f5f8fb58327460cb5a9d Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Fri, 11 Jul 2008 12:36:52 -0300
Subject: x86: merge dwarf2 headers

Merge dwarf2_32.h and dwarf2_64.h into dwarf2.h.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/dwarf2.h    | 62 ++++++++++++++++++++++++++++++++++++++++++---
 include/asm-x86/dwarf2_32.h | 61 --------------------------------------------
 include/asm-x86/dwarf2_64.h | 58 ------------------------------------------
 3 files changed, 59 insertions(+), 122 deletions(-)
 delete mode 100644 include/asm-x86/dwarf2_32.h
 delete mode 100644 include/asm-x86/dwarf2_64.h

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/dwarf2.h b/include/asm-x86/dwarf2.h
index b3cbb0ccae1..0bfe250894f 100644
--- a/include/asm-x86/dwarf2.h
+++ b/include/asm-x86/dwarf2.h
@@ -1,5 +1,61 @@
-#ifdef CONFIG_X86_32
-# include "dwarf2_32.h"
+#ifndef _DWARF2_H
+#define _DWARF2_H
+
+#ifndef __ASSEMBLY__
+#warning "asm/dwarf2.h should be only included in pure assembly files"
+#endif
+
+/*
+   Macros for dwarf2 CFI unwind table entries.
+   See "as.info" for details on these pseudo ops. Unfortunately
+   they are only supported in very new binutils, so define them
+   away for older version.
+ */
+
+#ifdef CONFIG_AS_CFI
+
+#define CFI_STARTPROC .cfi_startproc
+#define CFI_ENDPROC .cfi_endproc
+#define CFI_DEF_CFA .cfi_def_cfa
+#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register
+#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
+#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset
+#define CFI_OFFSET .cfi_offset
+#define CFI_REL_OFFSET .cfi_rel_offset
+#define CFI_REGISTER .cfi_register
+#define CFI_RESTORE .cfi_restore
+#define CFI_REMEMBER_STATE .cfi_remember_state
+#define CFI_RESTORE_STATE .cfi_restore_state
+#define CFI_UNDEFINED .cfi_undefined
+
+#ifdef CONFIG_AS_CFI_SIGNAL_FRAME
+#define CFI_SIGNAL_FRAME .cfi_signal_frame
+#else
+#define CFI_SIGNAL_FRAME
+#endif
+
 #else
-# include "dwarf2_64.h"
+
+/* Due to the structure of pre-exisiting code, don't use assembler line
+   comment character # to ignore the arguments. Instead, use a dummy macro. */
+.macro ignore a=0, b=0, c=0, d=0
+.endm
+
+#define CFI_STARTPROC	ignore
+#define CFI_ENDPROC	ignore
+#define CFI_DEF_CFA	ignore
+#define CFI_DEF_CFA_REGISTER	ignore
+#define CFI_DEF_CFA_OFFSET	ignore
+#define CFI_ADJUST_CFA_OFFSET	ignore
+#define CFI_OFFSET	ignore
+#define CFI_REL_OFFSET	ignore
+#define CFI_REGISTER	ignore
+#define CFI_RESTORE	ignore
+#define CFI_REMEMBER_STATE ignore
+#define CFI_RESTORE_STATE ignore
+#define CFI_UNDEFINED ignore
+#define CFI_SIGNAL_FRAME ignore
+
+#endif
+
 #endif
diff --git a/include/asm-x86/dwarf2_32.h b/include/asm-x86/dwarf2_32.h
deleted file mode 100644
index 0bfe250894f..00000000000
--- a/include/asm-x86/dwarf2_32.h
+++ /dev/null
@@ -1,61 +0,0 @@
-#ifndef _DWARF2_H
-#define _DWARF2_H
-
-#ifndef __ASSEMBLY__
-#warning "asm/dwarf2.h should be only included in pure assembly files"
-#endif
-
-/*
-   Macros for dwarf2 CFI unwind table entries.
-   See "as.info" for details on these pseudo ops. Unfortunately
-   they are only supported in very new binutils, so define them
-   away for older version.
- */
-
-#ifdef CONFIG_AS_CFI
-
-#define CFI_STARTPROC .cfi_startproc
-#define CFI_ENDPROC .cfi_endproc
-#define CFI_DEF_CFA .cfi_def_cfa
-#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register
-#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
-#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset
-#define CFI_OFFSET .cfi_offset
-#define CFI_REL_OFFSET .cfi_rel_offset
-#define CFI_REGISTER .cfi_register
-#define CFI_RESTORE .cfi_restore
-#define CFI_REMEMBER_STATE .cfi_remember_state
-#define CFI_RESTORE_STATE .cfi_restore_state
-#define CFI_UNDEFINED .cfi_undefined
-
-#ifdef CONFIG_AS_CFI_SIGNAL_FRAME
-#define CFI_SIGNAL_FRAME .cfi_signal_frame
-#else
-#define CFI_SIGNAL_FRAME
-#endif
-
-#else
-
-/* Due to the structure of pre-exisiting code, don't use assembler line
-   comment character # to ignore the arguments. Instead, use a dummy macro. */
-.macro ignore a=0, b=0, c=0, d=0
-.endm
-
-#define CFI_STARTPROC	ignore
-#define CFI_ENDPROC	ignore
-#define CFI_DEF_CFA	ignore
-#define CFI_DEF_CFA_REGISTER	ignore
-#define CFI_DEF_CFA_OFFSET	ignore
-#define CFI_ADJUST_CFA_OFFSET	ignore
-#define CFI_OFFSET	ignore
-#define CFI_REL_OFFSET	ignore
-#define CFI_REGISTER	ignore
-#define CFI_RESTORE	ignore
-#define CFI_REMEMBER_STATE ignore
-#define CFI_RESTORE_STATE ignore
-#define CFI_UNDEFINED ignore
-#define CFI_SIGNAL_FRAME ignore
-
-#endif
-
-#endif
diff --git a/include/asm-x86/dwarf2_64.h b/include/asm-x86/dwarf2_64.h
deleted file mode 100644
index 47776f461e5..00000000000
--- a/include/asm-x86/dwarf2_64.h
+++ /dev/null
@@ -1,58 +0,0 @@
-#ifndef _DWARF2_H
-#define _DWARF2_H 1
-
-#ifndef __ASSEMBLY__
-#warning "asm/dwarf2.h should be only included in pure assembly files"
-#endif
-
-/*
-   Macros for dwarf2 CFI unwind table entries.
-   See "as.info" for details on these pseudo ops. Unfortunately
-   they are only supported in very new binutils, so define them
-   away for older version.
- */
-
-#ifdef CONFIG_AS_CFI
-
-#define CFI_STARTPROC .cfi_startproc
-#define CFI_ENDPROC .cfi_endproc
-#define CFI_DEF_CFA .cfi_def_cfa
-#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register
-#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
-#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset
-#define CFI_OFFSET .cfi_offset
-#define CFI_REL_OFFSET .cfi_rel_offset
-#define CFI_REGISTER .cfi_register
-#define CFI_RESTORE .cfi_restore
-#define CFI_REMEMBER_STATE .cfi_remember_state
-#define CFI_RESTORE_STATE .cfi_restore_state
-#define CFI_UNDEFINED .cfi_undefined
-#ifdef CONFIG_AS_CFI_SIGNAL_FRAME
-#define CFI_SIGNAL_FRAME .cfi_signal_frame
-#else
-#define CFI_SIGNAL_FRAME
-#endif
-
-#else
-
-.macro ignore a=0, b=0, c=0, d=0
-.endm
-
-#define CFI_STARTPROC	ignore
-#define CFI_ENDPROC	ignore
-#define CFI_DEF_CFA	ignore
-#define CFI_DEF_CFA_REGISTER	ignore
-#define CFI_DEF_CFA_OFFSET	ignore
-#define CFI_ADJUST_CFA_OFFSET	ignore
-#define CFI_OFFSET	ignore
-#define CFI_REL_OFFSET	ignore
-#define CFI_REGISTER	ignore
-#define CFI_RESTORE	ignore
-#define CFI_REMEMBER_STATE	ignore
-#define CFI_RESTORE_STATE	ignore
-#define CFI_UNDEFINED	ignore
-#define CFI_SIGNAL_FRAME	ignore
-
-#endif
-
-#endif
-- 
cgit v1.2.3-70-g09d2


From 5ac37f87ff18843aabab84cf75b2f8504c2d81fe Mon Sep 17 00:00:00 2001
From: Michael Karcher <kernel@mkarcher.dialup.fu-berlin.de>
Date: Fri, 11 Jul 2008 18:04:46 +0200
Subject: x86: fix ldt limit for 64 bit

Fix size of LDT entries. On x86-64, ldt_desc is a double-sized descriptor.

Signed-off-by: Michael Karcher <kernel@mkarcher.dialup.fu-berlin.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/desc.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h
index 268a012bcd7..28bddbcb38b 100644
--- a/include/asm-x86/desc.h
+++ b/include/asm-x86/desc.h
@@ -192,8 +192,8 @@ static inline void native_set_ldt(const void *addr, unsigned int entries)
 		unsigned cpu = smp_processor_id();
 		ldt_desc ldt;
 
-		set_tssldt_descriptor(&ldt, (unsigned long)addr,
-				      DESC_LDT, entries * sizeof(ldt) - 1);
+		set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT,
+				      entries * LDT_ENTRY_SIZE - 1);
 		write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT,
 				&ldt, DESC_LDT);
 		asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
-- 
cgit v1.2.3-70-g09d2


From 3d88cca7085cffce077f808f36551e9050eb9e3a Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sat, 12 Jul 2008 22:52:55 -0700
Subject: x86: fix numaq_tsc_disable calling

got this on a test-system:

 calling  numaq_tsc_disable+0x0/0x39
 NUMAQ: disabling TSC
 initcall numaq_tsc_disable+0x0/0x39 returned 0 after 0 msecs

that's because we should not be using arch_initcall to call numaq_tsc_disable.

need to call it in setup_arch before time_init()/tsc_init()
and call it in init_intel() to make the cpu feature bits right.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/intel.c | 4 ++++
 arch/x86/kernel/numaq_32.c  | 7 ++++---
 arch/x86/kernel/setup.c     | 8 ++++++++
 include/asm-x86/numaq.h     | 2 ++
 4 files changed, 18 insertions(+), 3 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fe9224c51d3..70609efdf1d 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -226,6 +226,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 
 	if (cpu_has_bts)
 		ds_init_intel(c);
+
+#ifdef CONFIG_X86_NUMAQ
+	numaq_tsc_disable();
+#endif
 }
 
 static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index f0f1de1c4a1..5b20a5e7ac2 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -93,12 +93,13 @@ int __init get_memcfg_numaq(void)
 	return 1;
 }
 
-static int __init numaq_tsc_disable(void)
+void __init numaq_tsc_disable(void)
 {
+	if (!found_numaq)
+		return -1;
+
 	if (num_online_nodes() > 1) {
 		printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
 		setup_clear_cpu_cap(X86_FEATURE_TSC);
 	}
-	return 0;
 }
-arch_initcall(numaq_tsc_disable);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 987b6fde3a9..36c540d4ac4 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -849,6 +849,14 @@ void __init setup_arch(char **cmdline_p)
 	init_cpu_to_node();
 #endif
 
+#ifdef CONFIG_X86_NUMAQ
+	/*
+	 * need to check online nodes num, call it
+	 * here before time_init/tsc_init
+	 */
+	numaq_tsc_disable();
+#endif
+
 	init_apic_mappings();
 	ioapic_init_mappings();
 
diff --git a/include/asm-x86/numaq.h b/include/asm-x86/numaq.h
index ef068d2465d..34b92d581fa 100644
--- a/include/asm-x86/numaq.h
+++ b/include/asm-x86/numaq.h
@@ -157,6 +157,8 @@ struct sys_cfg_data {
 	struct		eachquadmem eq[MAX_NUMNODES];	/* indexed by quad id */
 };
 
+void numaq_tsc_disable(void);
+
 #else
 static inline int get_memcfg_numaq(void)
 {
-- 
cgit v1.2.3-70-g09d2


From ce8b06b985ae48f9425de6e4641e77cb3613ef00 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Sun, 13 Jul 2008 03:29:42 +0100
Subject: x86: I/O APIC: remove an IRQ2-mask hack

Now that IRQ2 is never made available to the I/O APIC, there is no need
to special-case it and mask as a workaround for broken systems.  Actually,
because of the former, mask_IO_APIC_irq(2) is a no-op already.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Matthew Garrett <mjg59@srcf.ucam.org>
Cc: Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/acpi/boot.c  |  1 -
 arch/x86/kernel/io_apic_32.c | 10 ----------
 arch/x86/kernel/io_apic_64.c | 10 ----------
 include/asm-x86/genapic_32.h |  5 -----
 include/asm-x86/genapic_64.h |  6 ------
 5 files changed, 32 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 785700a08e9..f489d7a9be9 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1409,7 +1409,6 @@ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
 {
 	pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", d->ident);
 	acpi_skip_timer_override = 1;
-	force_mask_ioapic_irq_2();
 	return 0;
 }
 
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index c50adb84ea6..603261a5885 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -59,13 +59,6 @@ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 static DEFINE_SPINLOCK(ioapic_lock);
 static DEFINE_SPINLOCK(vector_lock);
 
-static bool mask_ioapic_irq_2 __initdata;
-
-void __init force_mask_ioapic_irq_2(void)
-{
-	mask_ioapic_irq_2 = true;
-}
-
 int timer_through_8259 __initdata;
 
 /*
@@ -2187,9 +2180,6 @@ static inline void __init check_timer(void)
 	printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
 		vector, apic1, pin1, apic2, pin2);
 
-	if (mask_ioapic_irq_2)
-		mask_IO_APIC_irq(2);
-
 	/*
 	 * Some BIOS writers are clueless and report the ExtINTA
 	 * I/O APIC input from the cascaded 8259A as the timer
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 9e645cba11c..b16ef029cf8 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -94,13 +94,6 @@ static int no_timer_check;
 
 static int disable_timer_pin_1 __initdata;
 
-static bool mask_ioapic_irq_2 __initdata;
-
-void __init force_mask_ioapic_irq_2(void)
-{
-	mask_ioapic_irq_2 = true;
-}
-
 int timer_through_8259 __initdata;
 
 /* Where if anywhere is the i8259 connect in external int mode */
@@ -1706,9 +1699,6 @@ static inline void __init check_timer(void)
 	apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
 		cfg->vector, apic1, pin1, apic2, pin2);
 
-	if (mask_ioapic_irq_2)
-		mask_IO_APIC_irq(2);
-
 	/*
 	 * Some BIOS writers are clueless and report the ExtINTA
 	 * I/O APIC input from the cascaded 8259A as the timer
diff --git a/include/asm-x86/genapic_32.h b/include/asm-x86/genapic_32.h
index 33a73f5ed22..b02ea6e17de 100644
--- a/include/asm-x86/genapic_32.h
+++ b/include/asm-x86/genapic_32.h
@@ -119,10 +119,5 @@ enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
 #define is_uv_system()			0
 #define uv_wakeup_secondary(a, b)	1
 
-#ifdef CONFIG_X86_IO_APIC
-extern void force_mask_ioapic_irq_2(void);
-#else
-static inline void force_mask_ioapic_irq_2(void) { }
-#endif
 
 #endif
diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h
index 647e4e5c258..0f8504627c4 100644
--- a/include/asm-x86/genapic_64.h
+++ b/include/asm-x86/genapic_64.h
@@ -46,10 +46,4 @@ extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
 
 extern void setup_apic_routing(void);
 
-#ifdef CONFIG_X86_IO_APIC
-extern void force_mask_ioapic_irq_2(void);
-#else
-static inline void force_mask_ioapic_irq_2(void) { }
-#endif
-
 #endif
-- 
cgit v1.2.3-70-g09d2


From 11369f356b66d363a615fde2c5526962f7683674 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 8 Jul 2008 14:35:21 -0700
Subject: x86: change _node_to_cpumask_ptr to return const ptr

  * Strengthen the return type for the _node_to_cpumask_ptr to be
    a const pointer.  This adds compiler checking to insure that
    node_to_cpumask_map[] is not changed inadvertently.

Signed-off-by: Mike Travis <travis@sgi.com>
Cc: "akpm@linux-foundation.org" <akpm@linux-foundation.org>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Acked-by: Vegard Nossum <vegard.nossum@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup_percpu.c |  8 ++++----
 include/asm-generic/topology.h |  3 ++-
 include/asm-x86/topology.h     | 10 +++++-----
 3 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 5fc310f746f..cac68430d31 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -343,23 +343,23 @@ static const cpumask_t cpu_mask_none;
 /*
  * Returns a pointer to the bitmask of CPUs on Node 'node'.
  */
-cpumask_t *_node_to_cpumask_ptr(int node)
+const cpumask_t *_node_to_cpumask_ptr(int node)
 {
 	if (node_to_cpumask_map == NULL) {
 		printk(KERN_WARNING
 			"_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n",
 			node);
 		dump_stack();
-		return &cpu_online_map;
+		return (const cpumask_t *)&cpu_online_map;
 	}
 	if (node >= nr_node_ids) {
 		printk(KERN_WARNING
 			"_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n",
 			node, nr_node_ids);
 		dump_stack();
-		return (cpumask_t *)&cpu_mask_none;
+		return &cpu_mask_none;
 	}
-	return (cpumask_t *)&node_to_cpumask_map[node];
+	return &node_to_cpumask_map[node];
 }
 EXPORT_SYMBOL(_node_to_cpumask_ptr);
 
diff --git a/include/asm-generic/topology.h b/include/asm-generic/topology.h
index a6aea79bca4..54bbf6e04ee 100644
--- a/include/asm-generic/topology.h
+++ b/include/asm-generic/topology.h
@@ -60,7 +60,8 @@
 #ifndef node_to_cpumask_ptr
 
 #define	node_to_cpumask_ptr(v, node) 					\
-		cpumask_t _##v = node_to_cpumask(node), *v = &_##v
+		cpumask_t _##v = node_to_cpumask(node);			\
+		const cpumask_t *v = &_##v
 
 #define node_to_cpumask_ptr_next(v, node)				\
 			  _##v = node_to_cpumask(node)
diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h
index 98e5f17ea85..90ac7718469 100644
--- a/include/asm-x86/topology.h
+++ b/include/asm-x86/topology.h
@@ -82,7 +82,7 @@ DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
 extern int cpu_to_node(int cpu);
 extern int early_cpu_to_node(int cpu);
-extern cpumask_t *_node_to_cpumask_ptr(int node);
+extern const cpumask_t *_node_to_cpumask_ptr(int node);
 extern cpumask_t node_to_cpumask(int node);
 
 #else	/* !CONFIG_DEBUG_PER_CPU_MAPS */
@@ -103,7 +103,7 @@ static inline int early_cpu_to_node(int cpu)
 }
 
 /* Returns a pointer to the cpumask of CPUs on Node 'node'. */
-static inline cpumask_t *_node_to_cpumask_ptr(int node)
+static inline const cpumask_t *_node_to_cpumask_ptr(int node)
 {
 	return &node_to_cpumask_map[node];
 }
@@ -118,7 +118,7 @@ static inline cpumask_t node_to_cpumask(int node)
 
 /* Replace default node_to_cpumask_ptr with optimized version */
 #define node_to_cpumask_ptr(v, node)		\
-		cpumask_t *v = _node_to_cpumask_ptr(node)
+		const cpumask_t *v = _node_to_cpumask_ptr(node)
 
 #define node_to_cpumask_ptr_next(v, node)	\
 			   v = _node_to_cpumask_ptr(node)
@@ -186,7 +186,7 @@ extern int __node_distance(int, int);
 #define	cpu_to_node(cpu)	0
 #define	early_cpu_to_node(cpu)	0
 
-static inline cpumask_t *_node_to_cpumask_ptr(int node)
+static inline const cpumask_t *_node_to_cpumask_ptr(int node)
 {
 	return &cpu_online_map;
 }
@@ -201,7 +201,7 @@ static inline int node_to_first_cpu(int node)
 
 /* Replace default node_to_cpumask_ptr with optimized version */
 #define node_to_cpumask_ptr(v, node)		\
-		cpumask_t *v = _node_to_cpumask_ptr(node)
+		const cpumask_t *v = _node_to_cpumask_ptr(node)
 
 #define node_to_cpumask_ptr_next(v, node)	\
 			   v = _node_to_cpumask_ptr(node)
-- 
cgit v1.2.3-70-g09d2


From 2387ce57a8167490d3b34a7e1ffa9a64a1a76244 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sun, 13 Jul 2008 14:50:56 -0700
Subject: x86: make 64bit hpet_set_mapping to use ioremap too, v2

keep the one for VSYSCALL_HPET

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/hpet.c      | 20 ++++----------------
 include/asm-x86/fixmap_64.h |  1 -
 2 files changed, 4 insertions(+), 17 deletions(-)

(limited to 'include/asm-x86')

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ea230ec6905..0ea6a19bfdf 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -36,26 +36,15 @@ static inline void hpet_writel(unsigned long d, unsigned long a)
 }
 
 #ifdef CONFIG_X86_64
-
 #include <asm/pgtable.h>
-
-static inline void hpet_set_mapping(void)
-{
-	set_fixmap_nocache(FIX_HPET_BASE, hpet_address);
-	__set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);
-	hpet_virt_address = (void __iomem *)fix_to_virt(FIX_HPET_BASE);
-}
-
-static inline void hpet_clear_mapping(void)
-{
-	hpet_virt_address = NULL;
-}
-
-#else
+#endif
 
 static inline void hpet_set_mapping(void)
 {
 	hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
+#ifdef CONFIG_X86_64
+	__set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);
+#endif
 }
 
 static inline void hpet_clear_mapping(void)
@@ -63,7 +52,6 @@ static inline void hpet_clear_mapping(void)
 	iounmap(hpet_virt_address);
 	hpet_virt_address = NULL;
 }
-#endif
 
 /*
  * HPET command line enable / disable
diff --git a/include/asm-x86/fixmap_64.h b/include/asm-x86/fixmap_64.h
index 6a4789d57e6..00f3d74a052 100644
--- a/include/asm-x86/fixmap_64.h
+++ b/include/asm-x86/fixmap_64.h
@@ -40,7 +40,6 @@ enum fixed_addresses {
 	VSYSCALL_HPET,
 	FIX_DBGP_BASE,
 	FIX_EARLYCON_MEM_BASE,
-	FIX_HPET_BASE,
 	FIX_APIC_BASE,	/* local (CPU) APIC) -- required for SMP or not */
 	FIX_IO_APIC_BASE_0,
 	FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
-- 
cgit v1.2.3-70-g09d2


From 3f1c38723eb467d34d704d0ee6e7b796ba4981ee Mon Sep 17 00:00:00 2001
From: Kevin Winchester <kjwinchester@gmail.com>
Date: Mon, 14 Jul 2008 21:36:13 -0300
Subject: x86: Fix compile error with CONFIG_AS_CFI=n

   AS      arch/x86/lib/csum-copy_64.o
arch/x86/lib/csum-copy_64.S: Assembler messages:
arch/x86/lib/csum-copy_64.S:48: Error: Macro `ignore' was already defined
make[1]: *** [arch/x86/lib/csum-copy_64.o] Error 1
make: *** [arch/x86/lib] Error 2

It appears that csum-copy_64.S and dwarf2.h both define an ignore macro.
I would expect one of them can be renamed quite easily, unless they
are references elsewhere.

Caused-by-commit: 392a0fc96bd059b38564f5f8fb58327460cb5a9d
    x86: merge dwarf2 headers

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/dwarf2.h | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/dwarf2.h b/include/asm-x86/dwarf2.h
index 0bfe250894f..738bb9fb3e5 100644
--- a/include/asm-x86/dwarf2.h
+++ b/include/asm-x86/dwarf2.h
@@ -38,23 +38,23 @@
 
 /* Due to the structure of pre-exisiting code, don't use assembler line
    comment character # to ignore the arguments. Instead, use a dummy macro. */
-.macro ignore a=0, b=0, c=0, d=0
+.macro cfi_ignore a=0, b=0, c=0, d=0
 .endm
 
-#define CFI_STARTPROC	ignore
-#define CFI_ENDPROC	ignore
-#define CFI_DEF_CFA	ignore
-#define CFI_DEF_CFA_REGISTER	ignore
-#define CFI_DEF_CFA_OFFSET	ignore
-#define CFI_ADJUST_CFA_OFFSET	ignore
-#define CFI_OFFSET	ignore
-#define CFI_REL_OFFSET	ignore
-#define CFI_REGISTER	ignore
-#define CFI_RESTORE	ignore
-#define CFI_REMEMBER_STATE ignore
-#define CFI_RESTORE_STATE ignore
-#define CFI_UNDEFINED ignore
-#define CFI_SIGNAL_FRAME ignore
+#define CFI_STARTPROC	cfi_ignore
+#define CFI_ENDPROC	cfi_ignore
+#define CFI_DEF_CFA	cfi_ignore
+#define CFI_DEF_CFA_REGISTER	cfi_ignore
+#define CFI_DEF_CFA_OFFSET	cfi_ignore
+#define CFI_ADJUST_CFA_OFFSET	cfi_ignore
+#define CFI_OFFSET	cfi_ignore
+#define CFI_REL_OFFSET	cfi_ignore
+#define CFI_REGISTER	cfi_ignore
+#define CFI_RESTORE	cfi_ignore
+#define CFI_REMEMBER_STATE cfi_ignore
+#define CFI_RESTORE_STATE cfi_ignore
+#define CFI_UNDEFINED cfi_ignore
+#define CFI_SIGNAL_FRAME cfi_ignore
 
 #endif
 
-- 
cgit v1.2.3-70-g09d2


From c1e3b377ad48febba6f91b8ae42c44ee4d4ab45e Mon Sep 17 00:00:00 2001
From: Zhao Yakui <yakui.zhao@intel.com>
Date: Tue, 24 Jun 2008 17:58:53 +0800
Subject: ACPI: Create "idle=halt" bootparam

"idle=halt" limits the idle loop to using
the halt instruction.  No MWAIT, no IO accesses,
no C-states deeper than C1.

If something is broken in the idle code,
"idle=halt" is a less severe workaround
than "idle=poll" which disables all power savings.

Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 Documentation/kernel-parameters.txt |  4 +++-
 arch/ia64/kernel/process.c          |  2 ++
 arch/x86/kernel/process.c           | 17 ++++++++++++++++-
 drivers/acpi/processor_idle.c       | 22 ++++++++++++++++++++++
 include/asm-ia64/processor.h        |  1 +
 include/asm-x86/processor.h         |  1 +
 6 files changed, 45 insertions(+), 2 deletions(-)

(limited to 'include/asm-x86')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 312fe77764a..65db7f4711a 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -818,7 +818,7 @@ and is between 256 and 4096 characters. It is defined in the file
 			See Documentation/ide/ide.txt.
 
 	idle=		[X86]
-			Format: idle=poll or idle=mwait
+			Format: idle=poll or idle=mwait, idle=halt
 			Poll forces a polling idle loop that can slightly improves the performance
 			of waking up a idle CPU, but will use a lot of power and make the system
 			run hot. Not recommended.
@@ -826,6 +826,8 @@ and is between 256 and 4096 characters. It is defined in the file
 			to not use it because it doesn't save as much power as a normal idle
 			loop use the MONITOR/MWAIT idle loop anyways. Performance should be the same
 			as idle=poll.
+			idle=halt. Halt is forced to be used for CPU idle.
+			In such case C2/C3 won't be used again.
 
 	ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem
 			Claim all unknown PCI IDE storage controllers.
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index fabaf08d9a6..612b3c4a060 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -55,6 +55,8 @@ void (*ia64_mark_idle)(int);
 
 unsigned long boot_option_idle_override = 0;
 EXPORT_SYMBOL(boot_option_idle_override);
+unsigned long idle_halt;
+EXPORT_SYMBOL(idle_halt);
 
 void
 ia64_do_show_stack (struct unw_frame_info *info, void *arg)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 7dceea94723..7fc72949876 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -7,6 +7,10 @@
 #include <linux/module.h>
 #include <linux/pm.h>
 #include <linux/clockchips.h>
+#include <asm/system.h>
+
+unsigned long idle_halt;
+EXPORT_SYMBOL(idle_halt);
 
 struct kmem_cache *task_xstate_cachep;
 
@@ -325,7 +329,18 @@ static int __init idle_setup(char *str)
 		pm_idle = poll_idle;
 	} else if (!strcmp(str, "mwait"))
 		force_mwait = 1;
-	else
+	else if (!strcmp(str, "halt")) {
+		/*
+		 * When the boot option of idle=halt is added, halt is
+		 * forced to be used for CPU idle. In such case CPU C2/C3
+		 * won't be used again.
+		 * To continue to load the CPU idle driver, don't touch
+		 * the boot_option_idle_override.
+		 */
+		pm_idle = default_idle;
+		idle_halt = 1;
+		return 0;
+	} else
 		return -1;
 
 	boot_option_idle_override = 1;
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 0fc310e7dfd..c75c7ace8c1 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -41,6 +41,7 @@
 #include <linux/pm_qos_params.h>
 #include <linux/clockchips.h>
 #include <linux/cpuidle.h>
+#include <linux/cpuidle.h>
 
 /*
  * Include the apic definitions for x86 to have the APIC timer related defines
@@ -57,6 +58,7 @@
 
 #include <acpi/acpi_bus.h>
 #include <acpi/processor.h>
+#include <asm/processor.h>
 
 #define ACPI_PROCESSOR_COMPONENT        0x01000000
 #define ACPI_PROCESSOR_CLASS            "processor"
@@ -955,6 +957,17 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
 			} else {
 				continue;
 			}
+			if (cx.type == ACPI_STATE_C1 && idle_halt) {
+				/*
+				 * In most cases the C1 space_id obtained from
+				 * _CST object is FIXED_HARDWARE access mode.
+				 * But when the option of idle=halt is added,
+				 * the entry_method type should be changed from
+				 * CSTATE_FFH to CSTATE_HALT.
+				 */
+				cx.entry_method = ACPI_CSTATE_HALT;
+				snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT");
+			}
 		} else {
 			snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI IOPORT 0x%x",
 				 cx.address);
@@ -1780,6 +1793,15 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
 		return 0;
 
 	if (!first_run) {
+		if (idle_halt) {
+			/*
+			 * When the boot option of "idle=halt" is added, halt
+			 * is used for CPU IDLE.
+			 * In such case C2/C3 is meaningless. So the max_cstate
+			 * is set to one.
+			 */
+			max_cstate = 1;
+		}
 		dmi_check_system(processor_power_dmi_table);
 		max_cstate = acpi_processor_cstate_check(max_cstate);
 		if (max_cstate < ACPI_C_STATES_MAX)
diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h
index 6aff126fc07..f36e28a5f61 100644
--- a/include/asm-ia64/processor.h
+++ b/include/asm-ia64/processor.h
@@ -763,6 +763,7 @@ prefetchw (const void *x)
 #define spin_lock_prefetch(x)	prefetchw(x)
 
 extern unsigned long boot_option_idle_override;
+extern unsigned long idle_halt;
 
 #endif /* !__ASSEMBLY__ */
 
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 7f738270459..bc221623248 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -727,6 +727,7 @@ extern int			force_mwait;
 extern void select_idle_routine(const struct cpuinfo_x86 *c);
 
 extern unsigned long		boot_option_idle_override;
+extern unsigned long		idle_halt;
 
 extern void enable_sep_cpu(void);
 extern int sysenter_setup(void);
-- 
cgit v1.2.3-70-g09d2


From da5e09a1b3e5a9fc0b15a3feb64e921ccc55ba74 Mon Sep 17 00:00:00 2001
From: Zhao Yakui <yakui.zhao@intel.com>
Date: Tue, 24 Jun 2008 18:01:09 +0800
Subject: ACPI : Create "idle=nomwait" bootparam

"idle=nomwait" disables the use of the MWAIT
instruction from both C1 (C1_FFH) and deeper (C2C3_FFH)
C-states.

When MWAIT is unavailable, the BIOS and OS generally
negotiate to use the HALT instruction for C1,
and use IO accesses for deeper C-states.

This option is useful for power and performance
comparisons, and also to work around BIOS bugs
where broken MWAIT support is advertised.

http://bugzilla.kernel.org/show_bug.cgi?id=10807
http://bugzilla.kernel.org/show_bug.cgi?id=10914

Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
Signed-off-by: Li Shaohua <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 Documentation/kernel-parameters.txt |  3 ++-
 arch/ia64/kernel/process.c          |  2 ++
 arch/x86/kernel/process.c           | 11 +++++++++++
 drivers/acpi/processor_core.c       | 13 +++++++++++++
 drivers/acpi/processor_idle.c       |  6 +++++-
 include/asm-ia64/processor.h        |  1 +
 include/asm-x86/processor.h         |  1 +
 7 files changed, 35 insertions(+), 2 deletions(-)

(limited to 'include/asm-x86')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 65db7f4711a..5e497d16fb5 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -818,7 +818,7 @@ and is between 256 and 4096 characters. It is defined in the file
 			See Documentation/ide/ide.txt.
 
 	idle=		[X86]
-			Format: idle=poll or idle=mwait, idle=halt
+			Format: idle=poll or idle=mwait, idle=halt, idle=nomwait
 			Poll forces a polling idle loop that can slightly improves the performance
 			of waking up a idle CPU, but will use a lot of power and make the system
 			run hot. Not recommended.
@@ -828,6 +828,7 @@ and is between 256 and 4096 characters. It is defined in the file
 			as idle=poll.
 			idle=halt. Halt is forced to be used for CPU idle.
 			In such case C2/C3 won't be used again.
+			idle=nomwait. Disable mwait for CPU C-states
 
 	ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem
 			Claim all unknown PCI IDE storage controllers.
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 612b3c4a060..3ab8373103e 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -57,6 +57,8 @@ unsigned long boot_option_idle_override = 0;
 EXPORT_SYMBOL(boot_option_idle_override);
 unsigned long idle_halt;
 EXPORT_SYMBOL(idle_halt);
+unsigned long idle_nomwait;
+EXPORT_SYMBOL(idle_nomwait);
 
 void
 ia64_do_show_stack (struct unw_frame_info *info, void *arg)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 7fc72949876..4d629c62f4f 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -11,6 +11,8 @@
 
 unsigned long idle_halt;
 EXPORT_SYMBOL(idle_halt);
+unsigned long idle_nomwait;
+EXPORT_SYMBOL(idle_nomwait);
 
 struct kmem_cache *task_xstate_cachep;
 
@@ -340,6 +342,15 @@ static int __init idle_setup(char *str)
 		pm_idle = default_idle;
 		idle_halt = 1;
 		return 0;
+	} else if (!strcmp(str, "nomwait")) {
+		/*
+		 * If the boot option of "idle=nomwait" is added,
+		 * it means that mwait will be disabled for CPU C2/C3
+		 * states. In such case it won't touch the variable
+		 * of boot_option_idle_override.
+		 */
+		idle_nomwait = 1;
+		return 0;
 	} else
 		return -1;
 
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index 9a803f85ccf..4e1bb89fd6c 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -265,7 +265,20 @@ static int acpi_processor_set_pdc(struct acpi_processor *pr)
 
 	if (!pdc_in)
 		return status;
+	if (idle_nomwait) {
+		/*
+		 * If mwait is disabled for CPU C-states, the C2C3_FFH access
+		 * mode will be disabled in the parameter of _PDC object.
+		 * Of course C1_FFH access mode will also be disabled.
+		 */
+		union acpi_object *obj;
+		u32 *buffer = NULL;
 
+		obj = pdc_in->pointer;
+		buffer = (u32 *)(obj->buffer.pointer);
+		buffer[2] &= ~(ACPI_PDC_C_C2C3_FFH | ACPI_PDC_C_C1_FFH);
+
+	}
 	status = acpi_evaluate_object(pr->handle, "_PDC", pdc_in, NULL);
 
 	if (ACPI_FAILURE(status))
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index c75c7ace8c1..d592dbb1d12 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -957,13 +957,17 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
 			} else {
 				continue;
 			}
-			if (cx.type == ACPI_STATE_C1 && idle_halt) {
+			if (cx.type == ACPI_STATE_C1 &&
+					(idle_halt || idle_nomwait)) {
 				/*
 				 * In most cases the C1 space_id obtained from
 				 * _CST object is FIXED_HARDWARE access mode.
 				 * But when the option of idle=halt is added,
 				 * the entry_method type should be changed from
 				 * CSTATE_FFH to CSTATE_HALT.
+				 * When the option of idle=nomwait is added,
+				 * the C1 entry_method type should be
+				 * CSTATE_HALT.
 				 */
 				cx.entry_method = ACPI_CSTATE_HALT;
 				snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT");
diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h
index f36e28a5f61..f88fa054d01 100644
--- a/include/asm-ia64/processor.h
+++ b/include/asm-ia64/processor.h
@@ -764,6 +764,7 @@ prefetchw (const void *x)
 
 extern unsigned long boot_option_idle_override;
 extern unsigned long idle_halt;
+extern unsigned long idle_nomwait;
 
 #endif /* !__ASSEMBLY__ */
 
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index bc221623248..55402d2ab93 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -728,6 +728,7 @@ extern void select_idle_routine(const struct cpuinfo_x86 *c);
 
 extern unsigned long		boot_option_idle_override;
 extern unsigned long		idle_halt;
+extern unsigned long		idle_nomwait;
 
 extern void enable_sep_cpu(void);
 extern int sysenter_setup(void);
-- 
cgit v1.2.3-70-g09d2


From 2567d71cc7acd99f0a0dd02e17fe17fd7df7b30c Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 15 Jul 2008 15:02:27 +1000
Subject: x86: fix asm/e820.h for userspace inclusion
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

asm-x86/e820.h is included from userspace.  'x86: make e820.c to have
common functions' (b79cd8f1268bab57ff85b19d131f7f23deab2dee) broke it:

	make -C Documentation/lguest
	cc -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -I../../include
lguest.c  -lz -o lguest
	In file included from ../../include/asm-x86/bootparam.h:8,
	                 from lguest.c:45:
	../../include/asm/e820.h:66: error: expected ‘)’ before ‘start’
	../../include/asm/e820.h:67: error: expected ‘)’ before ‘start’
	../../include/asm/e820.h:68: error: expected ‘)’ before ‘start’
	../../include/asm/e820.h:72: error: expected ‘=’, ‘,’, ‘;’, ‘asm’
or ‘__attribute__’ before ‘e820_update_range’
	...

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/e820.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index 33e793e991d..06633b01dd5 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -59,6 +59,7 @@ struct e820map {
 	struct e820entry map[E820_X_MAX];
 };
 
+#ifdef __KERNEL__
 /* see comment in arch/x86/kernel/e820.c */
 extern struct e820map e820;
 extern struct e820map e820_saved;
@@ -115,7 +116,7 @@ extern void setup_memory_map(void);
 extern char *default_machine_specific_memory_setup(void);
 extern char *machine_specific_memory_setup(void);
 extern char *memory_setup(void);
-
+#endif /* __KERNEL__ */
 #endif /* __ASSEMBLY__ */
 
 #define ISA_START_ADDRESS	0xa0000
-- 
cgit v1.2.3-70-g09d2